## AlexNet



In [None]:
# Import PyTorch
import torch
from torch import nn
import torch.nn.functional as F

# Import torchvision 
import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor

# Import matplotlib for visualization
import matplotlib.pyplot as plt



# Check versions
# Note: this notebook expects PyTorch >= 1.10.0 and torchvision >= 0.11
print(f"PyTorch version: {torch.__version__}\ntorchvision version: {torchvision.__version__}")

In [None]:
# Setup training data
from torchvision import transforms


# Side length of the square images fed to the network. AlexNet's first fully
# connected layer expects a 256 x 6 x 6 feature map, which is what a 224x224
# input produces after the convolution/pooling stack.
IMAGE_SIZE = 224

# One preprocessing pipeline shared by both splits so train and test images
# are guaranteed to be prepared identically.
#
# The dataset yields single-channel PIL images. They are upscaled with Resize
# (instead of being tiled into an 8x8 mosaic of copies, which only matched the
# expected shape but not the expected content) and the grey channel is
# replicated to the 3 channels expected by the first convolution.
image_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),  # PIL image -> float tensor, channels first
    # NOTE(review): with p=1 and a fixed (90, 90) angle range these two steps
    # are deterministic (every image is flipped and rotated by 90 degrees),
    # not random augmentation. Kept so both splits keep their orientation.
    transforms.RandomHorizontalFlip(p=1),
    transforms.RandomRotation((90, 90), expand=False, center=None, fill=0),
])

# Setup training data
train_data = datasets.FashionMNIST(
    root="data",  # where to download data to
    train=True,  # get the training split
    download=True,  # download data if it doesn't exist on disk
    transform=image_transform,
    target_transform=None,  # labels are used as-is
)

# Setup testing data
test_data = datasets.FashionMNIST(
    root="data",
    train=False,  # get the test split
    download=True,
    transform=image_transform,
)

In [None]:
# Peek at the first training example: indexing the dataset yields an
# (image, label) pair
first_sample = train_data[0]
image, label = first_sample
image, label


In [None]:
# What's the shape of the image?
# The dataset transform produces 3 channels; the AlexNet defined later in this
# notebook presumably needs 224x224 inputs (its flatten assumes a 256 x 6 x 6
# feature map) — confirm the value displayed here matches.
image.shape


In [None]:
# Sizes of the raw image and label collections, in the order:
# train images, train labels, test images, test labels
tuple(
    len(collection)
    for collection in (train_data.data, train_data.targets, test_data.data, test_data.targets)
)

In [None]:
# See classes: keep the dataset's `classes` attribute under a shorter name
# and display it
class_names = train_data.classes
class_names

In [None]:
from torch.utils.data import DataLoader

# Number of samples per mini-batch
BATCH_SIZE = 32

# Wrap each dataset in a DataLoader so it can be iterated batch by batch.
# Only the training split is reshuffled every epoch; the test split keeps
# its original order.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)

# Summarise what was created
print(f"Dataloaders: {train_dataloader, test_dataloader}") 
print(f"Length of train dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}")
print(f"Length of test dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}")

In [None]:
# Pull a single batch out of the training dataloader and inspect the shapes
# of its features and labels
first_batch = next(iter(train_dataloader))
train_features_batch, train_labels_batch = first_batch
train_features_batch.shape, train_labels_batch.shape



In [None]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    Five convolutional layers (with max pooling after the 1st, 2nd and 5th)
    followed by three fully connected layers. Inputs must have 3 channels and
    a spatial size for which the final pooled feature map is 6x6 (e.g.
    224x224), because the first fully connected layer expects 256 * 6 * 6
    features.

    Dropout follows the module's mode: it is active after ``model.train()``
    and disabled after ``model.eval()``, so evaluation is deterministic.

    Args:
        num_classes: Number of output logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # First convolutional layer: 11x11 filters, stride 4, padding 2
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2)

        # Max pooling (kernel 3, stride 2), reused after conv1, conv2 and conv5
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)

        # Second convolutional layer: 5x5 filters, padding 2
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2)

        # Third to fifth convolutional layers: 3x3 filters, padding 1
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)

        # Classifier head: two hidden layers of 4096 units, then the logits
        self.fc1 = nn.Linear(in_features=256 * 6 * 6, out_features=4096)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        self.fc3 = nn.Linear(in_features=4096, out_features=num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image tensor with 3 channels (see the class docstring for the
                expected spatial size).

        Returns:
            Tensor of raw, un-normalised logits with ``num_classes`` columns.
        """
        # Convolutional feature extractor (ReLU after every convolution)
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool(F.relu(self.conv5(x)))

        # Flatten the feature map into one row per sample for the linear layers
        x = x.view(-1, self.fc1.in_features)

        x = F.relu(self.fc1(x))
        # F.dropout defaults to training=True, which would keep dropping
        # activations even in eval mode; tie it to the module's mode instead.
        x = F.dropout(x, p=0.5, training=self.training)

        x = F.relu(self.fc2(x))

        # Final layer returns logits (no softmax; the loss applies it)
        return self.fc3(x)
										
# Seed the RNG before construction so the randomly initialised weights are
# reproducible, then build the network with 10 output classes and show its layers
torch.manual_seed(42)
alexnet = AlexNet(num_classes=10)
print(alexnet)


In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


from helpers_function import accuracy_fn

# Define the loss function and the optimizer: cross-entropy on the logits and
# plain SGD over all of the model's parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=alexnet.parameters(), lr=0.01)

In [None]:
torch.manual_seed(42)

# Per-epoch metric histories; train_step and test_step each append one entry
# per call
trainingloss, trainingaccuracy = [], []
testloss, testaccuracy = [], []
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device
               ):
    """Train ``model`` for one full pass over ``data_loader``.

    The mean loss and mean accuracy over all batches are printed, appended to
    the module-level ``trainingloss`` / ``trainingaccuracy`` lists and returned.

    Args:
        model: Network to optimise; it is moved to ``device`` and put in
            training mode.
        data_loader: Iterable of ``(X, y)`` batches.
        loss_fn: Called as ``loss_fn(logits, y)``; must return a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` with the
            predicted label indices; presumably returns a percentage (the log
            line prints it with a ``%`` sign) — verify against the helper.
        device: Device the model and every batch are moved to.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` for this epoch.
    """
    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    # test_step leaves the model in eval mode, so training mode has to be
    # restored at the start of every epoch.
    model.train()
    # NOTE: autograd anomaly detection is deliberately not enabled here. It is
    # a debugging aid that slows down every backward pass; switch it on from
    # the calling code when hunting NaNs.
    for X, y in data_loader:
        # Send data to the target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss; accumulate it as a Python float so the running
        #    total does not hold on to each batch's autograd history
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1))  # logits -> pred labels

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Average the per-batch metrics over the epoch and report them
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")
    trainingloss.append(train_loss)
    trainingaccuracy.append(train_acc)
    return train_loss, train_acc

def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate ``model`` on every batch of ``data_loader`` without training.

    The mean loss and mean accuracy over all batches are printed, appended to
    the module-level ``testloss`` / ``testaccuracy`` lists and returned.

    Args:
        data_loader: Iterable of ``(X, y)`` batches.
        model: Network to evaluate; it is moved to ``device`` and left in
            eval mode when this function returns.
        loss_fn: Called as ``loss_fn(logits, y)``; must return a scalar tensor.
        accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` with the
            predicted label indices; presumably returns a percentage (the log
            line prints it with a ``%`` sign) — verify against the helper.
        device: Device the model and every batch are moved to.

    Returns:
        Tuple ``(mean_loss, mean_accuracy)`` over the dataloader.
    """
    test_loss, test_acc = 0.0, 0.0
    model.to(device)
    model.eval()  # put model in eval mode

    # No gradients are needed for evaluation. Autograd anomaly detection is
    # not enabled here either: it is a global debugging switch that would
    # also slow down later training.
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to the target device
            X, y = X.to(device), y.to(device)

            # 1. Forward pass
            test_pred = model(X)

            # 2. Accumulate loss (as a Python float, so the history lists do
            #    not keep device tensors alive) and accuracy
            test_loss += loss_fn(test_pred, y).item()
            test_acc += accuracy_fn(y_true=y,
                                    y_pred=test_pred.argmax(dim=1))  # logits -> pred labels

    # Average the per-batch metrics and report them
    test_loss /= len(data_loader)
    test_acc /= len(data_loader)
    print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")
    testloss.append(test_loss)
    testaccuracy.append(test_acc)
    return test_loss, test_acc

In [None]:
torch.manual_seed(42)

# Measure time
from timeit import default_timer as timer

from helpers_function import print_train_time

# Number of full passes over the training data
epochs = 5

# Start the wall-clock timer for the whole train/test run
train_time_start_model_2 = timer()

# Each epoch: one optimisation pass over the training set, then one
# evaluation pass over the test set
for epoch in range(epochs):
    print(f"Epoch: {epoch}\n---------")
    train_step(
        model=alexnet,
        data_loader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        accuracy_fn=accuracy_fn,
        device=device,
    )
    test_step(
        model=alexnet,
        data_loader=test_dataloader,
        loss_fn=loss_fn,
        accuracy_fn=accuracy_fn,
        device=device,
    )

# Stop the timer and hand both timestamps to the helper
train_time_end_model_2 = timer()
total_train_time_model_2 = print_train_time(
    start=train_time_start_model_2,
    end=train_time_end_model_2,
    device=device,
)

In [None]:
def _history_to_numpy(history):
    """Convert a list of per-epoch metrics to a 1-D float32 NumPy array.

    Each entry may be a plain Python number or a 0-dim tensor on any device;
    it is converted to a Python float first, so the result never depends on
    where (or whether) the values live on an accelerator.
    """
    return torch.tensor([float(value) for value in history], dtype=torch.float32).numpy()


# --- Loss curves -----------------------------------------------------------
numpy_array = _history_to_numpy(trainingloss)
numpy_array1 = _history_to_numpy(testloss)

print(numpy_array)
print(numpy_array1)

# Label both curves so train and test can be told apart in the figure
plt.plot(numpy_array, label='Training loss')
plt.plot(numpy_array1, label='Test loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training & Test Loss')
plt.legend()
plt.show()


# --- Accuracy curves -------------------------------------------------------
numpy_array2 = _history_to_numpy(trainingaccuracy)
numpy_array3 = _history_to_numpy(testaccuracy)

print(numpy_array2)
print(numpy_array3)

# Plot the converted arrays (not the raw history lists, whose entries may be
# tensors that matplotlib cannot consume directly)
plt.plot(numpy_array2, label='Training accuracy')
plt.plot(numpy_array3, label='Test accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training & Test Accuracy')
plt.legend()
plt.show()

In [None]:
# Evaluate a trained model over a whole dataloader
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn, 
               device: torch.device = device):
    """Evaluates a given model on a given dataset.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
            It is moved to ``device`` and left in eval mode.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model; must return a scalar tensor.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.
            Called as ``accuracy_fn(y_true=..., y_pred=...)`` with predicted label indices.
        device (torch.device, optional): Target device to compute on. Defaults to device.

    Returns:
        (dict): Results of model making predictions on data_loader, with keys
            ``"model_name"`` (class name of the model), ``"model_loss"`` (mean
            per-batch loss as a float) and ``"model_acc"`` (mean per-batch
            value returned by accuracy_fn).
    """
    loss, acc = 0.0, 0.0
    # The batches are sent to `device`, so the model has to live there too;
    # otherwise the forward pass fails with a device mismatch.
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to the target device
            X, y = X.to(device), y.to(device)

            y_pred = model(X)
            # Accumulate plain Python floats rather than device tensors
            loss += loss_fn(y_pred, y).item()
            acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    # Scale loss and acc to per-batch averages
    loss /= len(data_loader)
    acc /= len(data_loader)
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": loss,
            "model_acc": acc}

# Evaluate the trained AlexNet on the test dataloader, on the selected device,
# and display the resulting metrics dictionary
model_results = eval_model(
    model=alexnet,
    data_loader=test_dataloader,
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    device=device,
)
model_results
