In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib qt 


# torchvision 

**torchvision** is a package in the PyTorch library containing computer vision-related utilities. It provides access to popular datasets, model architectures, and commonly used image **transformations** for computer vision. It makes it easy to load and preprocess data for training and evaluation of deep learning models for computer vision tasks.

# transforms
**transforms** is a module within the torchvision package that provides common transformations. These transformations can be used to preprocess data before feeding it into a neural network.

In [3]:
# Where the trained weights are written to / read back from.
PATH = './cifar_net.pth'

# Preprocessing applied to every image: convert to a tensor, then shift/scale
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])





## hyperparameters

In [4]:
epochs = 5        # full passes over the training set
batch_size = 16   # samples per mini-batch for both data loaders
lr = 0.005        # SGD learning rate
momentum = 0.9    # SGD momentum

# Seed every RNG the notebook draws from so a fresh "Restart & Run All"
# reproduces the same results: NumPy picks the random preview images, torch
# drives weight initialisation and data shuffling.
seed = 42
np.random.seed(seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.random.manual_seed(seed)


<torch._C.Generator at 0x7ff03a0186b0>

## loading data with pytorch 
In PyTorch, loading a dataset involves creating an instance of a dataset object and passing it to a DataLoader. The dataset object can be a custom dataset that you define, or one of the many built-in datasets provided by the **torchvision, torchaudio, and torchtext packages**. These built-in datasets automatically download and **preprocess** the data, making it easy to get started with training machine learning models. Once you have created an instance of the dataset, you can pass it to a **DataLoader** along with some additional parameters such as the **batch size** and whether the data should be **shuffled**. The DataLoader returns an **iterator** that yields batches of data, which can be easily iterated over in a training loop to feed batches of data into the model.


In [5]:


# CIFAR-10 train / test splits, downloaded into ./data on first use and passed
# through the shared `transform` pipeline.
trainset = torchvision.datasets.CIFAR10(
    root='./data', transform=transform, train=True, download=True)
testset = torchvision.datasets.CIFAR10(
    root='./data', transform=transform, train=False, download=True)

# Mini-batch iterators: only the training data is shuffled, so the test
# batches always arrive in dataset order.
trainloader = torch.utils.data.DataLoader(
    trainset, shuffle=True, batch_size=batch_size, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, shuffle=False, batch_size=batch_size, num_workers=2)

# Human-readable names, indexed by the integer class label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Preview `batch_size` randomly chosen training images, one figure per image,
# straight from the dataset's stored arrays (i.e. without `transform`).
S = np.random.randint(0, len(trainloader.dataset.targets), batch_size)
train_data = trainloader.dataset
for s in S:
    print(s)
    plt.figure()
    # Dividing by 255. rescales the stored pixel values (presumably 0-255)
    # into the [0, 1] float range that imshow expects.
    plt.imshow(train_data.data[s] / 255.)
    plt.title(classes[train_data.targets[s]])
plt.show()

396
42301
44939
12457
43945
47596
8647
7497
40283
4894
25850
5221
29856
14888
10697
19135


In [7]:
def imshow(img):
    """Display an image tensor that was normalized with mean 0.5 / std 0.5.

    Args:
        img: tensor whose first axis is moved last before plotting
            (channel-first in, channel-last for matplotlib).
    """
    restored = img / 2 + 0.5  # undo the (x - 0.5) / 0.5 normalization
    channel_last = restored.numpy().transpose((1, 2, 0))
    plt.imshow(channel_last)
    plt.show()

In [8]:
# Pull the first (shuffled) mini-batch out of the training loader.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# One fixed-width class name per image in the batch, followed by the image grid.
label_names = [f'{classes[labels[j]]:5s}' for j in range(batch_size)]
print(' '.join(label_names))
imshow(torchvision.utils.make_grid(images))

frog  plane deer  car   bird  horse truck deer  horse ship  deer  dog   frog  plane deer  bird 


In [9]:
class Net(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages, then three linear layers.

    `fc1` expects 50 * 5 * 5 features, which is what the two
    (5x5 conv -> 2x2 pool) stages produce from a 32x32 input.

    Args:
        num_classes: size of the output layer. Defaults to ``len(classes)``
            (the notebook-level class tuple) when omitted, so ``Net()`` behaves
            exactly as before.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = len(classes)
        # Layers are created in the original order so that seeded weight
        # initialisation stays reproducible.
        self.conv1 = nn.Conv2d(3, 24, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(24, 50, 5)
        self.fc1 = nn.Linear(50 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)
        # Not used by forward(); available for turning logits into
        # probabilities at inference time.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return raw, unnormalized class scores (logits) for a batch `x`."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Softmax is deliberately not applied: nn.CrossEntropyLoss expects
        # logits and performs the log-softmax itself.
        return self.fc3(x)

# Instantiate the model; the bare `net` on the last line makes the cell
# display the module's layer summary.
net = Net()
net

Net(
  (conv1): Conv2d(3, 24, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(24, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=1250, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)

In [10]:
# Classification loss computed from the network's raw output scores.
criterion = nn.CrossEntropyLoss()

# Plain SGD with momentum over all of the network's parameters.
optimizer = optim.SGD(
    params=net.parameters(),
    momentum=momentum,
    lr=lr,
)

In [11]:
# Number of mini-batches between loss reports (previously repeated as three
# separate literals inside the loop).
log_every = 2000

net.train()  # make training mode explicit before optimizing
for epoch in range(epochs):  # loop over the dataset multiple times
    running_loss = 0.0  # loss accumulated since the last report
    for i, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last `log_every` mini-batches.
        running_loss += loss.item()
        if (i + 1) % log_every == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / log_every:.3f}')
            running_loss = 0.0

print('Finished Training')



[1,  2000] loss: 1.695
[2,  2000] loss: 1.159
[3,  2000] loss: 0.956
[4,  2000] loss: 0.821
[5,  2000] loss: 0.736
Finished Training


In [12]:
# Persist only the learned parameters (the state_dict) to PATH, not the
# module object itself.
torch.save(net.state_dict(), PATH)


## eval
 

In [13]:
# Build a fresh, untrained copy of the architecture and restore the weights
# saved at PATH, so the accuracy evaluation that follows exercises the
# checkpoint rather than the in-memory model.
net2 = Net()

# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
# load_state_dict is the cell's last expression, so its key-matching report is
# what the cell displays.
net2.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
correct = 0
total = 0
net2.eval()  # inference mode for layers that behave differently in training
with torch.no_grad():  # not training, so no gradients are needed
    for images, labels in testloader:
        # Raw class scores for the batch from the reloaded network.
        outputs = net2(images)
        # Predict the highest-scoring class. argmax replaces
        # `torch.max(outputs.data, 1)`: `.data` is a legacy accessor, and
        # binding `_` clobbers IPython's last-output variable.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the {total} test images: {100 * correct // total} %')


In [None]:
# First batch of the (unshuffled) test loader.
dataiter = iter(testloader)
images, labels = next(dataiter)

# Iterate over the labels themselves rather than range(batch_size), so a
# batch shorter than batch_size cannot raise an IndexError.
print('GroundTruth: ', ' '.join(f'{classes[label]:5s}' for label in labels))

# Use the reloaded checkpoint (net2), like the accuracy evaluation does, and
# skip autograd bookkeeping since this is inference only.
net2.eval()
with torch.no_grad():
    outputs = net2(images)
# argmax avoids binding `_`, which IPython reserves for the last cell output.
predicted = outputs.argmax(dim=1)
print('Predicted: ', ' '.join(f'{classes[pred]:5s}' for pred in predicted))

# print images
imshow(torchvision.utils.make_grid(images))

## confusion matrix
A confusion matrix is a table used to evaluate the performance of a classification model. It shows the number of true positives, false positives, true negatives, and false negatives for each class. The rows represent the true class labels, while the columns represent the predicted class labels. Diagonal entries represent correct classifications, while off-diagonal entries represent misclassifications. A confusion matrix can be used to calculate performance metrics such as accuracy, precision, recall, and F1-score.

In [None]:


# Collect true and predicted labels batch by batch. Pairing them inside the
# loop keeps them aligned even if the loader's ordering ever changes, unlike
# reading `testset.targets` separately.
true_labels = []
predicted_labels = []
net2.eval()
with torch.no_grad():  # inference only: do not build autograd graphs
    for images, labels in testloader:
        outputs = net2(images)
        predicted_labels.extend(outputs.argmax(dim=1).tolist())
        true_labels.extend(labels.tolist())

# Rows are true classes, columns are predicted classes. Passing `labels`
# fixes the matrix to one row/column per entry of `classes`, so it always
# matches the tick labels below.
cm = confusion_matrix(true_labels, predicted_labels, labels=list(range(len(classes))))

# Normalize each row to fractions of that true class. The divisor is clamped
# to 1 so a class with no samples yields a zero row instead of NaNs.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / np.maximum(row_totals, 1)

# Visualize the normalized confusion matrix with class names on both axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, cmap='Blues', fmt='.2%',
            xticklabels=classes, yticklabels=classes, ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Normalized confusion matrix (rows sum to 100%)')
plt.show()
