In [16]:
import torch
import numpy
import pandas as pd
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import confusion_matrix
from plot_confusion_matrix import plot_confusion_matrix

In [17]:
torch.cuda.empty_cache()

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [19]:
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 40
batch_size = 64
learning_rate = 0.001

### MNIST dataset 

In [20]:
modification=transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
        transforms.Lambda(lambda x: x.expand(3, -1, -1))
    ])

In [21]:
train_dataset = torchvision.datasets.MNIST(root='..//set', 
                                           train=True, 
                                           transform=modification,  
                                           download=True)

In [22]:
test_dataset = torchvision.datasets.MNIST(root='..///set', 
                                          train=False, 
                                          transform=modification)

In [23]:
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

### Model

In [24]:
model = torch.hub.load('pytorch/vision:v0.9.0', 'alexnet', pretrained=True).to(device)

Using cache found in C:\Users\Szymon/.cache\torch\hub\pytorch_vision_v0.9.0


### Loss and optimizer

In [25]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
running_loss = 0
losses = []

### Train Model

In [26]:
epochs_arr=[]
losses_arr=[]
epochs_loss=0

In [None]:
size = len(train_loader.dataset)
print(size)

for epoch in range(num_epochs):
    epochs_loss=0
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images).to(device)
        loss = criterion(outputs, labels)
        
        # Backprpagation and optimization
        optimizer.zero_grad()
        loss.backward()
        
        #calculate epochs_loss
        epochs_loss = epochs_loss + loss.item()
        optimizer.step()
        
        if (i+1) % int(size/batch_size) == 0:
            print('Loss: {} - Epoch: {}'.format(loss.item(), epoch))
            losses_arr.append(loss.item())

              

60000
Loss: 0.10505634546279907 - Epoch: 0
Loss: 0.037940166890621185 - Epoch: 1
Loss: 0.07745969295501709 - Epoch: 2
Loss: 0.10060498118400574 - Epoch: 3
Loss: 0.05440204590559006 - Epoch: 4
Loss: 0.03416360542178154 - Epoch: 5
Loss: 0.07240372151136398 - Epoch: 6
Loss: 0.02537292242050171 - Epoch: 7
Loss: 0.013587714172899723 - Epoch: 8
Loss: 0.08778408169746399 - Epoch: 9
Loss: 0.027065884321928024 - Epoch: 10
Loss: 0.004768729209899902 - Epoch: 11


In [None]:
epochs_arr = list(range(num_epochs))

fig = plt.figure()
ax1 = fig.add_subplot()
ax1.set_ylabel('loss')
ax1.set_xlabel('number of epochs')


plt.plot(epochs_arr, losses_arr)
plt.show()

In [None]:
losses_arr

### Test the model

In [None]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), './models/squeeznet.ckpt')

### Confusion Matrix

In [None]:
@torch.no_grad()
def get_all_preds(model, loader):
    all_preds = torch.tensor([]).to(device)
    for batch in loader:
        images, labels = batch
        images = images.to(device)
        labels = labels.to(device)

        preds = model(images).to(device)
        all_preds = torch.cat(
            (all_preds, preds)
            ,dim=0
        )
    return all_preds

In [None]:
with torch.no_grad():
    prediction_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
    train_preds = get_all_preds(model, prediction_loader).to(device)
    
train_preds.shape
train_preds.grad_fn
train_preds.grad

def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

preds_correct = get_num_correct(train_preds, test_dataset.targets.to(device) )
print('total correct', preds_correct)
print('accuracy', preds_correct / len(test_dataset))
train_preds.is_cuda

In [None]:
stacked = torch.stack((test_dataset.targets.to(device), train_preds.argmax(dim=1)), dim=1)

In [None]:
stacked[9].tolist()

In [None]:
cmt = torch.zeros(10,10, dtype=torch.int64)

In [None]:
for p in stacked:
    j,k = p.tolist()
    cmt[j,k] = cmt[j, k] + 1

In [None]:
cmt

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [None]:
lb = test_dataset.targets.detach().cpu().numpy()

In [None]:
rp = train_preds.argmax(dim=1).detach().cpu().numpy()
print(rp)

In [None]:
cm = confusion_matrix(lb, rp)

In [None]:
names = set(lb)
plt.figure(figsize=(10,10))
plot_confusion_matrix(cm, names)