In [2]:
from torchvision import datasets
import matplotlib.pyplot as plt
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
import cv2
import numpy as n

In [3]:
datapath = 'C:/Users/juana/Desktop/Data Science/pytorch'
cifar10 = datasets.CIFAR10(datapath, train = True, download = True)
cifar10_val = datasets.CIFAR10(datapath, train = False, download=True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
len(cifar10)

50000

In [5]:
#Pytorch transforms the image to a PIL object
img, lab = cifar10[99]

In [6]:
cv2.imshow('wut', img)
#plt.show()

error: OpenCV(4.5.3) :-1: error: (-5:Bad argument) in function 'imshow'
> Overload resolution failed:
>  - mat is not a numpy array, neither a scalar
>  - Expected Ptr<cv::cuda::GpuMat> for argument 'mat'
>  - Expected Ptr<cv::UMat> for argument 'mat'


In [None]:
## Transforms makes the images (and numpy arrays) into tensors witht the function "ToTensor"
to_tensor = transforms.ToTensor()
img_t = to_tensor(img)
img_t.shape

In [None]:
#we can even set this funtion from the moment que download the dataset
tensor_cifar10 = datasets.CIFAR10(datapath, download = True, train = True, transform = transforms.ToTensor())
tensor_cifar10_val = datasets.CIFAR10(datapath, download = True, train = False, transform = transforms.ToTensor())

In [None]:
#How can we visualize the data if now is a tensor?
img_t, lab = tensor_cifar10[99]

In [None]:
plt.imshow(img_t.permute(1,2,0)) #Changes the order of the axes from  C × H × W   to    H × W × C
plt.show()

In [None]:
#Normalizzing the data

#--------Steps:

#---- 1) Let’s stack all the tensors returned by the dataset along an extra dimension:

imgs = torch.stack([img_t for img_t, _ in tensor_cifar10], dim= 3)
imgs.shape


In [None]:
#-------2) Now we can easily compute the mean per channel

#Recall that view(3, -1) keeps the three channels and  merges all the remaining dimensions into one, figuring  out the appropriate size. 
#Here our 3 × 32 × 32 image is transformed into a 3 × 1,024 vector, and then the mean  is taken over the 1,024 elements of each channel.

#mean
print(imgs.view(3,-1).mean(dim=1))

#std
print(imgs.view(3,-1).std(dim=1))

# put the values from the two operations into the Normalizae function from transforms
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))

# and concatenate it after the ToTensor transform

transf_cifar10 = datasets.CIFAR10(datapath, train = True, download = True, 
                                  transform  = transforms.Compose([transforms.ToTensor(),
                                               transforms.Normalize((0.4914, 0.4822, 0.4465),(0.247, 0.2435, 0.2616))]))

In [None]:
img_n, _ = transf_cifar10[99]
plt.imshow(img_n.permute(1,2,0)) #no olvidar el permute
plt.show()

# Distinguishing birds from airplanes

In [None]:
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']

cifar2 = [(img, label_map[label]) 
          for img, label in tensor_cifar10
          if label in [0, 2]]

cifar2_val = [(img, label_map[label])
              for img, label in tensor_cifar10_val
              if label in [0, 2]]

In [None]:
n_out = 2

#create a normal linear model
model = nn.Sequential(nn.Linear(3072, 512,), nn.Tanh(),nn.Linear(512, n_out,))

In [None]:
#Use a activation functoin Softmax

softmax = nn.Softmax(dim = 1) #-----------> nn.Softmax requires us to specify the dimension along which the softmax function is applied:

In [None]:
#putting all together

model = nn.Sequential(nn.Linear(3072, 512), nn.Tanh(), nn.Linear(512, 2), nn.Softmax(dim=1))

### We can actually try running the model before even training it. Let’s do it, just to see what comes out.

In [None]:
#let's getan image

img, _ = tensor_cifar10[0]

plt.imshow(img.permute(1,2,0))
plt.show()


In [None]:
# In order to call the model, we need to make the input have the right dimensions. 
# We recall that our model expects 3,072 features in the input, and that nn works with data organized into batches along the zeroth dimension. So we need to
#turn our 3 × 32 × 32 image into a 1D tensor and then add an extra dimension

img_batch = img.view(-1).unsqueeze(0)

In [None]:
out = model(img_batch)
out

In [None]:
#see the argmax. the index thas the max probability
_, index = torch.max(out, dim=1)
index #it says the image is a plane

### Time to get training. As in the previous two chapters, we need a loss to minimize during training. 

In [None]:
model = nn.Sequential(nn.Linear(3072, 512),nn.Tanh(),nn.Linear(512, 2),nn.LogSoftmax(dim=1))

learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.NLLLoss() #to evalueate the results

n_epochs = 100
for epoch in range(n_epochs):
    for img, label in cifar2:
        out = model(img.view(-1).unsqueeze(0))#convert to 1D array
        loss = loss_fn(out, torch.tensor([label])) 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

### Dataloader

In [None]:
#  DataLoader constructor takes a Dataset object as input, along with batch_size and a shuffle Boolean that indicates whether
# the data needs to be shuffled at the beginning of each epoch. Is used as an hyperparameter for batchsizes and shuffle

train_loader = torch.utils.data.DataLoader(cifar2, batch_size = 64, shuffle=True)

model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1))

learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
loss_fn = nn.NLLLoss() # bien nn.CrossEntropyLoss()

n_epochs = 10
for epoch in range(n_epochs):
    for imgs, labels in train_loader: #we just change it here <-----------
        batch_size = imgs.shape[0]
        out = model(imgs.view(batch_size, -1)) #no need to convert the batches to 1D
        loss = loss_fn(out, labels) 
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: %d, Loss: %f" % (epoch, float(loss))) #Due to the shuffling, this now prints the loss for a random batch—clearly something we
                                                        #want to improve in chapter 8.

In [None]:
#we can compute the accuracy of our model on the validation set

val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size= 64, shuffle = False)

correct = 0
total = 0
with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]#get the batch size
        outputs = model(imgs.view(batch_size, -1))#pass the model through the batches
        _, predicted = torch.max(outputs, dim=1)#predict the max value of the prediction made by the model
        total += labels.shape[0]#get the labels shape
        correct += int((predicted == labels).sum()) #transform the predicted result (is a float) and see if it matches the label number. if so, add it
        print("Accuracy: %f", correct / total)

In [None]:
#calculate how many parameters does the model have

numel_list = [p.numel() for p in model.parameters() if p.requires_grad == True]
sum(numel_list), numel_list #105 million parameters

#howdid we get that, remember a linear layer computes y = weight * x + bias parameters, so y = 512*3072+512. y needs to length 512