# Topic 2 Machine Learning for Regression and Classification


## Introduction to Pytorch Framework

### Install Pytorch

In [None]:
import torch

In [None]:
print(torch.__version__)

In [None]:
# Report the CUDA setup. The device-specific queries raise on machines without
# a usable GPU, so they are only issued when CUDA is actually available.
cuda_available = torch.cuda.is_available()
print("Cuda Device Available : ", cuda_available)
print("Cuda Device Count: ", torch.cuda.device_count())
if cuda_available:
    current_device = torch.cuda.current_device()
    print("Cuda Current Device: ", current_device)
    print("Cuda Device Name: ", torch.cuda.get_device_name(current_device))

### Basic Pytorch Operations (Optional, Backup)

In [None]:
# Create a Torch Vector
a = [1, 2, 3]
b = torch.Tensor(a)
# b = torch.FloatTensor(a)
# b = torch.DoubleTensor(a)
# b = torch.IntTensor(a)
# b = torch.LongTensor(a)
print(b)
print(b[0])

In [None]:
a = [1, 2, 3]
b = torch.tensor(a)
print(b)
print(b[0])

In [None]:
# Create a Torch Matrix
a = [[1, 2, 3], [4, 5, 6]]
b = torch.tensor(a)
print(b)
print(b[0])

In [None]:
# Create a 3D Tensor
a = [[[1., 2.], [3., 4.]],
     [[5., 6.], [7., 8.]]]
b = torch.tensor(a)
print(b)

In [None]:
# Conversion from Tensor to numpy
a = torch.tensor([3])
print(a)
b = a.numpy()
print(b)

In [None]:
# Conversion from numpy to Tensor
import numpy as np
a = np.arange(6).reshape(2,3)
print(a)
b = torch.from_numpy(a)
print(b)

In [None]:
# Numpy functions

a = [1,2,3,4]
b = np.array(a)
c = np.sum(b)
d = np.mean(b)
e = np.max(b)
print(c,d,e)


In [None]:
# Torch functions
a = [1.,2.,3.,4.]
b = torch.tensor(a)
c = torch.sum(b)
d = torch.mean(b)
e = torch.max(b)
print(c,d,e)

In [None]:
# Tensor operations
a = torch.tensor([1,1])
b = torch.tensor([2,2])
print(a+b)
print(torch.add(a, b))

In [None]:
# Tensor operations with GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = torch.tensor([1,1]).to(device)
b = torch.tensor([2,2]).to(device)
c = a+b
print(c)
print(c.cpu().numpy())

In [None]:
# Activity: Tensor Operation with GPU
a = torch.tensor([3]).to(device)
b = torch.tensor([4]).to(device)
c = torch.tensor([5]).to(device)
d = a*b+c
print(d)
print(d.cpu().numpy())

In [None]:
# Torch matrix multiplication
mat1 = torch.randn(2, 3)
mat2 = torch.randn(3, 3)
torch.mm(mat1, mat2)

In [None]:
# Activity: Matrix operation
x = torch.tensor([[1,1]])
w = torch.tensor([[1,2],[3,4]])
b = torch.tensor([[2,2]])
print(torch.mm(x,w)+b)


In [None]:
# Generate Special Torch Tensors
a = torch.diag(torch.tensor([1,2,3]))
a = torch.eye(3)
a

In [None]:
# Torch linspace
a = torch.linspace(1,10,10)
a

In [None]:
# Create uniform random numbers from 0 to 1
a = torch.rand(5, 3)
a

In [None]:
# Create Gaussian random numbers with mean 0 and std 1
a = torch.randn(5, 3)
a

In [None]:
# Torch Max
a = torch.tensor([[1,0,0],[1,0,0],[0,1,0],[0,0,1]])
print(torch.max(a,1))

In [None]:
# Activity: Torch Max
a = torch.tensor([[3,4,-5,2,7,3]])
torch.max(a,1)

In [None]:
# Reshape a torch tensor
a = torch.linspace(1,10,10).view(2,5)
a = torch.linspace(1,10,10).reshape(2,5)
# a = torch.linspace(1,10,10).view(-1,2)
# a = torch.linspace(1,10,10).reshape(-1,2)

print(a)

In [None]:
# Unsqueeze and squeeze dimensions
x = torch.linspace(0, 5, 5)
print(x)
x = torch.unsqueeze(x, dim=0) 
print(x)

In [None]:
x = torch.linspace(0, 5, 5)
print(x)
x = torch.unsqueeze(x, dim=1) 
print(x)

In [None]:
# Concatenate
x = torch.tensor([1,2,3])
y = torch.cat((x,x,x))
print(y)

In [None]:
# Transpose
x = torch.tensor([[1,2],[3,4]])
y = torch.t(x)
print(x)
print(y)

In [None]:
# Activity: Tensor Operations

x = torch.tensor([1,1])
x = torch.unsqueeze(x,dim=0)
#print(x.shape)
w = torch.tensor([[1,2],[3,4]])
#print(w.shape)
b = torch.tensor([[2],[2]])
b = torch.t(b)
#print(b.shape)
y = torch.mm(x,w)+b
print(y)

In [None]:
# Gradient and Back Propagation
x = torch.tensor([5.],requires_grad=True)
y = x*x
y.backward()

x.grad.item()

In [None]:
x = torch.tensor(1.0, requires_grad = True)
y = 2*x**2
z = y**3

z.backward()

x.grad.item()

In [None]:
# Gradient
x = torch.tensor([-2.], requires_grad=True)
y = torch.tensor([5.],requires_grad=True)
z = torch.tensor([-4.], requires_grad=True)
f = (x+y)*z    

f.backward()

print('x gradient = ',x.grad.item())    
print('y gradient = ',y.grad.item())     
print('z gradient = ',z.grad.item())    

In [None]:
# Difference between .item and .data
a = torch.randn(1)
print(a.item())
print(a.data)
print(a)

In [None]:
# Activity: Gradient
x = torch.tensor([2.], requires_grad=True)
w = torch.tensor([3.], requires_grad=True)
b = torch.tensor([4.], requires_grad=True)
y = w*x + b    

# Compute gradients
y.backward()

# Print out the gradients.
print('x gradient = ', x.grad.item())     
print('w gradient = ', w.grad.item())    
print('b gradient = ', b.grad.item())

# Print out the gradients.
print('x gradient = ', x.grad)    
print('w gradient = ', w.grad)
print('b gradient = ', b.grad)



In [None]:
# Activity: Compute Gradient
x = torch.tensor([2.],requires_grad=True)
w = torch.tensor([3.],requires_grad=True)
b = torch.tensor([4.],requires_grad=True)
y = w * x + b    

y.backward()

In [None]:
# Activity: Compute Gradient

x = torch.tensor(1.0, requires_grad = True)
y = 2*x**2
z = y**3

z.backward() #Computes the gradient 
print(x.grad.item()) #Print dz/dx 

## Build a Regression Model Using NN


### Activation Functions

In [None]:
import torch
import matplotlib.pyplot as plt 

x = torch.linspace(-10,10,100)

plt.figure(figsize=(15,5))

# Relu Activation Function
x_relu = torch.relu(x)

plt.subplot(1,3,1)
plt.plot(x,x_relu)
plt.title('relu')

# Sigmoid Activation Function
x_sigmoid = torch.sigmoid(x)

plt.subplot(1,3,2)
plt.plot(x,x_sigmoid)
plt.title('sigmoid')

# Hyperbolic Tanh Activation Function
x_tanh = torch.tanh(x)

plt.subplot(1,3,3)
plt.plot(x,x_tanh)
plt.title('tanh')

plt.show()

### Simple Linear Regression 

In [None]:
# Step 1: Setup
import torch

X = torch.tensor([1.,2.,3,4,5])
y = torch.tensor([0,-1.1,-1.8,-3.1,-4.5])

W = torch.rand(1,requires_grad=True)
b = torch.rand(1,requires_grad=True)

In [None]:
# Step 2: Optimizer
learning_rate = 0.001

optimizer = torch.optim.SGD([W,b], lr=learning_rate)

In [None]:
# Step 3: Train the Model

for i in range(1000):
    # Model
    yhat = X*W+b

    # Loss Function
    loss = (yhat-y).pow(2).sum()

    #Compute gradient
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i%50==0: print(f'i:{i}, W:{W.item()}, b:{b.item()}, loss:{loss.item()}')

In [None]:
# Alternative Method using MSELoss method
# Step 3: Train the Model

criterion = torch.nn.MSELoss()

for i in range(1000):
    # Model
    yhat = X*W+b

    # Loss Function
    loss = criterion(yhat,y)

    #Compute gradient
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i%50==0: print(f'i:{i}, W:{W.item()}, b:{b.item()}, loss:{loss.item()}')

In [None]:
# Step 4: Evaluate the Model
import matplotlib.pyplot as plt 

W_ = W.item()
b_ = b.item()

plt.plot(X,y,'o')
plt.plot(X,X*W_+b_,'r')
plt.show()


In [None]:
# Explicit Zero Grad Demo
# Without using optimizer

import torch

learning_rate = 0.001

# Five (X, y) training pairs for the line fit
X = torch.tensor([1.,2.,3,4,5])
y = torch.tensor([0,-1.1,-1.8,-3.1,-4.5])

# Randomly initialised slope and intercept, tracked by autograd
W = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

for i in range(1000):
    # Forward pass and summed squared error
    yhat = X*W+b
    loss = torch.sum((yhat - y) ** 2)

    # Back-propagate to fill W.grad and b.grad
    loss.backward()

    # Gradient-descent step, performed outside of autograd tracking
    with torch.no_grad():
        W -= learning_rate * W.grad.item()
        b -= learning_rate * b.grad.item()

    # Gradients accumulate across backward() calls, so reset them by hand
    W.grad.zero_()
    b.grad.zero_()

    if i % 50 == 0:
        print(f'i:{i}, W:{W.item()}, b:{b.item()}, loss:{loss.item()}')


In [None]:
# Step 4: Evaluate the Model
import matplotlib.pyplot as plt 

W_ = W.item()
b_ = b.item()

plt.plot(X,y,'o')
plt.plot(X,X*W_+b_,'r')
plt.show()


### Neural Network Predictive Regression Model

In [None]:
# Step 1: Setup
import torch

X = torch.tensor([1.,2.,3,4,5])
y = torch.tensor([0,-1.1,-1.8,-3.1,-4.5])

X = torch.unsqueeze(X, dim=1) 
y = torch.unsqueeze(y, dim=1) 

In [None]:
# Step 2: Define Model

import torch
import torch.nn as nn
import torch.nn.functional as F 

# Widths of the two hidden layers
L1 = 3
L2 = 5


class Model(nn.Module):
    """Fully connected regressor: 1 -> L1 -> L2 -> 1 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(1, L1)
        self.fc2 = nn.Linear(L1, L2)
        self.fc3 = nn.Linear(L2, 1)

    def forward(self, x):
        """Return the network output for ``x``; the last layer has no activation."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = Model()
print(model)

In [None]:
# Alternative way to define the model
# Step 2: Define Model

import torch
import torch.nn as nn
import torch.nn.functional as F 

L1 = 3
L2 = 5

model = nn.Sequential(nn.Linear(1, L1),
                      nn.ReLU(),
                      nn.Linear(L1, L2),
                      nn.ReLU(),
                      nn.Linear(L2, 1))
print(model) 

In [None]:
# Step 3: Select Optimizer
learning_rate = 0.01

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Step 4: Training

for i in range(1000):

    # Model prediction
    yhat = model(X)

    # Compute loss (summed squared error)
    loss = (yhat - y).pow(2).sum()

    # Compute gradients and update parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss only: W and b belong to the earlier linear-regression
    # cells and are not parameters of this network.
    if i % 50 == 0:
        print(f'i:{i}, loss:{loss.item()}')

In [None]:
# Step 5: Evaluate the Model
import matplotlib.pyplot as plt

plt.plot(X,y,'o')
plt.plot(X,model(X).data,'r')
plt.show()

### Demo: Build a Predictive Regression Model for Housing Price

#### Step 1 Preprocess Data

In [None]:
import pandas as pd
dataset_path = "https://raw.githubusercontent.com/tertiarycourses/datasets/master/boston.csv"                     
dataset = pd.read_csv(dataset_path)
dataset

In [None]:
dataset = dataset.dropna()

In [None]:
x_train = dataset.sample(frac=0.7,random_state=0)
x_test = dataset.drop(x_train.index)

In [None]:
y_train = x_train.pop('medv')
y_test = x_test.pop('medv')

In [None]:
# Scale both splits with statistics taken from the training split only, so
# the test features are transformed exactly like the data the model is fit on.
# The statistics are captured before x_train is overwritten.
train_mean = x_train.mean()
train_range = x_train.max() - x_train.min()

x_train = (x_train - train_mean) / train_range
x_test = (x_test - train_mean) / train_range

In [None]:
x_train.shape

In [None]:
import torch

x_train = torch.Tensor(x_train.values)
y_train = torch.Tensor(y_train.values)

x_test = torch.Tensor(x_test.values)
y_test = torch.Tensor(y_test.values)

In [None]:
y_train = torch.unsqueeze(y_train, dim=1) 
y_test = torch.unsqueeze(y_test, dim=1) 

In [None]:
x_train.shape, y_train.shape,x_test.shape,y_test.shape

#### Step 2 Build the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Widths of the two hidden layers
L1 = 32
L2 = 64


class Model(nn.Module):
    """Fully connected regressor: 13 -> L1 -> L2 -> 1 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(13, L1)
        self.fc2 = nn.Linear(L1, L2)
        self.fc3 = nn.Linear(L2, 1)

    def forward(self, x):
        """Return the network output for ``x``; the last layer has no activation."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = Model()
print(model)


#### Step 3 Define the Loss Function and Optimizer



In [None]:
#  Loss function

criterion = nn.MSELoss()  

In [None]:
# Optimizer

learning_rate = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#### Step 4 Train the Model

In [None]:
for i in range(1000):

	# Model prediction
    yhat = model(x_train)

    # Compute loss
    loss = criterion(yhat, y_train)

    # Compute gradients and update parameters     
    optimizer.zero_grad()   
    loss.backward()         
    optimizer.step()       

    if i%50==0: print(f'step {i}. loss = {loss.item():0.2f}')

#### Step 5 Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.scatter(y_test, model(x_test).data)
plt.xlabel('True Values [Housing Price]')
plt.ylabel('Predictions [Housing Price]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
plt.plot([0, 100], [0, 100])

In [None]:
# Compare the shape of the test-set predictions with the test targets
model(x_test).shape, y_test.shape

### Save and Load Model

In [None]:
torch.save(model,'./regression.pkl')

In [None]:
# The file holds the whole pickled model object (not just a state_dict), so
# full unpickling has to be requested explicitly: from PyTorch 2.6 on,
# torch.load defaults to weights_only=True and refuses such files.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
new_model = torch.load('./regression.pkl', weights_only=False)

In [None]:
import matplotlib.pyplot as plt

plt.scatter(y_test, new_model(x_test).data)
plt.xlabel('True Values [Housing Price]')
plt.ylabel('Predictions [Housing Price]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
plt.plot([0, 100], [0, 100])

### Activity: Predictive Model

### Step 1 Preprocess Data

In [None]:
import pandas as pd
dataset_path = "https://raw.githubusercontent.com/tertiarycourses/datasets/master/iris.csv"
                     
dataset = pd.read_csv(dataset_path)
dataset

In [None]:
dataset = dataset.dropna()
dataset.pop('species')
dataset

In [None]:
x_train = dataset.sample(frac=0.7,random_state=0)
x_test = dataset.drop(x_train.index)

In [None]:
y_train = x_train.pop('sepal_width')
y_test = x_test.pop('sepal_width')

In [None]:
# Scale both splits with statistics taken from the training split only, so
# the test features are transformed exactly like the data the model is fit on.
# The statistics are captured before x_train is overwritten.
train_mean = x_train.mean()
train_range = x_train.max() - x_train.min()

x_train = (x_train - train_mean) / train_range
x_test = (x_test - train_mean) / train_range

In [None]:
import torch

x_train = torch.Tensor(x_train.values)
y_train = torch.Tensor(y_train.values)

x_test = torch.Tensor(x_test.values)
y_test = torch.Tensor(y_test.values)

In [None]:
y_train = torch.unsqueeze(y_train, dim=1) 
y_test = torch.unsqueeze(y_test, dim=1) 

In [None]:
x_train.shape, y_train.shape,x_test.shape,y_test.shape

### Step 2 Build the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Widths of the two hidden layers
L1 = 32
L2 = 64


class Model(nn.Module):
    """Fully connected regressor: 3 -> L1 -> L2 -> 1 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, L1)
        self.fc2 = nn.Linear(L1, L2)
        self.fc3 = nn.Linear(L2, 1)

    def forward(self, x):
        """Return the network output for ``x``; the last layer has no activation."""
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = Model()
print(model)

### Step 3 Define the Loss Function and Optimizer

In [None]:
#  Loss function

criterion = nn.MSELoss()  

In [None]:
# Optimizer

learning_rate = 0.0001

optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

### Step 4 Train the Model

In [None]:
for i in range(1000):

	# Model prediction
    yhat = model(x_train)

    # Compute loss
    loss = criterion(yhat, y_train)

    # Compute gradients and update parameters     
    optimizer.zero_grad()   
    loss.backward()         
    optimizer.step()       

    if i%50==0: print(f'step {i}. loss = {loss.item():0.2f}')

### Step 5 Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.scatter(y_test, model(x_test).data)
plt.xlabel('True Values Sepal Width')
plt.ylabel('Predictions Sepal Width')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
plt.plot([0, 100], [0, 100])

## Build a Classification Model Using NN


### MNIST Classification

#### Step 1: Prepare the Data

In [None]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean, std = (0.5,), (0.5,)

# Create a transform and normalise data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean, std)
                              ])

# Download MNIST training dataset and load training data
trainset = datasets.MNIST('~/.pytorch/MNIST/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download MNIST test dataset and load test data
testset = datasets.MNIST('~/.pytorch/MNIST/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [None]:
import matplotlib.pyplot as plt

image,label = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(25):
  plt.subplot(5,5,i+1)
  plt.imshow(image[i][0], cmap='gray')
  plt.title("Ground Truth: {}".format(label[i]))
  plt.xticks([])
  plt.yticks([])

#### Step 2: Build the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Widths of the two hidden layers
L1 = 32
L2 = 64


class Model(nn.Module):
    """Fully connected classifier: 784 -> L1 -> L2 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, L1)
        self.fc2 = nn.Linear(L1, L2)
        self.fc3 = nn.Linear(L2, 10)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the 10 raw output scores."""
        # Keep the batch dimension, collapse everything else into one axis
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = Model()

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

In [None]:
# Alternative way to define the model
# Step 2: Define Model

import torch
import torch.nn as nn
import torch.nn.functional as F 

L1 = 32
L2 = 64
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(784, L1),
                      nn.ReLU(),
                      nn.Linear(L1, L2),
                      nn.ReLU(),
                      nn.Linear(L2, 10))


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device) 

#### Step 3: Define the Loss Function and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)   

#### Step 4: Train the Model

In [None]:
num_epochs = 10
# Per-epoch history: mean training loss, mean test loss, test accuracy
train_tracker, test_tracker, accuracy_tracker = [], [], []

for i in range(num_epochs):
    # ---- Training pass: one sweep over all training batches ----
    model.train()
    train_loss = 0

    for X, y in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)

        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- Evaluation pass: no parameter updates, so no autograd graph needed ----
    model.eval()
    test_loss = 0
    num_correct = 0
    total = 0

    with torch.no_grad():
        for X, y in testloader:
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat, y)

            test_loss += loss.item()

            # The predicted class is the index of the largest output score
            _, pred = torch.max(yhat, 1)
            total += y.size(0)
            # .item() keeps the counter a plain Python int, so the tracked
            # accuracies are floats that can be plotted regardless of device
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct / total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
print(f'Accuracy: {num_correct * 100 / total}% ')

#### Step 5 Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

plt.subplot(1,2,2)
plt.plot(accuracy_tracker, label='Test accuracy')
plt.show()

### Activity: Fashion MNIST Classification

#### Step 1: Prepare the Data

In [None]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean, std = (0.5,), (0.5,)

# Create a transform and normalise data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean, std)
                              ])

# Download FMNIST training dataset and load training data
trainset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download FMNIST test dataset and load test data
testset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [None]:
import matplotlib.pyplot as plt

image,label = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(25):
  plt.subplot(5,5,i+1)
  plt.imshow(image[i][0], cmap='gray')
  plt.title("Ground Truth: {}".format(label[i]))
  plt.xticks([])
  plt.yticks([])

#### Step 2: Define the model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Widths of the two hidden layers
L1 = 32
L2 = 64


class Model(nn.Module):
    """Fully connected classifier: 784 -> L1 -> L2 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, L1)
        self.fc2 = nn.Linear(L1, L2)
        self.fc3 = nn.Linear(L2, 10)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the 10 raw output scores."""
        # Keep the batch dimension, collapse everything else into one axis
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


model = Model()


In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device) 

#### Step 3: Define the loss function and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  

#### Step 4: Train the Model

In [None]:
num_epochs = 10
# Per-epoch history: mean training loss, mean test loss, test accuracy
train_tracker, test_tracker, accuracy_tracker = [], [], []

for i in range(num_epochs):
    # ---- Training pass: one sweep over all training batches ----
    model.train()
    train_loss = 0

    for X, y in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)

        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- Evaluation pass: no parameter updates, so no autograd graph needed ----
    model.eval()
    test_loss = 0
    num_correct = 0
    total = 0

    with torch.no_grad():
        for X, y in testloader:
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat, y)

            test_loss += loss.item()

            # The predicted class is the index of the largest output score
            _, pred = torch.max(yhat, 1)
            total += y.size(0)
            # .item() keeps the counter a plain Python int, so the tracked
            # accuracies are floats that can be plotted regardless of device
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct / total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
# Report the epoch count and test-set size that were actually used
print(f'Accuracy of the model after {num_epochs} epochs on the {total} test images: {num_correct * 100 / total}% ')
    

#### Step 5: Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

plt.subplot(1,2,2)
plt.plot(accuracy_tracker, label='Test accuracy')
plt.show()

# Topic 3 Recurrent Neural Network (RNN)


## LSTM and Input Parameters

In [None]:
import torch
import torch.nn as nn

number_layers = 1 #similar to hidden layers
batch_size = 5
input_size = 10 # similar to  embedding vector size to represent a word 
hidden_size = 20 # similar to hidden vector
sequence_length = 3 # eg 3 words in a sequence or 3 days stock price data 

rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=number_layers, batch_first=True)
input = torch.randn(batch_size, sequence_length, input_size)
h0 = torch.randn(number_layers, batch_size, hidden_size)
_, hn = rnn(input, h0)

In [None]:
input.shape, hn.shape

In [None]:
import torch
import torch.nn as nn

# Same RNN demo as above, but with batch_first=False: the input tensor is laid
# out as (sequence_length, batch_size, input_size).
number_layers = 1 # number of stacked recurrent layers (num_layers)
batch_size = 1 
input_size = 10 # features per time step, e.g. the embedding vector size representing a word
hidden_size = 20 # size of the hidden state vector
sequence_length = 3 # e.g. 3 words in a sequence or 3 days of stock price data

rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=number_layers, batch_first=False)
input = torch.randn(sequence_length, batch_size, input_size)
# Initial hidden state: one vector per layer and per batch element
h0 = torch.randn(number_layers, batch_size, hidden_size)
# Keep only the final hidden state; the per-step outputs are discarded
_, hn = rnn(input, h0)

In [None]:
input.shape, hn.shape

In [None]:
import torch
import torch.nn as nn

number_layers = 1 #similar to hidden layers
batch_size = 1 
input_size = 10 # similar to  embedding vector size to represent a word 
hidden_size = 20 # similar to hidden vector
sequence_length = 3 # eg 3 days stock price data 

rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=number_layers, batch_first=True)
input = torch.randn(batch_size, sequence_length, input_size)
h0 = torch.randn(number_layers, batch_size, hidden_size)
c0 = torch.randn(number_layers, batch_size, hidden_size)
_,(hn, cn) = rnn(input, (h0, c0))


In [None]:
input.shape, hn.shape

In [None]:
import torch
import torch.nn as nn

# LSTM demo with batch_first=False: the input tensor is laid out as
# (sequence_length, batch_size, input_size).
number_layers = 1  # number of stacked recurrent layers (num_layers)
batch_size = 1
input_size = 10  # similar to embedding vector size to represent a word
hidden_size = 20  # similar to hidden vector
sequence_length = 3  # eg 3 days stock price data

rnn = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=number_layers, batch_first=False)
input = torch.randn(sequence_length, batch_size, input_size)
# An LSTM carries two states, hidden (h) and cell (c), each with one vector
# per layer and per batch element
h0 = torch.randn(number_layers, batch_size, hidden_size)
c0 = torch.randn(number_layers, batch_size, hidden_size)
# Keep only the final states; the per-step outputs are discarded
_,(hn, cn) = rnn(input, (h0, c0))


In [None]:
hn.shape

## RNN Demo (Airplane Passengers Dataset)

### LSTM Model

#### Step 1: Prepare the Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
training_set = pd.read_csv('airline-passengers.csv')
training_set.head(10)

In [None]:
training_set.describe()

In [None]:
training_set = training_set.iloc[:,1:2].values

plt.plot(training_set, label = 'Data')
plt.show()

In [None]:
def sliding_window(data, seq_length):
    """Split a sequence into overlapping input windows and next-step targets.

    Each sample pairs ``seq_length`` consecutive entries of ``data`` with the
    entry that immediately follows them.

    Args:
        data: Sliceable sequence of observations (e.g. a NumPy array).
        seq_length: Number of consecutive entries in each input window.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` entries.
    """
    # Every start index whose target still lies inside data is usable, which
    # gives len(data) - seq_length windows, so the final observation is used
    # as a target too.
    n_windows = max(len(data) - seq_length, 0)

    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

seq_length = 4
x, y = sliding_window(training_data, seq_length)

In [None]:
train_size = int(len(y) * 0.67)
test_size = int(len(y)) - train_size

dataX = torch.Tensor(np.array(x))
dataY = torch.Tensor(np.array(y))

trainX = torch.Tensor(np.array(x[0:train_size]))
trainY = torch.Tensor(np.array(y[0:train_size]))

testX = torch.Tensor(np.array(x[train_size:len(x)]))
testY = torch.Tensor(np.array(y[train_size:len(y)]))

In [None]:
dataX.shape, trainX.shape, testX.shape

#### Step 2: Define the Model

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: Number of outputs produced by the linear layer.
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Batch-first input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Zero initial states, created on the same device and with the same
        # dtype as the input so the model also works once moved to a GPU
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)
        c_0 = torch.zeros_like(h_0)

        # Propagate input through LSTM
        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # h_out is (num_layers, batch, hidden_size): use the last layer only,
        # so stacked layers are not folded into the batch dimension
        out = self.fc(h_out[-1])

        return out

In [None]:
input_size = 1
hidden_size = 2
num_layers = 1
num_classes = 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

#### Step 3: Define Loss Function and Optimizer

In [None]:
learning_rate = 0.01

criterion = torch.nn.MSELoss()    
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)


#### Step 4: Train the Model

In [None]:
num_epochs = 2000

for epoch in range(num_epochs):
    outputs = lstm(trainX)
    
    optimizer.zero_grad()
    loss = criterion(outputs, trainY)
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

#### Step 5: Evaluate the Model

In [None]:
lstm.eval()
train_predict = lstm(dataX)

data_predict = train_predict.data.numpy()
dataY_plot = dataY.data.numpy()

data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

plt.axvline(x=train_size, c='g', linestyle='--')

plt.plot(dataY_plot,'b',label='actual')
plt.plot(data_predict,'r',label='prediction')
plt.suptitle('Time Series Prediction')
plt.legend()
plt.show()

### GRU Model

#### Step 1: Prepare the Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
training_set = pd.read_csv('airline-passengers.csv')
training_set.head(10)

In [None]:
training_set = training_set.iloc[:,1:2].values

plt.plot(training_set, label = 'Data')
plt.show()

In [None]:
def sliding_window(data, seq_length):
    """Split a sequence into overlapping input windows and next-step targets.

    Each sample pairs ``seq_length`` consecutive entries of ``data`` with the
    entry that immediately follows them.

    Args:
        data: Sliceable sequence of observations (e.g. a NumPy array).
        seq_length: Number of consecutive entries in each input window.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` entries.
    """
    # Every start index whose target still lies inside data is usable, which
    # gives len(data) - seq_length windows, so the final observation is used
    # as a target too.
    n_windows = max(len(data) - seq_length, 0)

    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

seq_length = 4
x, y = sliding_window(training_data, seq_length)

In [None]:
train_size = int(len(y) * 0.67)
test_size = int(len(y)) - train_size

dataX = torch.Tensor(np.array(x))
dataY = torch.Tensor(np.array(y))

trainX = torch.Tensor(np.array(x[0:train_size]))
trainY = torch.Tensor(np.array(y[0:train_size]))

testX = torch.Tensor(np.array(x[train_size:len(x)]))
testY = torch.Tensor(np.array(y[train_size:len(y)]))

#### Step 2:  Define the Model

In [None]:
class GRU(nn.Module):
    """GRU followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: Number of outputs produced by the linear layer.
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Batch-first input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Zero initial state, created on the same device and with the same
        # dtype as the input so the model also works once moved to a GPU
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)

        # Propagate input through GRU
        _, h_out = self.gru(x, h_0)

        # h_out is (num_layers, batch, hidden_size): use the last layer only,
        # so stacked layers are not folded into the batch dimension
        out = self.fc(h_out[-1])

        return out

In [None]:
input_size = 1
hidden_size = 2
num_layers = 1
num_classes = 1

gru = GRU(num_classes, input_size, hidden_size, num_layers)

#### Step 3: Loss and Optimizer

In [None]:
learning_rate = 0.01

criterion = torch.nn.MSELoss()    
optimizer = torch.optim.Adam(gru.parameters(), lr=learning_rate)

#### Step 4: Train the Model

In [None]:
num_epochs = 2000

for epoch in range(num_epochs):
    outputs = gru(trainX)
    
    optimizer.zero_grad()
    loss = criterion(outputs, trainY)
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

#### Step 5: Evaluate the Model

In [None]:
gru.eval()
train_predict = gru(dataX)

data_predict = train_predict.data.numpy()
dataY_plot = dataY.data.numpy()

data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

plt.axvline(x=train_size, c='g', linestyle='--')

plt.plot(dataY_plot,'b',label='actual')
plt.plot(data_predict,'r',label='prediction')
plt.suptitle('Time Series Prediction')
plt.legend()
plt.show()

### RNN Model

#### Step 1: Prepare the Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
training_set = pd.read_csv('airline-passengers.csv')
training_set.head(10)

In [None]:
training_set = training_set.iloc[:,1:2].values

plt.plot(training_set, label = 'Data')
plt.show()

In [None]:
def sliding_window(data, seq_length):
    """Split a series into overlapping input windows and next-step targets.

    Args:
        data: indexable sequence (e.g. a NumPy array) of observations.
        seq_length: number of consecutive observations in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` items.
    """
    # The last valid window starts at len(data) - seq_length - 1, so there are
    # len(data) - seq_length windows in total. Iterating over
    # range(len(data) - seq_length - 1) would silently drop the final sample.
    n_windows = max(len(data) - seq_length, 0)
    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

seq_length = 4
x, y = sliding_window(training_data, seq_length)

In [None]:
# Chronological split: the first 67% of the windows form the training set,
# the remainder is held out for testing.
train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

# Whole series as tensors (every window and every target).
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

trainX, trainY = torch.Tensor(x[:train_size]), torch.Tensor(y[:train_size])
testX, testY = torch.Tensor(x[train_size:]), torch.Tensor(y[train_size:])

#### Step 2: Define the Model

In [None]:
class RNN(nn.Module):
    """``nn.RNN`` followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: size of the output produced by the linear layer.
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return predictions of shape (batch, num_classes) for batch ``x``.

        ``x`` is batch-first (the RNN is built with ``batch_first=True``).
        """
        # Zero initial hidden state on the input's device/dtype so the model
        # also works after being moved to a GPU.
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          device=x.device, dtype=x.dtype)

        # Propagate input through the RNN; the returned hidden state has shape
        # (num_layers, batch, hidden_size).
        _, h_out = self.rnn(x, h_0)

        # Keep only the last layer's state. Flattening every layer with
        # view(-1, hidden_size) would produce num_layers * batch rows, which no
        # longer lines up with the targets when num_layers > 1.
        out = self.fc(h_out[-1])

        return out

In [None]:
input_size = 1
hidden_size = 2
num_layers = 1
num_classes = 1

rnn = RNN(num_classes, input_size, hidden_size, num_layers)

#### Step 3: Loss and Optimizer

In [None]:
learning_rate = 0.01

criterion = torch.nn.MSELoss()    
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)

#### Step 4: Train the Model

In [None]:
num_epochs = 2000

for epoch in range(num_epochs):
    # Start every step from clean gradients.
    optimizer.zero_grad()

    # Full-batch forward pass and loss on the training split.
    outputs = rnn(trainX)
    loss = criterion(outputs, trainY)

    # Backpropagate and apply the parameter update.
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

#### Step 5: Evaluate the Model

In [None]:
# Switch to inference mode and predict over the whole series (train + test).
rnn.eval()
with torch.no_grad():  # evaluation does not need an autograd graph
    train_predict = rnn(dataX)

# Convert to NumPy arrays for the scaler and matplotlib.
data_predict = train_predict.detach().cpu().numpy()
dataY_plot = dataY.detach().cpu().numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Vertical marker at the train/test boundary.
plt.axvline(x=train_size, c='g', linestyle='--')

plt.plot(dataY_plot, 'b', label='actual')
plt.plot(data_predict, 'r', label='prediction')
plt.suptitle('Time Series Prediction')
plt.legend()
plt.show()

## Activity: LSTM for Shampoo Sales

### Step 1: Load Data (Shampoo Dataset)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
training_set = pd.read_csv('shampoo.csv')
training_set.head(10)

In [None]:
training_set.describe()

In [None]:
training_set = training_set.iloc[:,1:2].values

plt.plot(training_set, label = 'Data')
plt.show()

In [None]:
def sliding_window(data, seq_length):
    """Split a series into overlapping input windows and next-step targets.

    Args:
        data: indexable sequence (e.g. a NumPy array) of observations.
        seq_length: number of consecutive observations in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` items.
    """
    # The last valid window starts at len(data) - seq_length - 1, so there are
    # len(data) - seq_length windows in total. Iterating over
    # range(len(data) - seq_length - 1) would silently drop the final sample.
    n_windows = max(len(data) - seq_length, 0)
    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

seq_length = 3
x, y = sliding_window(training_data, seq_length)

In [None]:
# Chronological split: the first 67% of the windows form the training set,
# the remainder is held out for testing.
train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

# Whole series as tensors (every window and every target).
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

trainX, trainY = torch.Tensor(x[:train_size]), torch.Tensor(y[:train_size])
testX, testY = torch.Tensor(x[train_size:]), torch.Tensor(y[train_size:])

### Step 2: Define the Model

In [None]:
class LSTM(nn.Module):
    """``nn.LSTM`` followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: size of the output produced by the linear layer.
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return predictions of shape (batch, num_classes) for batch ``x``.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # Zero initial hidden and cell states on the input's device/dtype so
        # the model also works after being moved to a GPU.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Propagate input through the LSTM; the returned hidden state has
        # shape (num_layers, batch, hidden_size).
        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # Keep only the last layer's state. Flattening every layer with
        # view(-1, hidden_size) would produce num_layers * batch rows, which no
        # longer lines up with the targets when num_layers > 1.
        out = self.fc(h_out[-1])

        return out

In [None]:
input_size = 1
hidden_size = 3
num_layers = 1
num_classes = 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

### Step 3: Loss Function and Optimizer

In [None]:
learning_rate = 0.01

criterion = torch.nn.MSELoss()    
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

### Step 4: Train the Model

In [None]:
num_epochs = 2000

for epoch in range(num_epochs):
    # Start every step from clean gradients.
    optimizer.zero_grad()

    # Full-batch forward pass and loss on the training split.
    outputs = lstm(trainX)
    loss = criterion(outputs, trainY)

    # Backpropagate and apply the parameter update.
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

### Step 5: Evaluate the Model

In [None]:
# Switch to inference mode and predict over the whole series (train + test).
lstm.eval()
with torch.no_grad():  # evaluation does not need an autograd graph
    train_predict = lstm(dataX)

# Convert to NumPy arrays for the scaler and matplotlib.
data_predict = train_predict.detach().cpu().numpy()
dataY_plot = dataY.detach().cpu().numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Vertical marker at the train/test boundary.
plt.axvline(x=train_size, c='g', linestyle='--')

plt.plot(dataY_plot, 'b', label='actual')
plt.plot(data_predict, 'r', label='prediction')
plt.suptitle('Time Series Prediction')
plt.legend()
plt.show()

## Activity: Stock Price Prediction

### Step 1: Prepare the Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

In [None]:
training_set = pd.read_csv('AAPL.csv',usecols=['Date','Close'])
training_set.head(10)

In [None]:
training_set.describe()

In [None]:
training_set = training_set.iloc[:,1:2].values

plt.plot(training_set, label = 'Data')
plt.show()

In [None]:
def sliding_window(data, seq_length):
    """Split a series into overlapping input windows and next-step targets.

    Args:
        data: indexable sequence (e.g. a NumPy array) of observations.
        seq_length: number of consecutive observations in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[i]`` is
        ``data[i:i + seq_length]`` and ``y[i]`` is ``data[i + seq_length]``.
        Both are empty when ``data`` has no more than ``seq_length`` items.
    """
    # The last valid window starts at len(data) - seq_length - 1, so there are
    # len(data) - seq_length windows in total. Iterating over
    # range(len(data) - seq_length - 1) would silently drop the final sample.
    n_windows = max(len(data) - seq_length, 0)
    x = [data[i:i + seq_length] for i in range(n_windows)]
    y = [data[i + seq_length] for i in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

seq_length = 4
x, y = sliding_window(training_data, seq_length)

In [None]:
# Chronological split: the first 67% of the windows form the training set,
# the remainder is held out for testing.
train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

# Whole series as tensors (every window and every target).
dataX = torch.Tensor(x)
dataY = torch.Tensor(y)

trainX, trainY = torch.Tensor(x[:train_size]), torch.Tensor(y[:train_size])
testX, testY = torch.Tensor(x[train_size:]), torch.Tensor(y[train_size:])

### Step 2: Define the Model

In [None]:
class LSTM(nn.Module):
    """``nn.LSTM`` followed by a linear layer applied to the final hidden state.

    Args:
        num_classes: size of the output produced by the linear layer.
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return predictions of shape (batch, num_classes) for batch ``x``.

        ``x`` is batch-first (the LSTM is built with ``batch_first=True``).
        """
        # Zero initial hidden and cell states on the input's device/dtype so
        # the model also works after being moved to a GPU.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c_0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Propagate input through the LSTM; the returned hidden state has
        # shape (num_layers, batch, hidden_size).
        _, (h_out, _) = self.lstm(x, (h_0, c_0))

        # Keep only the last layer's state. Flattening every layer with
        # view(-1, hidden_size) would produce num_layers * batch rows, which no
        # longer lines up with the targets when num_layers > 1.
        out = self.fc(h_out[-1])

        return out

In [None]:
input_size = 1
hidden_size = 2
num_layers = 1
num_classes = 1

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

### Step 3: Loss and Optimizer

In [None]:
learning_rate = 0.01

criterion = torch.nn.MSELoss()    
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

### Step 4: Train the Model

In [None]:
num_epochs = 2000

for epoch in range(num_epochs):
    # Start every step from clean gradients.
    optimizer.zero_grad()

    # Full-batch forward pass and loss on the training split.
    outputs = lstm(trainX)
    loss = criterion(outputs, trainY)

    # Backpropagate and apply the parameter update.
    loss.backward()
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():1.5f}")

### Step 5: Evaluate the Model

In [None]:
# Switch to inference mode and predict over the whole series (train + test).
lstm.eval()
with torch.no_grad():  # evaluation does not need an autograd graph
    train_predict = lstm(dataX)

# Convert to NumPy arrays for the scaler and matplotlib.
data_predict = train_predict.detach().cpu().numpy()
dataY_plot = dataY.detach().cpu().numpy()

# Undo the MinMax scaling so both curves are in the original units.
data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot)

# Vertical marker at the train/test boundary.
plt.axvline(x=train_size, c='g', linestyle='--')

plt.plot(dataY_plot, 'b', label='actual')
plt.plot(data_predict, 'r', label='prediction')
plt.suptitle('Time Series Prediction')
plt.legend()
plt.show()

## Text Classification

### Step 1: Import IMDB Dataset

In [None]:
import torch
from torchtext.legacy import data
from torchtext.legacy import datasets

TEXT = data.Field(tokenize = 'spacy',tokenizer_language = 'en_core_web_sm')
LABEL = data.LabelField(dtype = torch.float)

train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

In [None]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of testing examples: {len(test_data)}')

In [None]:
print(vars(train_data.examples[0]))

In [None]:
import random

# NOTE(review): SEED is not defined anywhere in this section of the notebook;
# it is set here so the cell runs on its own. Drop this line if an earlier
# cell is meant to provide it.
SEED = 1234

# random.seed() returns None, so passing its result as random_state never
# handed a state to split(). Seed the generator first, then pass the actual
# generator state.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [None]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(test_data)}')

In [None]:
MAX_VOCAB_SIZE = 25_000

TEXT.build_vocab(train_data, max_size = MAX_VOCAB_SIZE)
LABEL.build_vocab(train_data)

In [None]:
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

In [None]:
print(LABEL.vocab.stoi)

In [None]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainloader, validloader, testloader = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE,
    device = device)

### Step 2: Define the Model

In [None]:
import torch.nn as nn

class GRU(nn.Module):
    """Embedding -> single GRU layer -> linear head on the final hidden state.

    Args:
        input_dim: number of rows in the embedding table (vocabulary size).
        embedding_dim: size of each token embedding.
        hidden_dim: size of the GRU hidden state.
        output_dim: number of outputs produced by the linear head.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        """Return one row of ``output_dim`` scores per sequence in ``text``.

        ``text`` holds token indices; the GRU is not batch-first, so
        dimension 0 is the sequence position.
        """
        sequence_out, last_hidden = self.gru(self.embedding(text))
        final_state = last_hidden.squeeze(0)

        # Sanity check: the output at the last time step must be the final
        # hidden state.
        assert torch.equal(sequence_out[-1, :, :], final_state)

        return self.fc(final_state)

In [None]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = GRU(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
model = model.to(device)

### Step 3: Loss and Optimizer

In [None]:
import torch.optim as optim

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

### Step 4: Train the Model

In [None]:
def train():
    """Run one training epoch over ``trainloader``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()

    batch_losses = []
    for X, y in trainloader:
        optimizer.zero_grad()

        # squeeze(1) drops the size-1 output dimension before the loss.
        logits = model(X).squeeze(1)
        loss = criterion(logits, y)

        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(trainloader)

In [None]:
def evaluate():
    """Score the module-level ``model`` on ``validloader`` without gradients.

    Returns:
        Tuple ``(loss, accuracy)``: the per-batch loss and per-batch accuracy,
        each averaged over the number of batches.
    """
    model.eval()

    losses, accuracies = [], []
    with torch.no_grad():
        for X, y in validloader:
            logits = model(X).squeeze(1)
            losses.append(criterion(logits, y).item())

            # Threshold the sigmoid probabilities at 0.5 via rounding.
            predicted = torch.sigmoid(logits).round()
            hits = predicted.eq(y)
            accuracies.append((hits.sum() / len(y)).item())

    n_batches = len(validloader)
    return sum(losses) / n_batches, sum(accuracies) / n_batches

In [None]:
def test():
    """Score the module-level ``model`` on ``testloader`` without gradients.

    Returns:
        Tuple ``(loss, accuracy)``: the per-batch loss and per-batch accuracy,
        each averaged over the number of batches.
    """
    model.eval()

    losses, accuracies = [], []
    with torch.no_grad():
        for X, y in testloader:
            logits = model(X).squeeze(1)
            losses.append(criterion(logits, y).item())

            # Threshold the sigmoid probabilities at 0.5 via rounding.
            predicted = torch.sigmoid(logits).round()
            hits = predicted.eq(y)
            accuracies.append((hits.sum() / len(y)).item())

    n_batches = len(testloader)
    return sum(losses) / n_batches, sum(accuracies) / n_batches

In [None]:
N_EPOCHS = 5

for epoch in range(N_EPOCHS):
    train_loss = train()
    valid_loss, valid_acc = evaluate()

    print(f'Epoch: {epoch+1} Train Loss: {train_loss:.3f} | Val Loss: {valid_loss:.3f} |  Val Acc: {valid_acc*100:.2f}%')

### Step 5: Test the Model

In [None]:
test_loss, test_acc = test()
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

# Topic 4 Convolutional Neural Network (CNN)

## MNIST Demo

### Step 1: Prepare the data

In [None]:
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean, std = (0.5,), (0.5,)

# Create a transform and normalise data
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize(mean, std)])

# Download MNIST training dataset and load training data
trainset = datasets.MNIST('MNIST/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download MNIST test dataset and load test data
testset = datasets.MNIST('MNIST/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

### Step 2: Define the model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Channel counts of the two conv layers and width of the hidden dense layer.
L1 = 16
L2 = 32
L3 = 128


class Model(nn.Module):
    """Two conv + max-pool stages followed by two fully connected layers.

    ``fc1`` expects ``L2 * 7 * 7`` features, i.e. 28x28 single-channel inputs
    halved twice by the 2x2 poolings. The network returns 10 raw scores per
    image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=L1,
                               kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=L1, out_channels=L2,
                               kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=L2 * 7 * 7, out_features=L3)
        self.fc2 = nn.Linear(in_features=L3, out_features=10)

    def forward(self, x):
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Flatten everything except the batch dimension for the dense layers.
        hidden = F.relu(self.fc1(x.flatten(start_dim=1)))
        return self.fc2(hidden)


model = Model()


In [None]:
# Alternative way to define the model
# Step 2: Define Model

import torch
import torch.nn as nn
import torch.nn.functional as F 

# Channel counts of the two conv layers and width of the hidden dense layer.
L1 = 16
L2 = 32
L3 = 128

# Same architecture as the class-based Model, expressed as a Sequential.
model = nn.Sequential(nn.Conv2d(1,L1,3,1,1),
                      nn.ReLU(),
                      nn.MaxPool2d(2),
                      nn.Conv2d(L1,L2,3,1,1),
                      nn.ReLU(),
                      nn.MaxPool2d(2),
                      nn.Flatten(),
                      nn.Linear(L2*7*7,L3),
                      # Non-linearity between the dense layers, matching
                      # F.relu(self.fc1(x)) in the class version; without it
                      # the two Linear layers collapse into one linear map.
                      nn.ReLU(),
                      nn.Linear(L3, 10))



In [None]:
torch.cuda.is_available()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model = model.to(device)

### Step 3: Define Loss Function and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)   

### Step 4: Train the Model

In [None]:
num_epochs = 10
train_tracker, test_tracker, accuracy_tracker = [], [], []

for i in range(num_epochs):
    # ---- training pass ----
    train_loss = 0

    model.train()
    for (X, y) in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)

        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- evaluation pass on the test set ----
    test_loss = 0
    num_correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for (X, y) in testloader:
            # The batch must live on the same device as the model, otherwise
            # the forward pass fails when the model is on the GPU.
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat,y)
            test_loss += loss.item()

            pred = yhat.argmax(dim=1)
            total += y.size(0)
            # .item() keeps the running count a plain int, so the accuracy
            # values stored below can be printed and plotted directly.
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct/total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
print(f'Accuracy of the model after {num_epochs} epochs on the {total} test images: {num_correct * 100 / total}% ')
           

### Step 5: Evaluate the model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))

# Left panel: training vs. test loss, one point per epoch.
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

# Right panel: test accuracy per epoch. float() turns 0-dim tensors (possibly
# on the GPU) into plain numbers matplotlib can plot.
plt.subplot(1,2,2)
plt.plot([float(acc) for acc in accuracy_tracker], label='Test accuracy')
plt.legend()  # without this the 'Test accuracy' label is never displayed
plt.show()

## Activity: CNN on CIFAR

### Step 1: Prepare the Data

In [None]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    ])

trainset = datasets.CIFAR10(root='CIFAR10/',train=True, download=True,transform=transform)
testset = datasets.CIFAR10(root='CIFAR10/',train=False, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

### Step 2: Define the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Channel counts of the two conv layers and width of the hidden dense layer.
L1 = 16
L2 = 32
L3 = 128


class Model(nn.Module):
    """Two conv + max-pool stages followed by two fully connected layers.

    ``fc1`` expects ``L2 * 8 * 8`` features, i.e. 32x32 three-channel inputs
    halved twice by the 2x2 poolings. The network returns 10 raw scores per
    image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=L1,
                               kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=L1, out_channels=L2,
                               kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=L2 * 8 * 8, out_features=L3)
        self.fc2 = nn.Linear(in_features=L3, out_features=10)

    def forward(self, x):
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Collapse everything except the batch dimension for the dense layers.
        flat = x.view(x.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


model = Model()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device)

### Step 3: Loss and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 

### Step 4: Train the Model

In [None]:
num_epochs = 10
train_tracker, test_tracker, accuracy_tracker = [], [], []


for i in range(num_epochs):
    # ---- training pass ----
    train_loss = 0

    model.train()
    for (X, y) in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)
        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- evaluation pass on the test set ----
    test_loss = 0
    num_correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for (X, y) in testloader:
            # The batch must live on the same device as the model, otherwise
            # the forward pass fails when the model is on the GPU.
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat,y)
            test_loss += loss.item()

            pred = yhat.argmax(dim=1)
            total += y.size(0)
            # .item() keeps the running count a plain int, so the accuracy
            # values stored below can be printed and plotted directly.
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct/total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
print(f'Accuracy of the model after {num_epochs} epochs on the {total} test images: {num_correct * 100 / total}% ')
           

### Step 5: Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))

# Left panel: training vs. test loss, one point per epoch.
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

# Right panel: test accuracy per epoch. float() turns 0-dim tensors (possibly
# on the GPU) into plain numbers matplotlib can plot.
plt.subplot(1,2,2)
plt.plot([float(acc) for acc in accuracy_tracker], label='Test accuracy')
plt.legend()  # without this the 'Test accuracy' label is never displayed
plt.show()

## Techniques to Resolve Overfitting Issue

### Baseline without Data Augmentation and Dropout

#### Step 1: Load Small Dataset

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Imported under an alias because the name `transforms` is rebound below to a
# dict of pipelines; building the dict from the alias keeps this cell
# re-runnable after that rebinding.
from torchvision import transforms as T

mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

# Training and validation share the same deterministic pipeline here
# (resize, centre crop, tensor conversion, normalisation).
transforms = {
    'train': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
    'val': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
}


In [None]:
data_dir = '/content/drive/MyDrive/dataset/hymenoptera_data'

In [None]:
trainset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transforms['train'])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transforms['val'])
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=True)

In [None]:
import numpy as np

def denormalize(tensor):
  """Undo the ``Normalize(mean, std)`` transform: return ``tensor * std + mean``.

  Reads the module-level ``mean`` and ``std`` lists. Because they are plain
  Python lists, this presumably relies on ``tensor`` being a NumPy array whose
  last axis is the channel axis (as passed in by ``show_img``) — TODO confirm
  before calling it with anything else.
  """
  tensor = tensor*std+ mean
  return tensor

def show_img(img):
  """Draw a normalized image on the current matplotlib axes.

  Args:
    img: object exposing ``.numpy()``; assumed to be a channel-first
      (C, H, W) image tensor, since axis 0 is moved to the end below —
      TODO confirm.
  """
  img = img.numpy().transpose((1,2,0))  # move axis 0 to the last position
  img = denormalize(img)
  img = np.clip(img,0,1)  # keep values inside [0, 1]
  plt.imshow(img)

def get_class(id):
  """Return the display name ('ant' or 'bee') for class index ``id``."""
  return ('ant', 'bee')[id]

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(4):
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i]))
  plt.xticks([])
  plt.yticks([])

In [None]:
images.shape

In [None]:
labels

#### Step 2: Define the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Channel counts of the two conv layers and width of the hidden dense layer.
L1 = 16
L2 = 32
L3 = 128


class Model(nn.Module):
    """Two conv + max-pool stages followed by two fully connected layers.

    ``fc1`` expects ``L2 * 56 * 56`` features, i.e. 224x224 three-channel
    inputs halved twice by the 2x2 poolings. The network returns 2 raw scores
    per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=L1,
                               kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=L1, out_channels=L2,
                               kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=L2 * 56 * 56, out_features=L3)
        self.fc2 = nn.Linear(in_features=L3, out_features=2)

    def forward(self, x):
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Collapse everything except the batch dimension for the dense layers.
        flat = x.view(x.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


model = Model()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device)

#### Step 3: Loss and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 

In [None]:
device

#### Step 4: Train the Model

In [None]:
num_epochs = 15
train_tracker, test_tracker, accuracy_tracker = [], [], []

for i in range(num_epochs):
    # ---- training pass ----
    train_loss = 0

    model.train()
    for (X, y) in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)
        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- evaluation pass on the validation images ----
    test_loss = 0
    num_correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for (X, y) in testloader:
            # The batch must live on the same device as the model, otherwise
            # the forward pass fails when the model is on the GPU.
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat,y)
            test_loss += loss.item()

            pred = yhat.argmax(dim=1)
            total += y.size(0)
            # .item() keeps the running count a plain int, so the accuracy
            # values stored below can be printed and plotted directly.
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct/total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
print(f'Accuracy of the model after {num_epochs} epochs on the {total} test images: {num_correct * 100 / total}% ')

#### Step 5: Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))

# Left panel: training vs. test loss, one point per epoch.
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

# Right panel: test accuracy per epoch. float() turns 0-dim tensors (possibly
# on the GPU) into plain numbers matplotlib can plot.
plt.subplot(1,2,2)
plt.plot([float(acc) for acc in accuracy_tracker], label='Test accuracy')
plt.legend()  # without this the 'Test accuracy' label is never displayed
plt.show()

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(testloader))

# Run the batch on the model's device (the model may have been moved to the
# GPU) in inference mode, then bring the predictions back to the CPU.
model.eval()
with torch.no_grad():
  output = model(images.to(device))
pred = output.argmax(dim=1).cpu()

# Titles read "<true class>/<predicted class>".
fig = plt.figure(figsize=(10,10))
for i in range(len(images)):  # works for a batch of any size up to the 5x5 grid
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i])+"/"+get_class(pred[i]))
  plt.xticks([])
  plt.yticks([])

### Data Augmentation and Dropout

#### Step 1: Load the Small Dataset

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [None]:
# Data augmentation and normalization for training
# Just normalization for validation

# Imported under an alias because the name `transforms` is rebound below to a
# dict of pipelines; building the dict from the alias keeps this cell
# re-runnable after that rebinding.
from torchvision import transforms as T

mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]

transforms = {
    # Training: random rotation (up to 20 degrees) and horizontal flip are
    # applied on top of the deterministic resize / crop / normalise steps.
    'train': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.RandomRotation(20),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
    # Validation: deterministic preprocessing only.
    'val': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
}

In [None]:
data_dir = '/content/drive/MyDrive/dataset/hymenoptera_data'


In [None]:
trainset = datasets.ImageFolder(os.path.join(data_dir, 'train'), transforms['train'])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = datasets.ImageFolder(os.path.join(data_dir, 'val'), transforms['val'])
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=True)

In [None]:
import numpy as np

def denormalize(tensor):
  """Undo the ``Normalize(mean, std)`` transform: return ``tensor * std + mean``.

  Reads the module-level ``mean`` and ``std`` lists. Because they are plain
  Python lists, this presumably relies on ``tensor`` being a NumPy array whose
  last axis is the channel axis (as passed in by ``show_img``) — TODO confirm
  before calling it with anything else.
  """
  tensor = tensor*std+ mean
  return tensor

def show_img(img):
  """Draw a normalized image on the current matplotlib axes.

  Args:
    img: object exposing ``.numpy()``; assumed to be a channel-first
      (C, H, W) image tensor, since axis 0 is moved to the end below —
      TODO confirm.
  """
  img = img.numpy().transpose((1,2,0))  # move axis 0 to the last position
  img = denormalize(img)
  img = np.clip(img,0,1)  # keep values inside [0, 1]
  plt.imshow(img)

def get_class(id):
  """Return the display name ('ant' or 'bee') for class index ``id``."""
  return ('ant', 'bee')[id]

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(4):
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i]))
  plt.xticks([])
  plt.yticks([])

#### Step 2: Define the Model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

# Channel counts of the two conv layers and width of the hidden dense layer.
L1 = 16
L2 = 32
L3 = 128


class Model(nn.Module):
    """Two conv + max-pool stages with dropout in between, then two dense layers.

    ``fc1`` expects ``L2 * 56 * 56`` features, i.e. 224x224 three-channel
    inputs halved twice by the 2x2 poolings. The network returns 2 raw scores
    per image.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=L1,
                               kernel_size=3, stride=1, padding=1)
        self.d1 = nn.Dropout(p=0.2)
        self.conv2 = nn.Conv2d(in_channels=L1, out_channels=L2,
                               kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=L2 * 56 * 56, out_features=L3)
        self.fc2 = nn.Linear(in_features=L3, out_features=2)

    def forward(self, x):
        # Stage 1: convolution -> ReLU -> 2x2 max pooling, then dropout.
        stage1 = self.d1(F.max_pool2d(F.relu(self.conv1(x)), 2))

        # Stage 2: convolution -> ReLU -> 2x2 max pooling.
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)

        # Collapse everything except the batch dimension for the dense layers.
        flat = stage2.view(stage2.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


model = Model()

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
model.to(device)

#### Step 3: Loss Function and Optimizer

In [None]:
# Loss Function
criterion = nn.CrossEntropyLoss()                       

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 

#### Step 4: Train the Model

In [None]:
num_epochs = 15
train_tracker, test_tracker, accuracy_tracker = [], [], []

for i in range(num_epochs):
    # ---- training pass (dropout active) ----
    train_loss = 0

    model.train()
    for (X, y) in trainloader:
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)
        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_tracker.append(train_loss/len(trainloader))
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)} | ",end='')

    # ---- evaluation pass on the validation images (dropout disabled) ----
    test_loss = 0
    num_correct = 0
    total = 0

    model.eval()
    with torch.no_grad():
        for (X, y) in testloader:
            # The batch must live on the same device as the model, otherwise
            # the forward pass fails when the model is on the GPU.
            X = X.to(device)
            y = y.to(device)

            yhat = model(X)
            loss = criterion(yhat,y)

            test_loss += loss.item()

            pred = yhat.argmax(dim=1)
            total += y.size(0)
            # .item() keeps the running count a plain int, so the accuracy
            # values stored below can be printed and plotted directly.
            num_correct += (pred == y).sum().item()

    test_tracker.append(test_loss/len(testloader))
    print(f"Test loss: {test_loss/len(testloader)} | ", end='')

    accuracy = num_correct/total
    accuracy_tracker.append(accuracy)
    print(f'Accuracy : {accuracy}')

print(f'\nNumber correct : {num_correct}, Total : {total}')
print(f'Accuracy of the model after {num_epochs} epochs on the {total} test images: {num_correct * 100 / total}% ')

#### Step 5: Evaluate the Model

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(15,5))

# Left panel: training vs. test loss, one point per epoch.
plt.subplot(1,2,1)
plt.plot(train_tracker, label='Training loss')
plt.plot(test_tracker, label='Test loss')
plt.legend()

# Right panel: test accuracy per epoch. float() turns 0-dim tensors (possibly
# on the GPU) into plain numbers matplotlib can plot.
plt.subplot(1,2,2)
plt.plot([float(acc) for acc in accuracy_tracker], label='Test accuracy')
plt.legend()  # without this the 'Test accuracy' label is never displayed
plt.show()

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(testloader))

# Run the batch on the model's device (the model may have been moved to the
# GPU) in inference mode so dropout is disabled, then bring the predictions
# back to the CPU.
model.eval()
with torch.no_grad():
  output = model(images.to(device))
pred = output.argmax(dim=1).cpu()

# Titles read "<true class>/<predicted class>".
fig = plt.figure(figsize=(10,10))
for i in range(len(images)):  # works for a batch of any size up to the 5x5 grid
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i])+"/"+get_class(pred[i]))
  plt.xticks([])
  plt.yticks([])

# Topic 5 Application of Machine Learning to Signal Processing

## Load Pre-Trained Models

In [None]:
import torchvision.models as models

model = models.vgg16(pretrained=True)

## Load Test Image

In [None]:
from PIL import Image
# `transforms` was rebound to a dict of pipelines earlier in this notebook, so
# `transforms.Compose` would raise AttributeError here. Import the torchvision
# module under its own alias for this cell.
from torchvision import transforms as T

image_path = 'cat.jpg'

# Resize, convert to a tensor and normalise with the per-channel statistics
# that torchvision documents for its ImageNet-pretrained models.
preprocess = T.Compose([
    T.Resize(224),
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Force three RGB channels so grayscale / RGBA / palette files still match
# the 3-channel normalisation above.
img = Image.open(image_path).convert('RGB')
img_tensor = preprocess(img)
img_tensor = img_tensor.unsqueeze(0)  # add the batch dimension

In [None]:
import requests

LABELS_URL = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
labels = {int(key):value for (key, value) in requests.get(LABELS_URL).json().items()}
labels

In [None]:
import matplotlib.pyplot as plt

# A freshly constructed module is in training mode; switch to eval() so the
# dropout / batch-norm layers behave deterministically at inference time.
model.eval()
with torch.no_grad():  # no gradients are needed for a single prediction
    predict = model(img_tensor)

# Index of the highest-scoring class for the single image in the batch.
class_idx = predict.argmax(dim=1).item()
print(labels[class_idx])
plt.imshow(img)

## Activity: Pre-trained Model

In [None]:
import torchvision.models as models

model = models.resnet18(pretrained=True)

In [None]:
from PIL import Image
# `transforms` was rebound to a dict of pipelines earlier in this notebook, so
# `transforms.Compose` would raise AttributeError here. Import the torchvision
# module under its own alias for this cell.
from torchvision import transforms as T

image_path = 'elephant.jpg'

# Resize, convert to a tensor and normalise with the per-channel statistics
# that torchvision documents for its ImageNet-pretrained models.
preprocess = T.Compose([
    T.Resize(224),
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

# Force three RGB channels so grayscale / RGBA / palette files still match
# the 3-channel normalisation above.
img = Image.open(image_path).convert('RGB')
img_tensor = preprocess(img)
img_tensor = img_tensor.unsqueeze(0)  # add the batch dimension

In [None]:
import requests

LABELS_URL = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'

# Bound the wait and fail loudly on an HTTP error instead of hanging or
# trying to parse an error page as JSON.
response = requests.get(LABELS_URL, timeout=30)
response.raise_for_status()

# The JSON keys arrive as strings; convert them to ints so the dict can be
# indexed with the model's predicted class index.
labels = {int(key): value for key, value in response.json().items()}
labels

In [None]:
import matplotlib.pyplot as plt

# Inference must run in eval mode: in training mode the batch-norm layers
# normalise with the statistics of this single image and update their
# running averages.
model.eval()
# No gradients are needed for a single forward pass.
with torch.no_grad():
    predict = model(img_tensor)

# Look up the label of the highest-scoring class and show the input image.
print(labels[predict.argmax().item()])
plt.imshow(img)

## Feature Extraction

### Step 1: Load the Small Dataset

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [None]:
# Mount Google Drive under /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import the module under a local alias. The dict below is bound to the name
# `transforms` (the cells that follow index it as transforms['train']), which
# hides the torchvision module of the same name; the alias keeps this cell
# re-runnable after that rebinding.
from torchvision import transforms as T

# ImageNet channel statistics: the pre-trained VGG16 used below was trained on
# inputs normalised with these values, so the same ones are applied here.
# `denormalize` reads these globals, so the preview images stay correct.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Both splits use the same deterministic pipeline (no augmentation).
transforms = {
    'train': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
    'val': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
}


In [None]:
# Root folder of the dataset on the mounted drive; the loader cell reads its
# 'train' and 'val' sub-folders.
data_dir = '/content/drive/MyDrive/dataset/hymenoptera_data'


In [None]:
# Training split: one sub-folder per class, served in shuffled batches of 4.
trainset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'train'),
    transform=transforms['train'],
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=4, shuffle=True
)

# Validation split, loaded the same way.
testset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'val'),
    transform=transforms['val'],
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=4, shuffle=True
)

In [None]:
import numpy as np

def denormalize(tensor):
  """Undo the normalisation: scale by the global `std`, then add the global `mean`.

  `show_img` calls this with a channel-last NumPy array.
  """
  return tensor * std + mean

def show_img(img):
  """Draw one normalised image tensor on the current matplotlib axes.

  The tensor is moved to channel-last order, de-normalised with
  `denormalize` and clipped to [0, 1] before being passed to `plt.imshow`.
  """
  # detach()/cpu() make this work for tensors that live on the GPU or track
  # gradients as well; both are no-ops for a plain CPU tensor.
  img = img.detach().cpu().numpy().transpose((1,2,0))
  img = denormalize(img)
  # De-normalisation can leave values slightly outside the displayable range.
  img = np.clip(img,0,1)
  plt.imshow(img)

def get_class(id):
  """Return the class name for a class index: 0 -> 'ant', 1 -> 'bee'."""
  return ['ant', 'bee'][id]

In [None]:
import matplotlib.pyplot as plt

# Preview one training batch: draw its first four images with their labels.
images,labels = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(4):
  # Only the first 4 slots of the 5x5 grid are filled.
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i]))
  # Hide the axis ticks.
  plt.xticks([])
  plt.yticks([])

### Step 2: Load the Pre-trained Model

In [None]:
import torchvision.models as models

# Load VGG16 together with its pre-trained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing.
model = models.vgg16(pretrained=True)

### Model Features

In [None]:
# Echo the `features` sub-module of the model.
model.features

### Step 3: Replace Classifier and Output layers

In [None]:
# Echo the classifier head before its last layer is replaced.
model.classifier

In [None]:
# Freeze every existing weight so that backpropagation leaves it untouched.
for param in model.parameters():
  param.requires_grad_(False)

In [None]:
# Size the new output layer from the dataset (ant / bee) instead of a
# hard-coded 10: with extra outputs the model can predict an index that
# `get_class` cannot map to a name.
num_classes = len(trainset.classes)

# The new layer is created after the freeze, so it is the only trainable part.
model.classifier[-1] = nn.Sequential(
                       nn.Linear(in_features=4096, out_features=num_classes)
                        )

In [None]:
# Echo the classifier head again to confirm the replaced last layer.
model.classifier

### Add GPU

In [None]:
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
# Move the model's weights to that device (the cell also echoes the model).
model.to(device)


### Step 4: Loss Function and Optimizer

In [None]:
from torch.optim import Adam

# Cross-entropy loss on the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=0.001)

In [None]:
# Echo the selected device.
device

### Step 5: Re-train the Model

In [None]:
from tqdm.notebook import trange,tqdm

num_epochs = 2

# Make sure training behaviour (dropout) is active even when an evaluation
# cell, which switches the model to eval mode, was run before this one.
model.train()

for i in trange(num_epochs):
    # Sum of the batch losses of this epoch.
    train_loss = 0

    for (X, y) in tqdm(trainloader):
        # The batch must live on the same device as the model.
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)

        # Clear the gradients of the previous step before backpropagating.
        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss  += loss.item()

    # One line per epoch with the mean batch loss. The former trailing " | "
    # and end='' ran all epochs together on a single unterminated line.
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)}")

### Step 6: Evaluate the Model

In [None]:
from tqdm.notebook import trange,tqdm

# Evaluation mode (dropout off) and no gradient tracking.
model.eval()
with torch.no_grad():
    num_correct = 0
    total = 0

    for (X,y) in tqdm(testloader):
        # The model was moved to `device`; feeding CPU batches to a GPU model
        # raises a device-mismatch error.
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)
        # Predicted class = index of the highest score.
        _, pred = torch.max(yhat, 1)
        total += y.size(0)
        # .item() keeps the counter a plain int so the accuracy prints as a
        # number instead of "tensor(...)".
        num_correct += (pred == y).sum().item()

    print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')
           

### Step 7: Test the Model

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(testloader))

# Run the batch on the model's device, in eval mode and without gradients.
# `images` itself stays on the CPU for plotting.
model.eval()
with torch.no_grad():
  output = model(images.to(device))
_,pred = torch.max(output,1)
# Bring the predictions back to the CPU, next to the labels.
pred = pred.cpu()

# Show the first four images, titled "true label/predicted label".
fig = plt.figure(figsize=(10,10))
for i in range(4):
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i])+"/"+get_class(pred[i]))
  plt.xticks([])
  plt.yticks([])

## Fine-Tuning

### Step 1: Load the Small Dataset

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [None]:
# Mount Google Drive under /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import the module under a local alias. The dict below is bound to the name
# `transforms` (the cells that follow index it as transforms['train']), which
# hides the torchvision module of the same name; the alias keeps this cell
# re-runnable after that rebinding.
from torchvision import transforms as T

# ImageNet channel statistics: the pre-trained VGG16 used below was trained on
# inputs normalised with these values, so the same ones are applied here.
# `denormalize` reads these globals, so the preview images stay correct.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Both splits use the same deterministic pipeline (no augmentation).
transforms = {
    'train': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
    'val': T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=mean, std=std)
    ]),
}


In [None]:
# Root folder of the dataset on the mounted drive; the loader cell reads its
# 'train' and 'val' sub-folders.
data_dir = '/content/drive/MyDrive/dataset/hymenoptera_data'

In [None]:
# Training split: one sub-folder per class, served in shuffled batches of 4.
trainset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'train'),
    transform=transforms['train'],
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=4, shuffle=True
)

# Validation split, loaded the same way.
testset = datasets.ImageFolder(
    root=os.path.join(data_dir, 'val'),
    transform=transforms['val'],
)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=4, shuffle=True
)

In [None]:
import numpy as np

def denormalize(tensor):
  """Undo the normalisation: scale by the global `std`, then add the global `mean`.

  `show_img` calls this with a channel-last NumPy array.
  """
  return tensor * std + mean

def show_img(img):
  """Draw one normalised image tensor on the current matplotlib axes.

  The tensor is moved to channel-last order, de-normalised with
  `denormalize` and clipped to [0, 1] before being passed to `plt.imshow`.
  """
  # detach()/cpu() make this work for tensors that live on the GPU or track
  # gradients as well; both are no-ops for a plain CPU tensor.
  img = img.detach().cpu().numpy().transpose((1,2,0))
  img = denormalize(img)
  # De-normalisation can leave values slightly outside the displayable range.
  img = np.clip(img,0,1)
  plt.imshow(img)

def get_class(id):
  """Return the class name for a class index: 0 -> 'ant', 1 -> 'bee'."""
  return ['ant', 'bee'][id]

In [None]:
import matplotlib.pyplot as plt

# Preview one training batch: draw its first four images with their labels.
images,labels = next(iter(trainloader))
fig = plt.figure(figsize=(10,10))
for i in range(4):
  # Only the first 4 slots of the 5x5 grid are filled.
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i]))
  # Hide the axis ticks.
  plt.xticks([])
  plt.yticks([])

### Step 2: Load the pre-trained Model

In [None]:
import torchvision.models as models

# Load VGG16 together with its pre-trained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of `weights=` — confirm against the installed version before changing.
model = models.vgg16(pretrained=True)

### Step 3: Unfreeze Model Layers

In [None]:
# Echo the `features` sub-module; the unfreeze cell below selects layers from
# it by index.
model.features

In [None]:
# Start from a fully frozen network; selected layers are unfrozen afterwards.
for param in model.parameters():
  param.requires_grad_(False)

In [None]:
# Unfreeze feature layers 24..30 so they are fine-tuned.
# `requires_grad` has to be set on the parameters themselves: assigning it on
# the layer module only creates an unused attribute and leaves every weight
# frozen. Layers without parameters are skipped naturally by the inner loop.
for i in range(24,31):
  for param in model.features[i].parameters():
    param.requires_grad = True

### Step 4: Replace Model Classifier

In [None]:
# Echo the classifier head before its last layer is replaced.
model.classifier

In [None]:
# Size the new output layer from the dataset (ant / bee) instead of a
# hard-coded 10: with extra outputs the model can predict an index that
# `get_class` cannot map to a name.
num_classes = len(trainset.classes)

# The new layer is created after the freeze, so it is trainable.
model.classifier[-1] = nn.Sequential(
                       nn.Linear(in_features=4096, out_features=num_classes)
                        )

In [None]:
# Echo the classifier head again to confirm the replaced last layer.
model.classifier

### Add GPU

In [None]:
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
# Move the model's weights to that device (the cell also echoes the model).
model.to(device)

### Step 5: Loss Function and Optimizer

In [None]:
from torch.optim import Adam

# Cross-entropy loss on the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=0.001)

### Step 6: Train the Model

In [None]:
from tqdm.notebook import trange,tqdm

num_epochs = 2

# Make sure training behaviour (dropout) is active even when an evaluation
# cell, which switches the model to eval mode, was run before this one.
model.train()

for i in trange(num_epochs):
    # Sum of the batch losses of this epoch.
    train_loss = 0

    for (X, y) in tqdm(trainloader):
        # The batch must live on the same device as the model.
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)

        # Clear the gradients of the previous step before backpropagating.
        optimizer.zero_grad()
        loss = criterion(yhat, y)
        loss.backward()
        optimizer.step()

        train_loss  += loss.item()

    # One line per epoch with the mean batch loss. The former trailing " | "
    # and end='' ran all epochs together on a single unterminated line.
    print(f"Epoch({i+1}/{num_epochs}) | Training loss: {train_loss/len(trainloader)}")

### Step 7: Evaluate the Model

In [None]:
from tqdm.notebook import trange,tqdm

# Evaluation mode (dropout off) and no gradient tracking.
model.eval()
with torch.no_grad():
    num_correct = 0
    total = 0

    for (X,y) in tqdm(testloader):
        # The model was moved to `device`; feeding CPU batches to a GPU model
        # raises a device-mismatch error.
        X = X.to(device)
        y = y.to(device)

        yhat = model(X)
        # Predicted class = index of the highest score.
        _, pred = torch.max(yhat, 1)
        total += y.size(0)
        # .item() keeps the counter a plain int so the accuracy prints as a
        # number instead of "tensor(...)".
        num_correct += (pred == y).sum().item()

    print(f'Accuracy of the model on {total} test images: {num_correct * 100 / total}% ')

### Step 8: Test the Model

In [None]:
import matplotlib.pyplot as plt

images,labels = next(iter(testloader))

# Run the batch on the model's device, in eval mode and without gradients.
# `images` itself stays on the CPU for plotting.
model.eval()
with torch.no_grad():
  output = model(images.to(device))
_,pred = torch.max(output,1)
# Bring the predictions back to the CPU, next to the labels.
pred = pred.cpu()

# Show the first four images, titled "true label/predicted label".
fig = plt.figure(figsize=(10,10))
for i in range(4):
  plt.subplot(5,5,i+1)
  show_img(images[i])
  plt.title(get_class(labels[i])+"/"+get_class(pred[i]))
  plt.xticks([])
  plt.yticks([])