# Workout : Tensorflow vs Pytorch

1. Extract the code for simulating TensorFlow and PyTorch
2. Using these parameters :
    - epoch = 30
    - maxIteration = 5

3. Save each model under a file name that ends with its accuracy as a suffix (e.g. tf_90, pth_70)
    - extract the code for simulating TensorFlow
    - into a separate module (cv_image_tensorflow_model.py)
    - extract the code for simulating PyTorch
    - into a separate module (cv_image_pytorch_model.py)

4. Load the saved models with the highest accuracy for tensorflow and pytorch 
5. Predict on the same dataImages in a new file: cv_image_tensorflow_vs_pytorch.py
6. Use the timeit package to record the runtime of both algorithms
    - email the saved model (with highest accuracy) before 7pm without uploading to github
    - Get your project dataset ready for presentation tomorrow
    


$
\text{Model for PyTorch}
$

In [1]:
import torch 
from torch import nn 
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import numpy as np
import torchvision
from sklearn.model_selection import train_test_split
import random
import cv2 as cv 
import timeit
import time

Load data

In [2]:
# Archive that holds the images, their labels and the label names.
dataSet = 'master_dataset.npz'

# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# run arbitrary code -- only load archives that come from a trusted source.
with np.load(dataSet, allow_pickle=True) as data:
    # Arrays are read lazily from the archive, so pull all three out while
    # the file is still open.
    dataImages, rawLabels, dataLabelNames = (
        data[key] for key in ('images', 'labels', 'labelnames')
    )

# Labels are used as integer class indices; store them as int64.
dataLabels = rawLabels.astype('int64')

Convert size

In [3]:
# Every image is resized to 200x200 pixels with 3 channels and collected in
# one pre-allocated array (NumPy's default float dtype).
N = len(dataImages)
shape = (N, 200, 200, 3)
y = np.empty(shape)

# Nearest-neighbour interpolation copies source pixels instead of blending.
for i, image in enumerate(dataImages):
    y[i] = cv.resize(image, (200, 200), interpolation=cv.INTER_NEAREST)

dataImages = y

Normalize data

In [4]:
# Divide every pixel by 255 (maps values into [0, 1], assuming 8-bit input).
# A new array is produced; the buffer built during resizing is left untouched.
dataImages = np.divide(dataImages, 255.0)

In [5]:
# Report the label dtype before and after forcing int64.
dtype_before = dataLabels.dtype
dataLabels = dataLabels.astype(np.int64)
print(dtype_before)
print(dataLabels.dtype)

int64
int64


Transform to torch tensor

In [6]:
# Copy the NumPy image array into a tensor of torch's default floating dtype.
dataImages2 = torch.tensor(dataImages, dtype=torch.get_default_dtype())

Convert to tuple

In [7]:
# Pair each image tensor with the label stored at the same index, then
# shuffle the pairs in place.
all_data = [[image, dataLabels[idx]] for idx, image in enumerate(dataImages2)]

random.shuffle(all_data)

Based on tuple, split data into training and testing 

In [8]:
# 75 % of the pairs go to training; whatever remains is the test set.
total_pairs = len(all_data)
train_size = int(0.75 * total_pairs)
test_size = total_pairs - train_size

split_parts = torch.utils.data.random_split(all_data, [train_size, test_size])
train_dataset, test_dataset = split_parts


Check the size of train_dataset and test_dataset

In [9]:
# Display (number of training samples, number of test samples).
tuple(len(part) for part in (train_dataset, test_dataset))

(317, 106)

In [10]:
# Class index -> name; the position in the tuple is the integer label.
classes = dict(enumerate((
    "afiq",
    "azureen",
    "gavin",
    "goke",
    "inamul",
    "jincheng",
    "mahmuda",
    "numan",
    "saseendran",
)))

### Dataloader

In [12]:
# Mini-batch size shared by both loaders. It was not defined anywhere before
# this cell (NameError on a fresh run); 20 is the batch size shown in the
# recorded output of this cell.
batch_size = 20

# Wrap both splits so they are served in mini-batches of `batch_size`.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Peek at the first test batch only, to confirm tensor shapes and label dtype.
# NOTE(review): the images were stacked as (N, 200, 200, 3), i.e. channels
# last, so X is really [N, H, W, C] despite the label printed below.
for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

Shape of X [N, C, H, W]:  torch.Size([20, 200, 200, 3])
Shape of y:  torch.Size([20]) torch.int64


### CPU device training.

In [13]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [14]:
# Length of one flattened image: 200 x 200 pixels, 3 channels each.
input_features = 200 * 200 * 3

### Create model

In [15]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier over flattened images.

    The input is flattened to ``input_features`` values (module-level
    constant), passed through three 512-unit linear layers with ReLU
    activations, and projected to 9 output logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        hidden_units = 512
        # Layers are created in forward order so parameter names
        # (linear_relu_stack.0, .2, .4, .6) stay the same.
        layers = [
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 9),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network, move its parameters to the selected device and show
# its layer structure.
model = NeuralNetwork()
model = model.to(device)
print(model)

# Cross-entropy on the raw logits, optimised with plain SGD (lr = 0.001).
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=120000, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): Linear(in_features=512, out_features=9, bias=True)
  )
)


In [16]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    Each batch is moved to the module-level ``device``, a forward pass and
    loss are computed, and one optimizer step is taken. Progress is printed
    for every 100th batch (including the first).

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: network to train; switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        ``(loss, current)`` where ``loss`` is the loss of the final batch as
        a Python float (``nan`` when the dataloader yields no batches) and
        ``current`` is the sample offset (batch index * batch length) of the
        most recently logged batch (``0`` when nothing was logged).
    """
    size = len(dataloader.dataset)
    model.train()

    # Defined up front so the return below is valid for an empty dataloader.
    loss = None
    current = 0

    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            current = batch * len(X)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

    # Always hand back a plain float, never a tensor that keeps the autograd
    # graph alive; converted once here rather than on every batch.
    final_loss = float("nan") if loss is None else loss.item()
    return final_loss, current

In [17]:
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Batches are moved to the module-level ``device`` and evaluated with
    gradients disabled and the model in evaluation mode.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing ``.dataset``.
        model: network to evaluate.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.

    Returns:
        ``(test_loss, correct)``: the loss averaged over batches and the
        fraction of samples whose arg-max prediction equals the label.
    """
    n_samples = len(dataloader.dataset)
    n_batches = len(dataloader)
    loss_total = 0
    n_correct = 0

    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)
            logits = model(X)
            loss_total += loss_fn(logits, y).item()
            # A sample counts as correct when its highest logit is the label.
            hits = logits.argmax(1) == y
            n_correct += hits.type(torch.float).sum().item()

    test_loss = loss_total / n_batches
    correct = n_correct / n_samples
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return test_loss, correct

1. Run cell class NeuralNetwork
2. Run cell loss_fn & optimizer
3. Run cell def test
4. Run cell def train
5. Then skip the other cells and run the cell below directly

In [18]:

import os

# Number of simulations and epochs per simulation.
maxIterations = 5
epochs = 30

# A model is saved only when its test accuracy beats this value; the value is
# raised to the saved accuracy so later saves must be strictly better.
thresholdAcc = 0.9
# Highest end-of-simulation accuracy actually observed, saved or not.
bestAcc = 0.0

testLoss = 0.0
testAcc = 0.0

# perf_counter is a monotonic clock meant for measuring elapsed time.
start = time.perf_counter()

# NOTE(review): `model` and `optimizer` are created once, outside this loop,
# so every simulation continues training the same weights instead of starting
# from a fresh network -- confirm this is intended.
for simulation in range(maxIterations):

    print(f'Simulation {simulation+1}/{maxIterations}', end='... ')

    for t in range(epochs):
        train(train_dataloader, model, loss_fn, optimizer)
        test_loss, testAcc = test(test_dataloader, model, loss_fn)

    # Only the accuracy after the final epoch of a simulation is considered.
    bestAcc = max(bestAcc, testAcc)

    # Save the model if it beats the current threshold accuracy (initially 0.9)
    if testAcc > thresholdAcc:
        file_name = f'pth_{(testAcc*100):.0f}.pth'
        torch.save(model.state_dict(), file_name)
        print("Saved PyTorch Model State")
        thresholdAcc = testAcc

end = time.perf_counter()
times = end - start
print('Time elapsed:', times)
# Report the accuracy that was really reached, not the save threshold.
print('Highest Acc =', bestAcc)
print('All task is done.')

Simulation 1/5... loss: 2.185797  [    0/  317]
Test Error: 
 Accuracy: 38.7%, Avg loss: 2.177878 

loss: 2.166652  [    0/  317]
Test Error: 
 Accuracy: 47.2%, Avg loss: 2.159613 

loss: 2.150229  [    0/  317]
Test Error: 
 Accuracy: 50.9%, Avg loss: 2.141391 

loss: 2.131680  [    0/  317]
Test Error: 
 Accuracy: 50.0%, Avg loss: 2.122791 

loss: 2.113213  [    0/  317]
Test Error: 
 Accuracy: 47.2%, Avg loss: 2.102739 

loss: 2.094452  [    0/  317]
Test Error: 
 Accuracy: 49.1%, Avg loss: 2.081956 

loss: 2.074804  [    0/  317]
Test Error: 
 Accuracy: 48.1%, Avg loss: 2.060345 

loss: 2.053852  [    0/  317]
Test Error: 
 Accuracy: 48.1%, Avg loss: 2.038016 

loss: 2.032138  [    0/  317]
Test Error: 
 Accuracy: 48.1%, Avg loss: 2.014360 

loss: 2.008949  [    0/  317]
Test Error: 
 Accuracy: 49.1%, Avg loss: 1.989081 

loss: 1.984289  [    0/  317]
Test Error: 
 Accuracy: 49.1%, Avg loss: 1.962823 

loss: 1.958309  [    0/  317]
Test Error: 
 Accuracy: 49.1%, Avg loss: 1.935800 