# Prequisites

In [2]:
import torchvision
from torchvision import transforms
import torch
import os
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import shutil
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image

# Data 

* **Dataset** is a python class that allows us to get the data we are supplying to the neural network
* **Dataloader** feeds data from dataset to the neural network 

In [None]:
classes = {}
counter = 0
x = []

for i in tqdm(os.listdir("pkmn_images")):
    image_folder = os.path.join("pkmn_images", i)
    image_num = len(os.listdir(image_folder))
    classes[i] = image_num
    counter = counter + 1
    x.append(counter)

values = list(classes.values())

plt.plot(x, values)
print("Number of pokemon are ", len(x))

In [None]:
os.mkdir("train")
os.mkdir("test")
os.mkdir("val")

In [None]:
for i in tqdm(os.listdir("pkmn_images")):
    class_folder = os.path.join("pkmn_images", i)
    images = os.listdir(class_folder)
    os.mkdir(f"train/{i}")
    counter = 1
    for j in images:
        source_path = os.path.join(class_folder, j)
        dest_path = os.path.join(f"train/{i}", j)
        shutil.move(source_path, dest_path)
        counter = counter + 1
        if(counter == 70):
            break

for i in tqdm(os.listdir("pkmn_images")):
    class_folder = os.path.join("pkmn_images", i)
    images = os.listdir(class_folder)
    os.mkdir(f"val/{i}")
    counter = 1
    for j in images:
        source_path = os.path.join(class_folder, j)
        dest_path = os.path.join(f"val/{i}", j)
        shutil.move(source_path, dest_path)
        counter = counter + 1
        if(counter == 10):
            break


for i in tqdm(os.listdir("pkmn_images")):
    class_folder = os.path.join("pkmn_images", i)
    images = os.listdir(class_folder)
    os.mkdir(f"test/{i}")
    # counter = 1
    for j in images:
        source_path = os.path.join(class_folder, j)
        dest_path = os.path.join(f"test/{i}", j)
        shutil.move(source_path, dest_path)
        # counter = counter + 1
        # if(counter == 20):
        #     break

In [1]:
#One method to get items/data from our dataset 
# Method to get the length of our dataset 

class Dataset:
    def __getitem__(self, index):
        raise NotImplementedError
    def __len__(self):
        raise NotImplementedError

In [3]:
train_data_path = "train"

transformations = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485, 0.456, 0.406],
                          std = [0.229, 0.224, 0.225])
])

#Normalizing values between 0 and 1 makes the model a bit easier to learn the features since they are standardized or capped within a smaller range
#Also prevents the values from getting too large during training phase - exploding gradient problem

train_data = torchvision.datasets.ImageFolder(train_data_path, transform=transformations)

val_data_path = "val"
val_data = torchvision.datasets.ImageFolder(val_data_path, transform=transformations)

test_data_path = "test"
test_data = torchvision.datasets.ImageFolder(test_data_path, transform=transformations)

#Training set - for training pass to update the model 
#Val set - Evaluate how model is generalizing to problem domain rather than fitting to training data not used directly
#Test set - To get final evaluation of model  

In [4]:
# Dataloader

batch_size = 1024 #Num of images sent to the network once before updating it

train_data_loader = DataLoader(train_data, batch_size=batch_size)
val_data_loader = DataLoader(val_data, batch_size=batch_size)
test_data_loader = DataLoader(test_data, batch_size=batch_size)

# ANN

In [5]:
# Our class inherits from nn.Module to 
# Register parameters, save load state dicts, move to gpu/cpu easily, wire up hooks and the __call__ -> forward mechanism

class ANN(nn.Module):
    
    def __init__(self):  # Defining layers and objects we want to use across the forward passes
        super().__init__() # Initializes base nn.Module. Without this parameter registration/hook machinery wont work properly. You need super() to inherit from the parent class (nn.Module)
        self.fc1 = nn.Linear(12288, 604)
        self.fc2 = nn.Linear(604, 302)
        self.fc3 = nn.Linear(302, 151)

    def forward(self, x):
        x = torch.flatten(x, 1) #Flatten all dimensions except batch -> [B, 12288]
        x = F.relu(self.fc1(x)) #linear layers expect a 2D Tensor of [Batch, Tensors] not [B, C, H, W etc]
        x = F.relu(self.fc2(x))
        logits = self.fc3(x) #The logits are the raw tensor output from a model, preferably the last linear layers
        #probs = logits.softmax(dim=1)
        return logits 
    
model = ANN() #we dont call forward() directly. We call the class which internally calss forward() via nn.Module.__call__ (also runs pre/post hooks, handles autocast etc.)

Reasons for PyTorch wanting it this way is 

* Parameters in init -> They are registered, saved, moved to GPU and seen by optmizers. 
* Computation in forward: PyTochb builds a dynamic computation graph each call. Anything in forward is tracked by autograd for .backward()

In [6]:
device = ''
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = model.to(device)

In [7]:
def train(model, optimizer, loss_func, train_data_loader, val_data_loader, epochs, device):
    for epoch in tqdm(range(epochs)):
        training_loss = 0.0
        training_iterator = 0
        valid_loss = 0.0 
        model.train() 
        for batch in train_data_loader:
            optimizer.zero_grad()  #Refresh the optimizer for the next batch everytime 
            inputs, targets = batch 
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_func(output, targets)
            loss.backward()
            optimizer.step()
            training_loss += loss.data.item()
            training_iterator += 1
        training_loss /= training_iterator
        model.eval()
        valid_iterator = 0
        num_correct = 0 
        num_examples = 0
        for batch in val_data_loader:
            inputs, targets = batch 
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            loss = loss_func(output, targets)
            valid_loss += loss
            valid_iterator += 1 
            # preds = torch.max(output, dim=1)[1] # --> Could be a more idomatic approach
            # correct = torch.eq(preds, target)
            correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)
            # Taking the raw "output" -> logitts into probabilities across classes with softmax 
            # Picking the max indices [1] -> predicted class for each example. 
            # Torch.eq compares predicted class labels with ground truth labels returning a  boolean tensor of shape [B]
            num_correct += torch.sum(correct).item() #Summing how many predictions were true
            num_examples += correct.shape[0] #EQuating number of samples in each batch
        valid_loss /= valid_iterator
        print(f"Epoch [{epoch}] : Training Loss = {training_loss:.2f}  Validation Loss = {valid_loss:.2f}")

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss = torch.nn.CrossEntropyLoss()

train(model, optimizer, loss, train_data_loader, val_data_loader, 3, device)

  0%|          | 0/3 [00:00<?, ?it/s]

  correct = torch.eq(torch.max(F.softmax(output), dim=1)[1], targets).view(-1)


Epoch [0] : Training Loss = 8.71  Validation Loss = 5.36
Epoch [1] : Training Loss = 5.27  Validation Loss = 5.02
Epoch [2] : Training Loss = 5.01  Validation Loss = 5.02


In [9]:
labels = {}
for key, value in train_data.class_to_idx.items():
    labels[value] = key 

img_path = "train\\Pikachu\\24_0_5902.png"
img = Image.open(img_path)
img = transformations(img)
img = img.unsqueeze(0)
img = img.to(device)

predictions = model(img)
prediction = predictions.argmax().item()
print(labels[prediction])

Rapidash


In [13]:
# torch.save(model, "model.pkl") #Saving the current paramters and model structure in pickle format
# model = torch.load("model.pkl")
#Not recommended, better to save with state dict -> many benefits -> More flexibility to modify layers later on and not hardcoded to all previously trained layers

In [19]:
# Recommended path

torch.save(model.state_dict(), "model.pth")
model = ANN()
model_dict = torch.load("model.pth")
model.load_state_dict(model_dict)

<All keys matched successfully>