<a href="https://colab.research.google.com/github/pratyusha-code/Flower-Classification/blob/main/Project_2_ShapeAI_Pratyusha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PRATYUSHA MITRA**

**PROJECT 2 : FLOWER CLASSIFICATION**

**Import Headers**

General Headers

In [None]:
import os
import shutil
import glob
import cv2
import numpy as np
import random
from tqdm import tqdm
import matplotlib.pyplot as plt

from sklearn import preprocessing

from sklearn.preprocessing import OneHotEncoder

PyTorch based Headers

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn
from torch.nn import functional as F
from torch import optim
import torchvision.models as models

**SETTING UP THE DEVICE FOR GPU COMPUTATION**

1. Cuda : For faster computation using GPU
2. CPU : For normal computations
3. nvidia-smi : To check the GPU unit

In [None]:
# Pick the compute device once: CUDA if PyTorch can see a GPU, otherwise CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Report the choice so the run log records which device was used.
print('use_cuda: {}'.format(use_cuda))
print("Device to be used : ", device)
!nvidia-smi

use_cuda: True
Device to be used :  cuda
Sun Oct 24 09:27:18 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   47C    P8    29W / 149W |      3MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+----------------------------------------------

**Setting up Data Path**

In [None]:
def read_data(data_path):
    """Index an image-folder dataset as ``(image_path, class_index)`` pairs.

    ``data_path`` must contain one sub-directory per class; every entry inside
    a class directory is treated as one sample of that class. Class indices
    are assigned by sorting the names of the classes that own at least one
    sample and numbering them from 0 (the sorted-class numbering a label
    encoder fitted on the sample labels would give).

    Args:
        data_path: Root directory of the dataset.

    Returns:
        A list of ``(path, label)`` tuples ordered by class name and then by
        file name, where ``label`` is a plain ``int``. Empty if no samples
        are found.
    """
    samples = []
    # Sorting makes the result independent of the file system's listing order.
    for class_name in sorted(os.listdir(data_path)):
        class_dir = os.path.join(data_path, class_name)
        # Stray files next to the class folders (README, .DS_Store, ...) are
        # not classes; trying to list them as directories would raise.
        if not os.path.isdir(class_dir):
            continue
        for file_name in sorted(os.listdir(class_dir)):
            samples.append((os.path.join(class_dir, file_name), class_name))

    # Only classes that actually contributed samples receive an index.
    class_names = sorted({class_name for _, class_name in samples})
    class_to_index = {name: index for index, name in enumerate(class_names)}
    return [(path, class_to_index[name]) for path, name in samples]

In [None]:
# Index the dataset stored under this path into (image path, label) pairs.
# NOTE(review): main() further down calls read_data() again with a different
# hard-coded path and keeps the result in its own local variable, so this
# module-level `data` is not what main() trains on — confirm which path is
# the intended one.
data = read_data("/content/drive/MyDrive/flowers")

**DATA GENERATOR**

In [None]:
class DataGenerator(Dataset):
    """Dataset that loads and preprocesses images from disk on demand.

    Args:
        image_list: Indexable collection of ``(image_path, label)`` pairs.
    """

    def __init__(self, image_list):
        self.files = image_list

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load, resize and scale the sample at position ``idx``.

        Returns:
            ``(image, label)``: ``image`` is a float32 tensor in
            channel-first layout, resized to 224x224 and scaled to
            ``[0, 1]``; ``label`` is a 0-dim int64 tensor.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
        """
        path = self.files[idx][0]
        label = self.files[idx][1]

        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if img is None:
            raise OSError("Could not read image file: {}".format(path))

        img = cv2.resize(img, (224, 224))
        img = img * 1. / 255.
        # NOTE(review): the previous `(img - 0.5) + 0.5` step was an identity
        # operation and has been dropped. If normalisation to [-1, 1] was
        # intended it would be `(img - 0.5) / 0.5`, which changes the model
        # input and requires retraining. Images are also left in OpenCV's
        # BGR channel order — confirm whether that is intended.
        image = torch.FloatTensor(img).permute(2, 0, 1)
        # Pin the dtype: CrossEntropyLoss needs int64 class indices whatever
        # integer type the label list happens to hold.
        return image, torch.tensor(label, dtype=torch.long)
		
	
def load_data(data, batch_size=32, num_workers=10, shuffle=True):
    """Wrap ``data`` in a ``DataGenerator`` and return a ``DataLoader`` over it.

    Args:
        data: Sample list handed to ``DataGenerator``.
        batch_size: Number of samples per batch.
        num_workers: Number of loader worker processes.
        shuffle: Whether the loader reshuffles the samples every epoch.

    Returns:
        The configured ``DataLoader``.
    """
    return DataLoader(
        DataGenerator(data),
        shuffle=shuffle,
        num_workers=num_workers,
        batch_size=batch_size,
    )

In [None]:
class Flower_Classifier(nn.Module):
    """Frozen pretrained VGG16 backbone with a small trainable linear head.

    The head maps the backbone's 4096-dimensional output to 5 class logits.
    """

    def __init__(self):
        super(Flower_Classifier, self).__init__()

        model_ft = models.vgg16(pretrained=True)
        # Drop the last layer of VGG16's classifier so the backbone emits the
        # 4096-d features the first head layer expects.
        model_ft.classifier = model_ft.classifier[:-1]
        # Freeze the backbone. `requires_grad_` is a method: the previous
        # `model_ft.requires_grad_ = False` merely overwrote it with an
        # attribute and left every VGG16 weight trainable.
        # NOTE: optimizer state saved before this fix covers the backbone
        # parameters too and will not load into an optimizer built from the
        # trainable parameters only.
        model_ft.requires_grad_(False)
        # NOTE(review): the three Linear layers have no activation between
        # them, so together they are a single affine map. Left unchanged so
        # the state_dict keys stay compatible with existing checkpoints.
        self.flower_decode = nn.Sequential(model_ft,
                                           nn.Linear(4096, 32),
                                           nn.Linear(32, 16),
                                           nn.Linear(16, 5))

    def forward(self, flower_image):
        """Return the class logits for a batch of images."""
        decoded_vector = self.flower_decode(flower_image)
        return decoded_vector


**MODULE 3 : Training**

**SAVE CHECKPOINT**

In [None]:
def save_ckp(checkpoint, checkpoint_path):
    """Serialize ``checkpoint`` to ``checkpoint_path`` with ``torch.save``.

    Args:
        checkpoint: Object to save (the training loop passes a dict holding
            the epoch counter plus model and optimizer state dicts).
        checkpoint_path: Destination file. Missing parent directories are
            created first.
    """
    parent_dir = os.path.dirname(checkpoint_path)
    # torch.save does not create directories; without this the first save of
    # a fresh run fails after a whole epoch of training has been spent.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(checkpoint, checkpoint_path)

**LOAD CHECKPOINT**

In [None]:
def load_ckp(checkpoint_path, model, model_opt):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        checkpoint_path: File holding a dict with the keys ``'state_dict'``,
            ``'optimizer'`` and ``'epoch'``.
        model: Module whose parameters are overwritten in place.
        model_opt: Optimizer whose state is overwritten in place.

    Returns:
        ``(model, model_opt, epoch)`` where ``epoch`` is the value stored in
        the checkpoint.
    """
    # Deserialize onto the CPU so a checkpoint written on a GPU machine also
    # loads on a CPU-only host; load_state_dict then copies the values into
    # the tensors the model and optimizer already own.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model.load_state_dict(checkpoint['state_dict'])
    model_opt.load_state_dict(checkpoint['optimizer'])
    return model, model_opt, checkpoint['epoch']

**TRAIN EPOCH**

In [None]:
def train_epoch(train_loader, model, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches.
        model: Network to train; moved to the global ``device``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch index, used only in the progress-bar text.

    Returns:
        ``(model, optimizer)`` after the pass.
    """
    # Loop-invariant setup, done once instead of on every batch.
    model = model.to(device)
    model.train()
    criterion = nn.CrossEntropyLoss()

    progress_bar = tqdm(enumerate(train_loader))
    for _, (img, label) in progress_bar:
        img = img.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        pred_label = model(img)
        ce_loss = criterion(pred_label, label)
        ce_loss.backward()
        optimizer.step()

        # The value shown is the loss of the current batch only, so what stays
        # on screen after the epoch is the last batch's loss, not an average.
        progress_bar.set_description(
            "Epoch : {} Training Loss : {} ".format(epoch, ce_loss.item()))

    return model, optimizer

In [None]:
def val_epoch(val_loader, model, optimizer, epoch):
    """Evaluate the cross-entropy loss over ``val_loader`` without training.

    Args:
        val_loader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; moved to the global ``device``.
        optimizer: Unused; accepted and returned for signature symmetry with
            ``train_epoch``.
        epoch: Index of the epoch being validated, shown in the progress bar.

    Returns:
        ``(model, optimizer)`` unchanged.
    """
    # Loop-invariant setup, done once instead of on every batch.
    model = model.to(device)
    model.eval()
    criterion = nn.CrossEntropyLoss()

    progress_bar = tqdm(enumerate(val_loader))
    # No gradients are needed for evaluation; do not rely on the caller to
    # have disabled autograd.
    with torch.no_grad():
        for _, (img, label) in progress_bar:
            img = img.to(device)
            label = label.to(device)

            pred_label = model(img)
            ce_loss = criterion(pred_label, label)

            # Label with `epoch` itself: the caller passes the index of the
            # epoch it has just trained, so the former `epoch-1` reported the
            # wrong epoch (and "-1" for the very first one).
            # The value shown is the current batch's loss, not an average.
            progress_bar.set_description(
                "Epoch : {} Validation Loss : {} ".format(epoch, ce_loss.item()))

    return model, optimizer

In [None]:
def test_epoch(test_loader, model, optimizer, epoch):
    """Measure and print classification accuracy over ``test_loader``.

    Args:
        test_loader: Loader yielding ``(images, labels)`` batches; its
            ``dataset`` attribute supplies the sample count.
        model: Network to evaluate; moved to the global ``device``.
        optimizer: Unused; kept for signature symmetry with ``train_epoch``.
        epoch: Unused; kept for signature symmetry with ``train_epoch``.

    Returns:
        The accuracy as a float in ``[0, 1]``, or ``nan`` when the dataset is
        empty.
    """
    # Loop-invariant setup, done once instead of on every batch.
    model = model.to(device)
    model.eval()

    correct = 0
    progress_bar = tqdm(enumerate(test_loader))
    # No gradients are needed for evaluation; do not rely on the caller to
    # have disabled autograd.
    with torch.no_grad():
        for _, (img, label) in progress_bar:
            img = img.to(device)
            label = label.to(device)
            pred_label = model(img)
            # The predicted class is the index of the largest logit.
            correct += (label == pred_label.argmax(1)).sum().item()

    n_samples = len(test_loader.dataset)
    # An empty test set used to end in a ZeroDivisionError.
    accuracy = correct / n_samples if n_samples else float("nan")
    print("Accuracy: {}".format(accuracy))
    return accuracy

**Code to control the Train, Test & Val**

In [None]:
def train_val_test(train_loader, val_loader, test_loader, model, optimizer, n_epoch, resume):
    """Train for up to ``n_epoch`` epochs, checkpointing and evaluating each one.

    After every training epoch a checkpoint is written, then the validation
    loss and the test accuracy are reported.

    Args:
        train_loader: Loader for the training split.
        val_loader: Loader for the validation split.
        test_loader: Loader for the test split.
        model: Network to train.
        optimizer: Optimizer for ``model``.
        n_epoch: Total number of epochs to reach. When resuming, epochs
            already recorded in the checkpoint count towards it.
        resume: If true, restore model, optimizer and epoch counter from the
            checkpoint file before training.
    """
    # Path where the checkpoint is saved.
    checkpoint_path = "/ssd_scratch/pratyusha_m/checkpoints/checkpoint.pt"

    epoch = 0
    # Continue from the stored epoch when training is to be resumed.
    if resume:
        model, optimizer, epoch = load_ckp(
            checkpoint_path, model, optimizer)

    # Stop once n_epoch epochs are done; the former `while 1:` ignored
    # n_epoch and only ended when the process was killed.
    while epoch < n_epoch:
        model, optimizer = train_epoch(train_loader, model, optimizer, epoch)

        # Store epoch + 1 so that a resumed run starts at the next epoch.
        checkpoint = {'epoch': epoch+1, 'state_dict': model.state_dict(),
                      'optimizer': optimizer.state_dict()}
        save_ckp(checkpoint, checkpoint_path)
        print("Checkpoint Saved")

        with torch.no_grad():
            val_epoch(val_loader, model, optimizer, epoch)
            test_epoch(test_loader, model, optimizer, epoch)
        epoch += 1

**MAIN FUNCTION**

In [None]:
def main(data_path="/home/projects/Flower_Classification/flowers", n_epoch=100, resume=False):
    """Split the dataset 60/20/20, build the classifier and run training.

    Args:
        data_path: Root directory of the dataset, one sub-folder per class.
        n_epoch: Number of epochs handed to ``train_val_test``.
        resume: Whether to resume from the saved checkpoint instead of
            starting afresh.
    """
    data = read_data(data_path)
    random.shuffle(data)

    # 60 % train, 20 % validation, remainder (about 20 %) test.
    n_train = int(0.6 * len(data))
    n_val = int(0.2 * len(data))
    train_data = data[:n_train]
    validation_data = data[n_train:n_train + n_val]
    test_data = data[n_train + n_val:]
    print("Total Number of samples : ",len(data))
    print("Train : {}, Validation : {}, Test : {}".format(len(train_data), len(validation_data), len(test_data)))

    # Only the training loader needs reshuffling each epoch; evaluation
    # visits every sample once, so its order does not matter.
    train_loader = load_data(train_data, batch_size=32, num_workers=2, shuffle=True)
    val_loader = load_data(validation_data, batch_size=32, num_workers=2, shuffle=False)
    test_loader = load_data(test_data, batch_size=32, num_workers=2, shuffle=False)

    # Build the model and move it to the selected device.
    model = Flower_Classifier()
    model = model.to(device)

    # Optimise only the parameters that are still trainable.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(trainable_params, lr=0.01)

    train_val_test(train_loader, val_loader, test_loader, model, optimizer, n_epoch, resume)

**CALLING THE MAIN FUNCTION**

In [None]:
# Run the full pipeline: read and split the data, then train, validate and test.
main()

**Note: This code was executed on a GPU from the terminal on my desktop. These are the training logs. Running on the Colab GPU was slow.**

use_cuda: True
Device to be used :  cuda
Total Number of samples :  4327
Train : 2596, Validation : 865, Test : 866
Epoch : 0 Training Loss : 0.18956269323825836 : : 82it [00:16,  4.97it/s]
Checkpoint Saved
Epoch : -1 Validation Loss : 0.0015585192013531923 : : 28it [00:01, 14.70it/s]
28it [00:01, 15.29it/s]
Accuracy: 0.8083140850067139
Epoch : 1 Training Loss : 1.6656051874160767 : : 82it [00:16,  4.96it/s] 
Checkpoint Saved
Epoch : 0 Validation Loss : 0.004584990907460451 : : 28it [00:01, 15.11it/s]
28it [00:01, 16.08it/s]
Accuracy: 0.8660507798194885
Epoch : 2 Training Loss : 0.18563959002494812 : : 82it [00:16,  4.94it/s] 
Checkpoint Saved
Epoch : 1 Validation Loss : 3.083498001098633 : : 28it [00:01, 15.03it/s]  
28it [00:01, 16.17it/s]
Accuracy: 0.8521940112113953
Epoch : 3 Training Loss : 1.5795180843269918e-06 : : 82it [00:16,  4.93it/s]
Checkpoint Saved
Epoch : 2 Validation Loss : 1.1920928244535389e-07 : : 28it [00:01, 15.15it/s]
28it [00:01, 15.83it/s]
Accuracy: 0.8799076080322266
Epoch : 4 Training Loss : 1.1065523624420166 : : 82it [00:16,  4.93it/s]   
Checkpoint Saved
Epoch : 3 Validation Loss : 1.1552094221115112 : : 28it [00:01, 15.11it/s] 
28it [00:01, 16.04it/s]
Accuracy: 0.7586604952812195
Epoch : 5 Training Loss : 0.00019396046991460025 : : 82it [00:16,  4.93it/s]
Checkpoint Saved
Epoch : 4 Validation Loss : 0.002970451721921563 : : 28it [00:01, 15.11it/s]
28it [00:01, 16.01it/s]
Accuracy: 0.8648960590362549
Epoch : 6 Training Loss : 0.0038618925027549267 : : 82it [00:16,  4.92it/s] 
Checkpoint Saved
Epoch : 5 Validation Loss : 0.0 : : 28it [00:01, 15.12it/s]                
28it [00:01, 16.00it/s]
Accuracy: 0.8891454935073853
Epoch : 7 Training Loss : 0.04477012902498245 : : 82it [00:16,  4.93it/s]  
Checkpoint Saved
Epoch : 6 Validation Loss : 0.0 : : 28it [00:01, 15.13it/s]                
28it [00:01, 15.99it/s]
Accuracy: 0.8937644362449646
Epoch : 8 Training Loss : 0.004453101195394993 : : 82it [00:16,  4.92it/s]  
Checkpoint Saved
Epoch : 7 Validation Loss : 0.0 : : 28it [00:01, 15.13it/s]               
28it [00:01, 16.00it/s]
Accuracy: 0.8279445767402649
Epoch : 9 Training Loss : 2.1457581169670448e-06 : : 82it [00:16,  4.92it/s]
Checkpoint Saved
Epoch : 8 Validation Loss : 6.183901786804199 : : 28it [00:01, 15.07it/s]  
28it [00:01, 15.98it/s]
Accuracy: 0.8833718299865723
Epoch : 10 Training Loss : 0.01083929743617773 : : 82it [00:16,  4.93it/s]   
Checkpoint Saved
Epoch : 9 Validation Loss : 0.0 : : 28it [00:01, 15.12it/s]                
28it [00:01, 16.17it/s]
Accuracy: 0.8568129539489746
Epoch : 11 Training Loss : 0.000218683693674393 : : 82it [00:16,  4.92it/s]  
Checkpoint Saved
Epoch : 10 Validation Loss : 0.00017998983094003052 : : 28it [00:01, 15.05it/s]
28it [00:01, 16.10it/s]
Accuracy: 0.8718245029449463
Epoch : 12 Training Loss : 2.8459604436648078e-05 : : 82it [00:16,  4.92it/s]
Checkpoint Saved
Epoch : 11 Validation Loss : 0.0 : : 28it [00:01, 15.05it/s]                 
28it [00:01, 16.02it/s]
Accuracy: 0.8625866174697876
Epoch : 13 Training Loss : 0.00902835838496685 : : 82it [00:16,  4.93it/s]   
Checkpoint Saved
Epoch : 12 Validation Loss : 0.0 : : 28it [00:01, 15.12it/s]                
28it [00:01, 16.00it/s]
Accuracy: 0.8556581735610962
Epoch : 14 Training Loss : 0.00013321341248229146 : : 82it [00:16,  4.92it/s]
Checkpoint Saved
Epoch : 13 Validation Loss : 0.0 : : 28it [00:01, 15.08it/s]                 
28it [00:01, 16.00it/s]
Accuracy: 0.8879907727241516
Epoch : 15 Training Loss : 1.072882469088654e-06 : : 82it [00:16,  4.92it/s] 
Checkpoint Saved
Epoch : 14 Validation Loss : 1.1920928244535389e-07 : : 28it [00:01, 15.02it/s]
28it [00:01, 15.92it/s]
Accuracy: 0.8521940112113953
Epoch : 16 Training Loss : 0.18984085321426392 : : 82it [00:16,  4.92it/s]   
Checkpoint Saved
Epoch : 15 Validation Loss : 2.244469404220581 : : 28it [00:01, 15.03it/s]  
28it [00:01, 16.18it/s]
Accuracy: 0.8637413382530212
Epoch : 17 Training Loss : 6.824669981142506e-06 : : 82it [00:16,  4.93it/s] 
Checkpoint Saved
Epoch : 16 Validation Loss : 0.0 : : 28it [00:01, 15.13it/s]                
28it [00:01, 16.02it/s]
Accuracy: 0.8891454935073853
Epoch : 18 Training Loss : 2.139746538887266e-05 : : 82it [00:16,  4.92it/s] 
Checkpoint Saved
Epoch : 17 Validation Loss : 0.0 : : 28it [00:01, 15.09it/s]                 
28it [00:01, 16.23it/s]
Accuracy: 0.8660507798194885
Epoch : 19 Training Loss : 2.849020529538393e-05 : : 82it [00:16,  4.93it/s] 
Checkpoint Saved
Epoch : 18 Validation Loss : 1.5798382759094238 : : 28it [00:01, 14.89it/s] 
28it [00:01, 16.06it/s]
Accuracy: 0.8660507798194885
Epoch : 20 Training Loss : 2.890810264943866e-06 : : 82it [00:16,  4.92it/s] 
Checkpoint Saved
Epoch : 19 Validation Loss : 4.6132929128361866e-05 : : 28it [00:01, 15.12it/s]
28it [00:01, 16.07it/s]
Accuracy: 0.8891454935073853
Epoch : 21 Training Loss : 1.9519775378284976e-05 : : 82it [00:16,  4.92it/s]
Checkpoint Saved
Epoch : 20 Validation Loss : 0.0 : : 28it [00:01, 15.06it/s]                
28it [00:01, 16.07it/s]
Accuracy: 0.8787528872489929
Epoch : 22 Training Loss : 0.01711004227399826 : : 82it [00:16,  4.92it/s]   
Checkpoint Saved
Epoch : 21 Validation Loss : 0.0 : : 28it [00:01, 15.12it/s]               
28it [00:01, 16.01it/s]
Accuracy: 0.8233256340026855
Epoch : 23 Training Loss : 0.003298721509054303 : : 82it [00:16,  4.92it/s] 
Checkpoint Saved
Epoch : 22 Validation Loss : 1.1920928244535389e-07 : : 28it [00:01, 15.06it/s]
28it [00:01, 16.03it/s]
Accuracy: 0.8729792237281799
Epoch : 24 Training Loss : 0.014698987826704979 : : 82it [00:16,  4.93it/s]  
Checkpoint Saved
Epoch : 23 Validation Loss : 0.00011991735664196312 : : 28it [00:01, 15.10it/s]
28it [00:01, 16.21it/s]
Accuracy: 0.8545034527778625
Epoch : 25 Training Loss : 3.3378389616700588e-06 : : 82it [00:16,  4.93it/s]
Checkpoint Saved
Epoch : 24 Validation Loss : 0.0 : : 28it [00:01, 15.00it/s]                
28it [00:01, 15.96it/s]
Accuracy: 0.9053117632865906
Epoch : 26 Training Loss : 2.6135952793993056e-05 : : 82it [00:16,  4.92it/s]
Checkpoint Saved
Epoch : 25 Validation Loss : 0.0 : : 28it [00:01, 15.13it/s]                 
28it [00:01, 15.95it/s]
Accuracy: 0.9110854268074036


# **Final Accuracy Reported after 25th Epoch on the Test Set is : 91.11%**

**Here we are submitting a txt file which has the Google Drive link for our pickled model.**