## Libraries

In [168]:
import glob
import os
import sys
import shutil
import time

import pandas as pd
import numpy as np
import torch
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils,datasets
from sklearn.model_selection import train_test_split

import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

import warnings
warnings.filterwarnings("ignore")


import matplotlib.pyplot as plt
from ipywidgets import interact , widgets
%matplotlib inline

In [243]:
## Raw Data Directory Location and Root Directory for Project to run this notebook
raw_data_dir = "../CNN_Models/raw_data/*"
root_dir = "../CNN_Models/"

## Creating CSV for image category and location

In [103]:
file_ls = []
for folder in glob.glob(raw_data_dir):
    folder_name = folder.split("/")[-1]
    for file in glob.glob(f"{folder}/*"):
        file_ls.append([folder_name,file])
file_df = pd.DataFrame(file_ls,columns =['category','location'])
file_df.to_csv(f"{root_dir}filenames.csv",index=False)

In [104]:
file_df.head()

Unnamed: 0,category,location
0,MapT,/home/neha/CNN_Models/raw_data/MapT/chart-1756...
1,MapT,/home/neha/CNN_Models/raw_data/MapT/austin.jpg
2,MapT,/home/neha/CNN_Models/raw_data/MapT/1024px-NPS...
3,MapT,/home/neha/CNN_Models/raw_data/MapT/e341deada1...
4,MapT,/home/neha/CNN_Models/raw_data/MapT/image-2016...


## see images and numbers of data points in one category

In [107]:
def show_image(image):
     plt.imshow(image)

@interact
def plot_image(cat=file_df.category.unique(),num_images = range(1,10)):
    tmp_df = file_df[file_df.category==cat].reset_index(drop=True)
    print(f"Total Images in {cat} : {tmp_df.shape[0]}")
    ncols = 4
    nrows = int(num_images/ncols)+1
    plt.figure(figsize=(20,15))
    plt.subplots_adjust(wspace = 0.5,hspace=0.5)
    for idx,row in tmp_df.iterrows():
        if idx<num_images:
            plt.subplot(nrows,ncols,idx+1)
            image = plt.imread(row['location'])
            show_image(image)
            plt.title(f"{cat}")
    

interactive(children=(Dropdown(description='cat', options=('MapT', 'MapO', 'MapP', 'Super_market_discount_post…

## Train and Test Data Split

In [121]:
def create_dir(dir_path):
    if not os.path.exists(dir_path):
        os.mkdir(dir_path)

def move_images(dir_path,df):
    for i in df.category.unique():
        create_dir(f"{dir_path}{i}/")
        
    for idx,row in df.iterrows():
        img_name = row['location'].split("/")[-1]
        shutil.copy(row['location'],f"{dir_path}{row['category']}/{img_name}")

In [122]:
test_size = 0.2
x_train,x_test,y_train,y_test = train_test_split(file_df,file_df['category'],stratify = file_df['category']
                                                 ,random_state=4,test_size =test_size)

print(f"""#Training Images : {x_train.shape[0]}
#Test Images : {x_test.shape[0]}
""")

create_dir(f"{root_dir}dataset/")
create_dir(f"{root_dir}dataset/train/")
create_dir(f"{root_dir}dataset/test/")    
move_images(f"{root_dir}dataset/train/",x_train)
move_images(f"{root_dir}dataset/test/",x_test)

#Training Images : 160
#Test Images : 40



## Creating Data Loaders

In [127]:
output_size = (224,224)
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 10

In [128]:
composed = transforms.Compose([transforms.Resize(output_size),
                               transforms.ToTensor(),
                               transforms.Normalize(mean, std)
                              ])

train_dataset = datasets.ImageFolder(f"{root_dir}dataset/train",transform=composed)
test_dataset = datasets.ImageFolder(f"{root_dir}dataset/test",transform=composed)

train_dataloader = DataLoader(train_dataset,batch_size = batch_size)
test_dataloader =  DataLoader(test_dataset,batch_size = batch_size)

## Model Training and Validation

In [204]:
def accuracy(actual,predicted):
    actual = actual.detach().cpu().numpy()
    predicted = np.argmax(predicted.detach().cpu().numpy(),axis=1)
    
    correct = (actual==predicted).sum()
    return correct

def train_valid_model(model,criterion,optimizer,num_epoch,train_dataloader,test_dataloader,model_name,save_model,device_id):
    result_ls = []
    for epoch in range(1,num_epochs+1):
        strt = time.time()
        train_loss, train_acc = 0,0
        for train_inp,train_label in train_dataloader:
            optimizer.zero_grad()

            train_inp = train_inp.cuda(device_id)
            train_label = train_label.cuda(device_id)

            train_out = model(train_inp)
            loss = criterion(train_out,train_label)
            train_loss += loss.item()
            train_acc += accuracy(train_label,train_out)

            loss.backward()
            optimizer.step()

        test_loss, test_acc = 0,0
        for test_inp,test_label in test_dataloader:
            test_inp = test_inp.cuda(device_id)
            test_label = test_label.cuda(device_id)

            test_out = model(test_inp)
            loss_ = criterion(test_out,test_label)
            test_loss += loss_.item()
            test_acc += accuracy(test_label,test_out)

        train_loss,train_acc = train_loss/train_shape,train_acc/train_shape
        test_loss, test_acc = test_loss/test_shape,test_acc/test_shape

        end = time.time()
        result_ls.append([epoch,train_loss,train_acc,test_loss, test_acc])
        if epoch%print_epoch==0 and print_epoch>0:
            print(f"""epoch:{epoch},train_loss:{round(train_loss*100,3)}%,train_accuracy:{round(train_acc*100,3)}%,test_loss:{round(test_loss*100,3)}%,test_accuracy:{round(test_acc*100,3)}%,time:{int(end-strt)}secs""")

    results_df = pd.DataFrame(result_ls,columns =['epoch','train_loss','train_accuracy','test_loss','test_accuracy'])
    results_df.to_csv(f"{root_dir}model_summary/{model_name}.csv",index=False)

    if save_model:
        torch.save(model.state_dict(),f"{root_dir}models/{model_name}.pth")


## Defining Model and run one at a time

In [205]:
if model_idx == 1:
    model = models.resnet18()
elif model_idx == 2:
    model = models.alexnet()
elif model_idx == 3:
    model = models.vgg16()
elif model_idx == 4:
    model = models.squeezenet1_0()
elif model_idx == 5:
    model = models.densenet161()
elif model_idx == 6:
    model = models.inception_v3()
elif model_idx == 7:
    model = models.googlenet()
elif model_idx == 8:
    model = models.shufflenet_v2_x1_0()
elif model_idx == 9:
    model = models.mobilenet_v2()
elif model_idx == 10:
    model = models.resnext50_32x4d()
elif model_idx == 11:
    model = models.wide_resnet50_2()
elif model_idx == 12:
    model = models.mnasnet1_0()
else :
    print("Invalid index to select model")

In [206]:
model_name_dict = {1:'resnet18',
2:'alexnet',
3:'vgg16',
4:'squeezenet',
5:'densenet',
6:'inception',
7:'googlenet',
8:'shufflenet',
9:'mobilenet',
10:'resnext50_32x4d',
11:'wide_resnet50_2',
12:'mnasnet'}

In [207]:
num_classes = len(train_dataloader.dataset.classes)
print(f"Number of Classes : {num_classes}")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Available Device : {device}")

train_shape = x_train.shape[0]
test_shape = x_test.shape[0]
save_model = True

create_dir(f"{root_dir}model_summary/")
create_dir(f"{root_dir}models/")

learning_rate = 0.001
num_epochs = 4
device_id = 1
print_epoch  = 2 ## assign -1 if you don't want to print intermediate results
model_idx = 1
model_name = model_name_dict[model_idx]
print(f"Model Name : {model_name}")

Number of Classes : 4
Available Device : cuda
resnet18


In [209]:

num_feat = model.fc.in_features
model.fc = nn.Linear(num_feat,num_classes) ## make change in accordance with model
model = model.cuda(device_id)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr =learning_rate)

train_valid_model(model,criterion,optimizer,num_epochs,train_dataloader,test_dataloader,model_name,save_model,device_id)

epoch:2,train_loss:18.537%,train_accuracy:7.5%,test_loss:16.975%,test_accuracy:25.0%,time:5secs
epoch:4,train_loss:17.726%,train_accuracy:0.625%,test_loss:14.489%,test_accuracy:27.5%,time:6secs


## Model Comparison Summary

In [244]:
master_summary =  pd.DataFrame()
for summary_file in glob.glob(f"{root_dir}model_summary/*.csv"):
    tmp_df  = pd.read_csv(summary_file)
    tmp_df['model_name'] = summary_file.split("/")[-1].replace(".csv","")
    master_summary = master_summary.append(tmp_df)
last_epoch_summary = pd.merge(master_summary.groupby(['model_name'],as_index=False).epoch.max(),master_summary,on=['model_name','epoch'])

In [245]:
last_epoch_summary.to_csv(f"{root_dir}model_comparison.csv")

In [246]:
last_epoch_summary

Unnamed: 0,model_name,epoch,train_loss,train_accuracy,test_loss,test_accuracy
0,alexnet,4,0.194379,0.03125,0.143528,0.25
1,resnet18,4,0.163787,0.0,0.137917,0.225
2,resnet50,4,0.150584,0.21875,0.139536,0.25
