<a href="https://colab.research.google.com/github/smit-1z/DataMiningTermProject/blob/main/CNN_PyTorch_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://github.com/gaurav67890/Pytorch_Tutorials/blob/master/cnn-scratch-training.ipynb

https://github.com/gaurav67890/Pytorch_Tutorials/blob/master/cnn-scratch-inference.ipynb

In [1]:
#Load libraries
import os
import numpy as np
import torch
import torchvision
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
import torch.nn as nn
import glob
import pathlib
from io import open
from PIL import Image
import cv2

# Ignore the warning
import warnings
warnings.filterwarnings('ignore')

In [2]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
print(device)

cuda


In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
#Path for training and testing directory

train_path='/content/drive/MyDrive/ImageDataSet/Train'
test_path='/content/drive/MyDrive/ImageDataSet/Test'
pred_path='/content/drive/MyDrive/ImageDataSet/Dev'


In [6]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [7]:
#Dataloader
train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=128, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=128, shuffle=True
)

#changed batch size from 256 to 128

In [8]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [9]:
print(classes)

['cheetah', 'fox', 'hyena', 'lion', 'tiger', 'wolf']


In [45]:
#CNN Network


class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output
            
        

In [46]:
model=ConvNet(num_classes=6).to(device)

In [47]:
#Optimizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [48]:
num_epochs=15

In [49]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.png'))
test_count=len(glob.glob(test_path+'/**/*.png'))

In [50]:
print(train_count,test_count)

1457 176


In [51]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy
    
       

Epoch: 0 Train Loss: tensor(27.0329) Train Accuracy: 0.23747426218256693 Test Accuracy: 0.13636363636363635
Epoch: 1 Train Loss: tensor(11.5391) Train Accuracy: 0.4344543582704187 Test Accuracy: 0.21022727272727273
Epoch: 2 Train Loss: tensor(6.0168) Train Accuracy: 0.49759780370624573 Test Accuracy: 0.30113636363636365
Epoch: 3 Train Loss: tensor(2.9282) Train Accuracy: 0.6245710363761153 Test Accuracy: 0.25
Epoch: 4 Train Loss: tensor(1.5536) Train Accuracy: 0.7590940288263556 Test Accuracy: 0.4715909090909091
Epoch: 5 Train Loss: tensor(0.7447) Train Accuracy: 0.8490048043925875 Test Accuracy: 0.5
Epoch: 6 Train Loss: tensor(0.4346) Train Accuracy: 0.8963623884694578 Test Accuracy: 0.45454545454545453
Epoch: 7 Train Loss: tensor(0.5220) Train Accuracy: 0.8846945778997941 Test Accuracy: 0.4943181818181818
Epoch: 8 Train Loss: tensor(0.1800) Train Accuracy: 0.9389155799588195 Test Accuracy: 0.5056818181818182
Epoch: 9 Train Loss: tensor(0.1222) Train Accuracy: 0.9629375428963624 Test 

In [52]:
checkpoint = torch.load('best_checkpoint.model')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [55]:
#Transforms #2 
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [56]:
def prediction(img_path,transformer):
  image=Image.open(img_path).convert('RGB')
  image_tensor = transformer(image).float()
  image_tensor=image_tensor.unsqueeze_(0)
  if torch.cuda.is_available():
      image_tensor.cuda()
        
  input=Variable(image_tensor)
  output=model(input)
  index=output.data.numpy().argmax()
  pred=classes[index]
    
  return pred

In [57]:
images_path=glob.glob(pred_path+'/*.png')

print(images_path[0])

/content/drive/MyDrive/ImageDataSet/Dev/00000571_224resized.png


In [58]:
pred_dict={}

for i in images_path:
  pred_dict[i[i.rfind('/')+1:]] = prediction(i,transformer)

In [59]:
pred_dict

{'00000571_224resized.png': 'cheetah',
 '00000568_224resized.png': 'tiger',
 '00000567_224resized.png': 'tiger',
 '00000564_224resized.png': 'tiger',
 '00000563_224resized.png': 'cheetah',
 '00000547_224resized.png': 'cheetah',
 '00000549_224resized.png': 'hyena',
 '00000580_224resized.png': 'tiger',
 '00000569_224resized.png': 'cheetah',
 '00000562_224resized.png': 'cheetah',
 '00000596_224resized.png': 'hyena',
 '00000593_224resized.png': 'cheetah',
 '00000587_224resized.png': 'cheetah',
 '00000595_224resized.png': 'cheetah',
 '00000419_224resized.png': 'fox',
 '00000440_224resized.png': 'fox',
 '00000431_224resized.png': 'hyena',
 '00000426_224resized.png': 'fox',
 '00000479_224resized.png': 'fox',
 '00000475_224resized.png': 'hyena',
 '00000487_224resized.png': 'fox',
 '00000442_224resized.png': 'fox',
 '00000445_224resized.png': 'fox',
 '00000483_224resized.png': 'cheetah',
 '00000425_224resized.png': 'wolf',
 '00000465_224resized.png': 'fox',
 '00000421_224resized.png': 'cheetah'