In [None]:
# Mount Google Drive at /content/drive; the paths used later in this notebook
# all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
from io import open
import os
from PIL import Image
import pathlib
import glob
import cv2


In [None]:
# Dataset locations on the mounted Google Drive. Change DATA_DIR to point the
# notebook at a different copy of the dataset.
DATA_DIR = '/content/drive/MyDrive/scene_detection'

# Training images; the names of the entries in this folder are the class labels.
train_path = DATA_DIR + '/seg_train/seg_train'
# Images to run predictions on.
pred_path = DATA_DIR + '/seg_pred/seg_pred'


In [None]:
# Class labels: the alphabetically sorted names of the entries found directly
# under train_path. The position of a name in this list is its class index.
root = pathlib.Path(train_path)
classes = sorted(entry.name for entry in root.iterdir())

In [None]:
#CNN Network Class

class ConvNet(nn.Module):
  """Small three-convolution CNN for classifying 150x150 RGB images.

  The layer attribute names (conv1, bn1, conv2, conv3, bn3, fc) are the keys
  of the state dict, so they must stay unchanged for saved checkpoints to
  load.

  Args:
    num_classes: number of scores produced by the final linear layer.
  """

  def __init__(self, num_classes=6):
    super().__init__()

    # Spatial size after a convolution: ((w - f + 2P) / s) + 1.
    # Every convolution below uses kernel 3, stride 1, padding 1, so it keeps
    # the spatial size; only the max-pool changes it.

    # (batch, 3, 150, 150) -> (batch, 12, 150, 150)
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(num_features=12)
    self.relu1 = nn.ReLU()

    # Halve the spatial size: (batch, 12, 150, 150) -> (batch, 12, 75, 75)
    self.pool = nn.MaxPool2d(kernel_size=2)

    # (batch, 12, 75, 75) -> (batch, 20, 75, 75)
    self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
    self.relu2 = nn.ReLU()

    # (batch, 20, 75, 75) -> (batch, 32, 75, 75)
    self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(num_features=32)
    self.relu3 = nn.ReLU()

    # Fully connected layer over the flattened feature map
    # (height * width * channels of the last convolution's output).
    self.fc = nn.Linear(in_features=75 * 75 * 32, out_features=num_classes)

  def forward(self, input):
    """Compute class scores for a batch of images.

    Args:
      input: tensor of shape (batch, 3, 150, 150).

    Returns:
      Tensor of shape (batch, num_classes) with one raw score per class.

    Raises:
      RuntimeError: if the spatial size of `input` does not yield the
        75 * 75 * 32 features the linear layer was built for.
    """
    output = self.relu1(self.bn1(self.conv1(input)))

    output = self.pool(output)

    output = self.relu2(self.conv2(output))

    output = self.relu3(self.bn3(self.conv3(output)))

    # Flatten everything except the batch dimension. Keeping the batch
    # dimension fixed (rather than view(-1, features)) means a wrong-sized
    # input fails in the linear layer instead of being silently reshaped
    # into extra "samples".
    output = output.flatten(start_dim=1)

    return self.fc(output)


In [None]:
model_path = '/content/drive/MyDrive/scene_detection/best_checkpoint.model'

# NOTE: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted source.
# map_location='cpu' remaps tensors that were saved from a GPU run, so the
# checkpoint also loads on a runtime without CUDA. The model is never moved
# to another device here, so it stays on the CPU.
checkpoint = torch.load(model_path, map_location='cpu')

# The checkpoint is a state dict, so the architecture has to be rebuilt first.
model = ConvNet(num_classes=6)
model.load_state_dict(checkpoint)

# Switch to inference mode (batch norm then uses its stored running statistics).
# Left as the last expression so the cell prints the model summary.
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [None]:
# Preprocessing applied to every image before it is given to the network.
transformer = transforms.Compose([
    # Bring every image to the 150x150 size the network was built for.
    transforms.Resize((150, 150)),
    # Convert to a tensor, scaling pixel values from 0-255 to 0-1.
    transforms.ToTensor(),
    # (x - mean) / std per channel: maps the 0-1 range onto [-1, 1].
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

In [None]:
#prediction function
def prediction(img_path, transformer):
  """Predict the class label of a single image file.

  Uses the notebook-level globals `model` (the network to run) and `classes`
  (the list that maps an output index to its label).

  Args:
    img_path: path of the image file to classify.
    transformer: callable that converts a PIL image into an image tensor
      without a batch dimension.

  Returns:
    The entry of `classes` at the index of the highest model score.
  """
  # Force three channels: a grayscale or RGBA file would otherwise yield a
  # tensor whose channel count does not match a 3-channel pipeline.
  # The context manager closes the file handle once the pixels are converted.
  with Image.open(img_path) as image:
    image_tensor = transformer(image.convert('RGB')).float()

  # The model works on batches, so add a leading batch dimension of size 1.
  image_tensor = image_tensor.unsqueeze(0)

  # Run on whichever device the model lives on. Tensor.to()/Tensor.cuda()
  # return a new tensor, so the result has to be assigned to take effect.
  device = next(model.parameters()).device
  image_tensor = image_tensor.to(device)

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    output = model(image_tensor)

  # Index of the highest score for the single image in the batch.
  index = output.argmax(dim=1).item()

  return classes[index]

In [None]:
# Collect the path of every .jpg file located directly inside pred_path.
jpg_pattern = pred_path + '/*.jpg'
images_path = glob.glob(jpg_pattern)

In [None]:
#images_path

In [None]:
# Map each image's file name (the text after the last '/') to its predicted class.
pred_dict = {
  img_file.rsplit('/', 1)[-1]: prediction(img_file, transformer)
  for img_file in images_path
}

In [None]:
# Show the file name -> predicted class mapping as this cell's output.
pred_dict

{'7148.jpg': 'glacier',
 '7107.jpg': 'forest',
 '7147.jpg': 'street',
 '7050.jpg': 'buildings',
 '7028.jpg': 'street',
 '7115.jpg': 'sea',
 '7068.jpg': 'glacier',
 '7156.jpg': 'buildings',
 '7124.jpg': 'mountain',
 '7089.jpg': 'buildings',
 '7066.jpg': 'forest',
 '7070.jpg': 'sea',
 '7069.jpg': 'mountain',
 '7034.jpg': 'buildings',
 '7055.jpg': 'buildings',
 '7100.jpg': 'glacier',
 '7092.jpg': 'mountain',
 '7109.jpg': 'buildings',
 '7075.jpg': 'mountain',
 '7029.jpg': 'glacier',
 '7110.jpg': 'buildings',
 '7094.jpg': 'mountain',
 '7153.jpg': 'street',
 '7160.jpg': 'forest',
 '7088.jpg': 'mountain',
 '7067.jpg': 'glacier',
 '7106.jpg': 'street',
 '7149.jpg': 'street',
 '7025.jpg': 'buildings',
 '7217.jpg': 'mountain',
 '7224.jpg': 'sea',
 '7230.jpg': 'street',
 '7210.jpg': 'street',
 '7213.jpg': 'street',
 '7190.jpg': 'sea',
 '7182.jpg': 'sea',
 '7218.jpg': 'street',
 '720.jpg': 'forest',
 '7183.jpg': 'glacier',
 '7249.jpg': 'street',
 '7244.jpg': 'buildings',
 '7187.jpg': 'forest',
 '7