In [42]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
from PIL import Image

In [43]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [44]:
#checking for device
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [45]:
class CustomImageDataset(Dataset):
    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

        self.img_labels = []
        for label, class_dir in enumerate(os.listdir(img_dir)):
            class_path = os.path.join(img_dir, class_dir)
            if os.path.isdir(class_path):
                for img_name in os.listdir(class_path):
                    if img_name.endswith('.jpg'):
                        img_path = os.path.join(class_path, img_name)
                        self.img_labels.append((img_path, label))

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path, label = self.img_labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [46]:
train_path = r'C:\Users\skyja\Desktop\Data\scene_detection\seg_train\seg_train'
test_path = r'C:\Users\skyja\Desktop\Data\scene_detection\seg_test\seg_test'
pred_path = r'C:\Users\skyja\Desktop\Data\scene_detection\seg_pred\seg_pred'

In [47]:

train_dataset = CustomImageDataset(img_dir=train_path, transform=transformer)
test_dataset = CustomImageDataset(img_dir=test_path, transform=transformer)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [48]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [49]:

#CNN Network
class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()

        #Output size after convolution filter
        #((w-f+2P)/s) +1

        #Input shape= (256,3,150,150)

        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)

        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)


        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)



        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)


        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)



        #Feed forwad function

    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)

        output=self.pool(output)

        output=self.conv2(output)
        output=self.relu2(output)

        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)


            #Above output will be in matrix form, with shape (256,32,75,75)

        output=output.view(-1,32*75*75)


        output=self.fc(output)

        return output


In [50]:
def train_model():
    model = ConvNet(num_classes = 6).to(device)
    optimizer = Adam(model.parameters(), lr =0.001, weight_decay =0.001)
    loss_function = nn.CrossEntropyLoss()
    num_epochs = 10
    #calculating the size of training and testing images
    train_count=len(glob.glob(train_path+'/**/*.jpg'))
    test_count=len(glob.glob(test_path+'/**/*.jpg'))
    print(train_count,test_count)

    best_accuracy=0.0

    for epoch in range(num_epochs):
        
        #Evaluation and training on training dataset
        model.train()
        train_accuracy=0.0
        train_loss=0.0
        
        for i, (images,labels) in enumerate(train_loader):
            if torch.cuda.is_available():
                images=images.to(device)
                labels=labels.to(device)
                
            optimizer.zero_grad()
            
            outputs=model(images)
            loss=loss_function(outputs,labels)
            loss.backward()
            optimizer.step()
            
            
            train_loss+= loss.cpu().data*images.size(0)
            pred = outputs.argmax(dim=1)
            
            train_accuracy+=int(torch.sum(pred==labels.data))
            
        train_accuracy=train_accuracy/train_count
        train_loss=train_loss/train_count
        
        
        # Evaluation on testing dataset
        model.eval()
        
        test_accuracy=0.0
        for i, (images,labels) in enumerate(test_loader):
            if torch.cuda.is_available():
                images=images.to(device)
                labels=labels.to(device)
                
            outputs=model(images)
            pred = outputs.argmax(dim=1)
            test_accuracy+=int(torch.sum(pred==labels.data))
        
        test_accuracy=test_accuracy/test_count
        
        
        print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
        
        #Save the best model
        if test_accuracy>best_accuracy:
            torch.save(model.state_dict(),'best_checkpoint.model')
            best_accuracy=test_accuracy
    return model
        


In [51]:
if __name__ == "__main__":
    model = train_model()
    

14034 3000
Epoch: 0 Train Loss: tensor(5.9861) Train Accuracy: 0.5659826136525581 Test Accuracy: 0.5876666666666667
Epoch: 1 Train Loss: tensor(1.0070) Train Accuracy: 0.7037907937865184 Test Accuracy: 0.6986666666666667
Epoch: 2 Train Loss: tensor(0.5312) Train Accuracy: 0.8197235285734644 Test Accuracy: 0.7416666666666667
Epoch: 3 Train Loss: tensor(0.4017) Train Accuracy: 0.8648282741912499 Test Accuracy: 0.7346666666666667
Epoch: 4 Train Loss: tensor(0.3073) Train Accuracy: 0.9018811457887986 Test Accuracy: 0.732
Epoch: 5 Train Loss: tensor(0.2644) Train Accuracy: 0.9169160609947271 Test Accuracy: 0.749
Epoch: 6 Train Loss: tensor(0.2153) Train Accuracy: 0.9306683767992019 Test Accuracy: 0.7393333333333333
Epoch: 7 Train Loss: tensor(0.2391) Train Accuracy: 0.9246829129257518 Test Accuracy: 0.7336666666666667
Epoch: 8 Train Loss: tensor(0.1934) Train Accuracy: 0.940287872310104 Test Accuracy: 0.7213333333333334
Epoch: 9 Train Loss: tensor(0.1736) Train Accuracy: 0.9453470143936155 

In [52]:
model.state_dict

<bound method Module.state_dict of ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)>

In [70]:
checkpoint=torch.load('best_checkpoint.model')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
model.eval()
model.to(device)

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [71]:
#prediction function
def prediction(img_path,transformer):
    
    image=Image.open(img_path)
    
    image_tensor=transformer(image).float()
    
    
    image_tensor=image_tensor.unsqueeze_(0)
    
    if torch.cuda.is_available():
        image_tensor = image_tensor.cuda()
        
    
    
    
    output=model(image_tensor)
    
    index=output.cpu().data.numpy().argmax()
    
    pred=classes[index]
    
    return pred
    

In [72]:

images_path=glob.glob(pred_path+'/*.jpg')

In [73]:
pred_dict={}

for i in images_path:
    pred_dict[i[i.rfind('/')+1:]]=prediction(i,transformer)

In [74]:
pred_dict

{'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10004.jpg': 'buildings',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10005.jpg': 'mountain',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10012.jpg': 'buildings',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10013.jpg': 'mountain',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10017.jpg': 'glacier',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10021.jpg': 'forest',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\1003.jpg': 'sea',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10034.jpg': 'glacier',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10038.jpg': 'sea',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\seg_pred\\seg_pred\\10040.jpg': 'buildings',
 'C:\\Users\\skyja\\Desktop\\Data\\scene_detection\\