In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import torchvision
import pathlib
import glob
from PIL import Image
from torch.optim import Adam
import torch.nn as nn
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
print(device)

cuda


In [6]:
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],
    [0.5,0.5,0.5]


    )
])

In [7]:
train_path = '/content/seg_train/seg_train'
test_path = '/content/seg_test/seg_test'

train_loader = DataLoader(torchvision.datasets.ImageFolder(train_path,transform = transformer,),
batch_size = 64,shuffle = True)

test_loader = DataLoader(torchvision.datasets.ImageFolder(test_path,transform = transformer,),
batch_size = 32,shuffle = True)

In [8]:
root = pathlib.Path(train_path)
classes = [j.name.split('/')[-1] for j in  root.iterdir()
]

In [9]:
print(classes)

['forest', 'street', 'sea', 'glacier', 'buildings', 'mountain']


In [29]:

class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output

In [30]:
model = ConvNet(num_classes = 6).to(device)

In [31]:
optimizer = Adam(model.parameters(),lr = 0.001,weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [32]:
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))

In [33]:
print(train_count,test_count)

14034 3000


In [34]:
num_epochs = 7

In [35]:
best_accuracy = 0.0

#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy





Epoch: 0 Train Loss: tensor(6.3230) Train Accuracy: 0.592917201083084 Test Accuracy: 0.6846666666666666
Epoch: 1 Train Loss: tensor(2.2118) Train Accuracy: 0.7310104033062562 Test Accuracy: 0.65
Epoch: 2 Train Loss: tensor(1.1949) Train Accuracy: 0.804831124412142 Test Accuracy: 0.734
Epoch: 3 Train Loss: tensor(0.5813) Train Accuracy: 0.8702436938862762 Test Accuracy: 0.6586666666666666
Epoch: 4 Train Loss: tensor(0.4461) Train Accuracy: 0.8919053726663816 Test Accuracy: 0.7276666666666667
Epoch: 5 Train Loss: tensor(0.3704) Train Accuracy: 0.9066552657830982 Test Accuracy: 0.7476666666666667
Epoch: 6 Train Loss: tensor(0.2367) Train Accuracy: 0.9321647427675644 Test Accuracy: 0.7683333333333333


In [None]:
checkpoint=torch.load('best_checkpoint.model')
model=ConvNet(num_classes=6)
model.load_state_dict(checkpoint)
model.eval()

In [45]:
#prediction function
def prediction(img_path,transformer):
    
    image=Image.open(img_path)
    
    image_tensor=transformer(image).float()
    
    
    image_tensor=image_tensor.unsqueeze_(0)
    
    if torch.cuda.is_available():
        image_tensor.cuda()
        
    input=Variable(image_tensor)
    
    
    output=model(input)
    
    index=output.data.numpy().argmax()
    
    pred=classes[index]
    
    return pred
    

In [67]:
import matplotlib.pyplot as plt

path = '/content/seg_pred/seg_pred'

image_paths = glob.glob(path+'/*.jpg')



pred_dict = {}

for i in image_paths:
  pred_dict[i[i.rfind('/')+1:]] = prediction(i,transformer)
pred_dict





{'9066.jpg': 'mountain',
 '17108.jpg': 'buildings',
 '15918.jpg': 'buildings',
 '11533.jpg': 'forest',
 '12764.jpg': 'glacier',
 '633.jpg': 'buildings',
 '5751.jpg': 'glacier',
 '4490.jpg': 'sea',
 '1359.jpg': 'glacier',
 '2329.jpg': 'forest',
 '11580.jpg': 'forest',
 '20673.jpg': 'sea',
 '10272.jpg': 'sea',
 '4712.jpg': 'glacier',
 '3962.jpg': 'buildings',
 '17082.jpg': 'forest',
 '14762.jpg': 'street',
 '19725.jpg': 'buildings',
 '5.jpg': 'forest',
 '15053.jpg': 'sea',
 '14151.jpg': 'mountain',
 '2017.jpg': 'forest',
 '10771.jpg': 'forest',
 '23701.jpg': 'sea',
 '11949.jpg': 'street',
 '2299.jpg': 'forest',
 '9175.jpg': 'glacier',
 '20814.jpg': 'street',
 '21460.jpg': 'street',
 '10321.jpg': 'sea',
 '9134.jpg': 'buildings',
 '14484.jpg': 'glacier',
 '9382.jpg': 'forest',
 '8030.jpg': 'glacier',
 '1835.jpg': 'glacier',
 '14271.jpg': 'street',
 '11444.jpg': 'street',
 '819.jpg': 'sea',
 '11091.jpg': 'buildings',
 '13392.jpg': 'mountain',
 '13461.jpg': 'mountain',
 '17182.jpg': 'buildin