In [1]:
# Importing libraries
import os
import numpy as np
import glob
import torch
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [2]:
# Checking for the device- i.e if torch is using CUDA or not.
# If it is not using CUDA, it will train on CPU.
# Else it will train on GPU.

# 'cuda' and 'cpu' have to be as given since they are library terms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [3]:
# Data Processing- Using transforms

transformer = transforms.Compose([
    # Resizing every image to (150, 150) for training
    transforms.Resize((150, 150)),
    
    # Random horizontally flipping images with 0.5 probability
    transforms.RandomHorizontalFlip(),
    
    # Changes [0-255] value to [0-1], also changes data type from numpy to tensor
    transforms.ToTensor(),
    
    # Changes range from [0-1] to [-1, 1]
    # Column represents RGB channel, and row represents mean and std
    # x will be replaces by the Z-score = (x-mean)/std
    transforms.Normalize([0.5, 0.5, 0.5],
                        [0.5, 0.5, 0.5])
])

In [4]:
# Data Loader

# Paths
train_path = '/home/siddhant/Pictures/Intel-image-dataset/archive/seg_train'
test_path = '/home/siddhant/Pictures/Intel-image-dataset/archive/seg_test'


train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform = transformer),
    # Hyper parameters
    batch_size = 256,
    # Shuffle to get rid of bias
    shuffle = True
)

test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform = transformer),
    # Hyper parameters
    batch_size = 256,
    # Shuffle to get rid of bias
    shuffle = True
)

In [5]:
# Getting categories

root = pathlib.Path(train_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [6]:
# CNN Network- In PyTorch we need to define class for a CNN- it will inherit the nn.Module class

class ConvNet(nn.Module):
    def __init__(self, num_classes = 6):
        super(ConvNet, self).__init__()
        
        # Input shape (256, 3, 150, 150) - (batch size, number of channels, height, width)
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 12, kernel_size = 3, stride = 1, padding = 1)
        # ((w-f+2p)/s)+1
        # New shape = (256, 12, 150, 150)
        self.bn1 = nn.BatchNorm2d(num_features = 12)
        # New shape = (256, 12, 150, 150)
        self.relu1 = nn.ReLU()
        # New shape = (256, 12, 150, 150)
        
        self.pool = nn.MaxPool2d(kernel_size = 2)
        # New shape = (256, 12, 75, 75)
        
        self.conv2 = nn.Conv2d(in_channels = 12, out_channels = 20, kernel_size = 3, stride = 1, padding = 1)
        # New shape = (256, 20, 75, 75)
        self.relu2 = nn.ReLU()
        
        self.conv3 = nn.Conv2d(in_channels = 20, out_channels = 32, kernel_size = 3, stride = 1, padding = 1)
        # New shape = (256, 32, 75, 75)
        self.bn3 = nn.BatchNorm2d(num_features = 32)
        # New shape = (256, 32, 75, 75)
        self.relu3 = nn.ReLU()
        # New shape = (256, 32, 75, 75)
        
        self.fc = nn.Linear(in_features = 32*75*75, out_features = num_classes)
    
    # Feed forward function
    
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        
        output = self.pool(output)
        
        output = self.conv2(output)
        output = self.relu2(output)
        
        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)
        
        # Output will be [256, 32, 75, 75]
        
        output = output.view(-1, 32*75*75)
        
        output = self.fc(output)
        return output

In [10]:
# Getting model and sending it to the device
model = ConvNet(num_classes = 6).to(device)

In [12]:
# Optimizer and loss function
optimizer = Adam(model.parameters(), lr = 0.001, weight_decay = 0.0001)
loss_function = nn.CrossEntropyLoss()

In [13]:
num_epochs = 10

In [17]:
# Calculating the number of training and testing datasets

train_count = len(glob.glob(train_path + '/**/*.jpg'))
test_count = len(glob.glob(test_path + '/**/*.jpg'))

In [18]:
print(train_count, test_count)

13307 3000


In [22]:
# Training model and saving best model

best_accuracy = 0.0

for epoch in range(num_epochs):
    
    # Train and evaluate
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0
    
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cude())
            labels = Variable(labels.cuda())
        
        optimizer.zero_grad()
        outputs = model(images)
        
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.cpu().data*images.size(0)
        _,prediction = torch.max(outputs.data, 1)
        
        train_accuracy += int(torch.sum(prediction == labels.data))
    
    train_accuracy /= train_count
    train_loss /= train_count
    
    # Test and evaluate
    model.eval()
    
    test_accuracy = 0.0
    
    for i, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cude())
            labels = Variable(labels.cuda())
        
        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        
        test_accuracy += int(torch.sum(prediction == labels.data))
    
    test_accuracy /= test_count
    
    print(epoch, train_loss, train_accuracy, test_accuracy)
    
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy

0 tensor(13.5074) 0.5319756519125273 0.4673333333333333
1 tensor(1.5570) 0.6908394078304652 0.627
2 tensor(0.9131) 0.7705718794619373 0.6333333333333333
3 tensor(0.7300) 0.8131810325392651 0.7056666666666667
4 tensor(0.7723) 0.8241527015856316 0.7166666666666667
5 tensor(0.4566) 0.8790110468174644 0.68
6 tensor(0.3804) 0.9013301270008266 0.727
7 tensor(0.3867) 0.8962951829863981 0.691
8 tensor(0.1891) 0.9449913579319156 0.752
9 tensor(0.1038) 0.9700157811678064 0.7236666666666667
