In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
%matplotlib inline
import random
import os
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision.models as models
import seaborn as sns
from sklearn.metrics import confusion_matrix

# **Data Preparation**

In [None]:
# Common root of the dataset on disk; every split lives in a doubly nested
# folder below it (e.g. seg_train/seg_train).
_DATASET_ROOT = '/kaggle/input/intel-image-classification'

train = f'{_DATASET_ROOT}/seg_train/seg_train'  # training split
test = f'{_DATASET_ROOT}/seg_test/seg_test'     # test split
pred = f'{_DATASET_ROOT}/seg_pred/seg_pred'     # prediction split

In [None]:
# Collect the path of every entry found inside each sub-folder of the
# training directory.
images = [
    os.path.join(train, class_dir, file_name)
    for class_dir in os.listdir(train)
    for file_name in os.listdir(train + '/' + class_dir)
]

# Show a 4x4 grid of randomly drawn training images. Each draw is an
# independent random.choice, so the same image may appear more than once.
plt.figure(figsize=(20, 20))
for slot in range(1, 17):
    sample_path = random.choice(images)
    pixels = mpimg.imread(sample_path)
    plt.subplot(4, 4, slot)
    plt.imshow(pixels)
plt.show()

In [None]:
# Count the .jpg files in every sub-folder of the training directory and
# report both the per-folder counts and the grand total.
train_image_total = 0
# os.listdir returns entries in arbitrary order; sort so the report is stable
# from run to run.
for folder in sorted(os.listdir(train)):
    files = glob.glob(os.path.join(train, folder, '*.jpg'))
    print(f'In the train folder, there are {len(files)} images inside the folder {folder}')
    train_image_total += len(files)
print(f'In the train folder, there are a total of {train_image_total} images')

In [None]:
# Count the .jpg files in every sub-folder of the test directory and report
# both the per-folder counts and the grand total.
test_image_total = 0
# os.listdir returns entries in arbitrary order; sort so the report is stable
# from run to run.
for folder in sorted(os.listdir(test)):
    files = glob.glob(os.path.join(test, folder, '*.jpg'))
    test_image_total += len(files)
    print(f'In the test folder, there are {len(files)} images inside the folder {folder}')
print(f'In the test folder, there are a total of {test_image_total} images')

In [None]:
# Count the .jpg files that sit directly inside the prediction folder
# (no sub-folders are traversed here).
pred_pattern = f'{pred}/*.jpg'
files = glob.glob(pred_pattern)
print(f'In the prediction folder , there are a total of {len(files)} images')

# **Model definition**

In [None]:
# Per-channel mean / std applied by Normalize after ToTensor.
# NOTE(review): these look like the usual ImageNet statistics expected by
# torchvision's pretrained models; confirm.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then take a random 224x224 crop as augmentation.
train_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Evaluation pipeline: same resize and normalisation, but a deterministic
# centre crop. A random crop here would make the test metrics change from
# one pass over the test set to the next.
test_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# ImageFolder derives the labels from the sub-folder each image lives in.
train_data = datasets.ImageFolder(train, transform=train_transforms)
test_data = datasets.ImageFolder(test, transform=test_transforms)

# Only the training loader shuffles; the test loader keeps dataset order.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=16, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=16)


In [None]:
# Build torchvision's ResNet-152 and request its pretrained weights.
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument; confirm against the installed version.
model = models.resnet152(pretrained=True)
# Bare last expression: the notebook displays the module's repr.
model

In [None]:
import torch.optim as optim

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Freeze every parameter the model currently has.
for weight in model.parameters():
    weight.requires_grad = False

# Replace the final layer with a new head: 2048 input features mapped to
# 6 outputs, followed by log-softmax. It is created after the freeze loop,
# so that loop never touches its parameters.
classifier_head = nn.Sequential(
    nn.Linear(2048, 6),
    nn.LogSoftmax(dim=1),
)
model.fc = classifier_head

# Negative log-likelihood loss, applied to the head's log-softmax output.
criterion = nn.NLLLoss()

model.to(device)

# Only the new head's parameters are handed to the optimizer.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# **Model training**

In [None]:
# Train the classifier head for a fixed number of epochs. After every epoch
# the whole test set is evaluated, and the average train / test loss per
# batch is recorded in train_losses / test_losses.
epochs = 5
steps = 0          # total number of optimisation steps taken, across epochs
running_loss = 0   # sum of the batch losses of the current epoch

train_losses, test_losses = [], []
for epoch in range(epochs):
    # ---- training pass ----
    # Set train mode explicitly so an epoch never starts in eval mode
    # (e.g. when this cell is re-run after an evaluation cell).
    model.train()
    for inputs, labels in trainloader:
        steps += 1
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Call the module rather than .forward() so registered hooks run.
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- evaluation pass ----
    test_loss = 0
    correct = 0   # number of correctly classified test samples
    seen = 0      # number of test samples evaluated
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model(inputs)
            test_loss += criterion(logps, labels).item()

            # The arg-max of the log-probabilities is the predicted class.
            top_class = logps.argmax(dim=1)
            correct += (top_class == labels).sum().item()
            seen += labels.size(0)

    # Exact fraction of correct samples. Averaging per-batch accuracies
    # would give a short final batch the same weight as a full one.
    accuracy = correct / seen if seen else 0.0

    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))
    print("Epoch: {}/{}.. ".format(epoch+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1]),
          "Test Accuracy: {:.3f}".format(accuracy))
    running_loss = 0

# **Model evaluation after training**

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Plot the per-epoch test and train losses recorded by the training loop.
fig, ax = plt.subplots()
# Number the epochs from 1 so the x axis matches the training printout.
ax.plot(range(1, len(test_losses) + 1), test_losses, label='Test Loss')
ax.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
ax.set_xlabel("epoch")
ax.set_ylabel("Loss")
# Epochs are whole numbers: keep the ticks on integers.
ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
# The label= arguments above are only displayed once a legend is drawn.
ax.legend()
plt.show()

In [None]:
# Run the model over the whole test loader and collect, batch by batch,
# the predicted class indices and the true labels.
y_pred_list = []
y_true_list = []

# The training loop switches the model back to train mode at the end of
# every epoch, so evaluation mode has to be selected explicitly here.
model.eval()
with torch.no_grad():
    for inp, labels in testloader:
        inp = inp.to(device)
        y_test_pred = model(inp)
        # Index of the largest output along dim 1 is the predicted class.
        y_pred_tag = y_test_pred.argmax(dim=1)
        y_pred_list.append(y_pred_tag.cpu().numpy())
        y_true_list.append(labels.cpu().numpy())

# Flatten the per-batch arrays into one flat list each. The guard covers an
# empty loader, for which np.concatenate would raise.
flat_pred = np.concatenate(y_pred_list).tolist() if y_pred_list else []
flat_true = np.concatenate(y_true_list).tolist() if y_true_list else []

In [None]:
# Class names in label-index order, taken from the dataset that produced the
# labels. os.listdir returns folders in arbitrary order, so using it for the
# tick labels could attach the wrong name to a row / column.
outcomes = test_data.classes

# Pass the label indices explicitly so the matrix always has one row and
# one column per class, even if a class never occurs in the predictions.
cm = confusion_matrix(flat_true, flat_pred, labels=list(range(len(outcomes))))

fig, ax = plt.subplots(figsize=(10, 10))
# fmt='d' prints the cell counts as integers.
sns.heatmap(cm, fmt='d', annot=True, cmap='Blues', ax=ax)
ax.invert_yaxis()
ax.set_xticklabels(outcomes, rotation=90)
ax.set_yticklabels(outcomes, rotation=0)
# confusion_matrix(y_true, y_pred) puts the true classes on the rows (the
# heatmap's y axis) and the predicted classes on the columns (x axis).
ax.set_xlabel('Predicted Category')
ax.set_ylabel('Actual Category')
ax.set_title('Actual Vs Predicted Category')
plt.show()

# **Observation and comments**

As we can see, the model has some trouble distinguishing sea/buildings and glacier/mountain, but the remaining classes are predicted well. Overall, I am satisfied with the results.

