In [1]:
from torch.utils.data import Dataset, DataLoader
import torch, torchvision
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable
import torchvision.transforms as transforms
import pandas as pd
import os
import glob
import numpy as np
import rasterio as rio
from rasterio.plot import reshape_as_image
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
test_directory = '/home/mcgrau/PycharmProjects/SS22_AIML/data/testset'
model_directory = '/home/mcgrau/PycharmProjects/SS22_AIML/best_models'
model_name_directory = 'googlenet'
model_evaluated = os.path.join(model_directory, model_name_directory)
best_model_directory = os.path.join(model_evaluated, 'googlenet_model_epoch_19.pth')

In [3]:
classes = [
    "AnnualCrop",
    "Forest",
    "HerbaceousVegetation",
    "Highway",
    "Industrial",
    "Pasture",
    "PermanentCrop",
    "Residential",
    "River",
    "SeaLake"
]

In [4]:
idx_to_class = {i:j for i, j in enumerate(classes)}
class_to_idx = {value:key for key,value in idx_to_class.items()}

In [5]:
import torchvision.models as models

pretrained = models.googlenet(pretrained = True)

class MyGoogLeNet(nn.Module):
    def __init__(self, my_pretrained_model):
        super(MyGoogLeNet, self).__init__()
        self.pretrained = my_pretrained_model
        self.my_new_layers = nn.Sequential(nn.Linear(1000, 10),
                                           nn.ReLU(),
                                           nn.LogSoftmax(dim=1))

    def forward(self, x):
        x = self.pretrained(x)
        x = self.my_new_layers(x)
        return x

model = MyGoogLeNet(my_pretrained_model=pretrained)
model

MyGoogLeNet(
  (pretrained): GoogLeNet(
    (conv1): BasicConv2d(
      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): BasicConv2d(
      (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3): BasicConv2d(
      (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (inception3a): Inception(
      (branch1): BasicConv2d(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [6]:
testSamples = glob.glob(os.path.join(test_directory, "*.npy"))
len(testSamples)

4232

In [7]:
class testDataset(Dataset):
    def __init__(self, test_directory, transform=False):
        self.files = glob.glob(os.path.join(test_directory, "*.npy"))
        self.transform = transform

    def __len__(self):
      return len(self.files)

    def __getitem__(self, idx):
        item = self.files[idx]
        image = np.load(item).astype(int)
        number = int(item.split('/')[-1].split('_')[1].split('.')[0])

        if self.transform:
          image = transforms.ToTensor()(image).to(torch.float)

        return image, number

In [8]:
testData = testDataset(test_directory = test_directory, transform = True)

In [9]:
test_loader = DataLoader(testData, shuffle=False)

In [12]:
# load state_dict from path
state_dict_best = torch.load(best_model_directory, map_location=torch.device('cpu'))

# init pre-trained model class
best_model = MyGoogLeNet(my_pretrained_model=pretrained)

# load pre-trained models
best_model.load_state_dict(state_dict_best)

<All keys matched successfully>

In [13]:
# set model in evaluation mode
best_model.eval()

MyGoogLeNet(
  (pretrained): GoogLeNet(
    (conv1): BasicConv2d(
      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (conv2): BasicConv2d(
      (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3): BasicConv2d(
      (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (inception3a): Inception(
      (branch1): BasicConv2d(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        

In [14]:
predictions = []
numbers = []

for i, (images, nums) in enumerate(test_loader):
    # run forward pass through the network
    pred = torch.argmax(best_model(images), dim=1)
    predictions.append(pred.int().item())
    numbers.append(nums.int().item())



In [15]:
print(len(predictions))

4232


In [16]:
predClasses = np.vectorize(idx_to_class.get)(predictions)

d = {'test_id': numbers, 'label': predClasses}
predData = pd.DataFrame(data = d)
predData = predData.sort_values(by=['test_id'])
print(predData.head(10))

      test_id                 label
2829        0  HerbaceousVegetation
2326        1  HerbaceousVegetation
3410        2                Forest
2701        3                Forest
3763        4  HerbaceousVegetation
3752        5            AnnualCrop
3271        6                Forest
1273        7                Forest
1029        8                Forest
3926        9            AnnualCrop


In [17]:
predData.to_csv(os.path.join(model_evaluated,'submission.csv'), index = False)

In [18]:
predData.describe()

Unnamed: 0,test_id
count,4232.0
mean,2115.5
std,1221.817499
min,0.0
25%,1057.75
50%,2115.5
75%,3173.25
max,4231.0
