In [1]:
from torch.utils.data import Dataset, DataLoader
import torch, torchvision
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable
import torchvision.transforms as transforms
import pandas as pd
import os
import glob
import numpy as np
import rasterio as rio
from rasterio.plot import reshape_as_image
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
test_directory = '/home/mcgrau/PycharmProjects/SS22_AIML/data/testset'
model_directory = '/home/mcgrau/PycharmProjects/SS22_AIML/best_models'
model_name_directory = 'resnet_transfer'
model_evaluated = os.path.join(model_directory, model_name_directory)
best_model_directory = os.path.join(model_evaluated, 'resnet_trans_model_epoch_260.pth')

In [3]:
classes = [
    "AnnualCrop",
    "Forest",
    "HerbaceousVegetation",
    "Highway",
    "Industrial",
    "Pasture",
    "PermanentCrop",
    "Residential",
    "River",
    "SeaLake"
]

In [4]:
idx_to_class = {i:j for i, j in enumerate(classes)}
class_to_idx = {value:key for key,value in idx_to_class.items()}

In [5]:
import torchvision.models as models

pretrained = models.resnet18(pretrained = True)

In [15]:
class MyResNet(nn.Module):
    def __init__(self, my_pretrained_model):
        super(MyResNet, self).__init__()
        self.pretrained = my_pretrained_model
        self.my_new_layers = nn.Sequential(nn.Linear(1000, 512),
                                           nn.ReLU(inplace=True),
                                           nn.Linear(512, 10),
                                           nn.LogSoftmax(dim=1))

    def forward(self, x):
        x = self.pretrained(x)
        x = self.my_new_layers(x)
        return x

In [16]:
model = MyResNet(my_pretrained_model=pretrained)
model

MyResNet(
  (pretrained): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

In [17]:
testSamples = glob.glob(os.path.join(test_directory, "*.npy"))
len(testSamples)

4232

In [18]:
transformer = transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Resize(224),
                                  transforms.Normalize(mean = [1116.9715, 1041.5042,  945.7788],
                                                      std = [333.1604,  394.8386,  594.0629])
])

In [19]:
class testDataset(Dataset):
    def __init__(self, test_directory, transform=None):
        self.files = glob.glob(os.path.join(test_directory, "*.npy"))
        self.transform = transform

    def __len__(self):
      return len(self.files)

    def __getitem__(self, idx):
        item = self.files[idx]
        image = np.load(item).astype(int)
        image = np.moveaxis(image, [0,1,2], [2,1,0])
        image = image[(1,2,3), :, :].astype(float)
        image = reshape_as_image(image)
        number = int(item.split('/')[-1].split('_')[1].split('.')[0])

        if self.transform:
          image = self.transform(image)

        return image, number

In [20]:
testData = testDataset(test_directory = test_directory, transform = transformer)

In [21]:
test = testData[1]
print(test)

(tensor([[[-2.6323, -2.6323, -2.5905,  ..., -2.6070, -2.6173, -2.6173],
         [-2.6323, -2.6323, -2.5905,  ..., -2.6070, -2.6173, -2.6173],
         [-2.6271, -2.6271, -2.5845,  ..., -2.6174, -2.6340, -2.6340],
         ...,
         [-2.6072, -2.6072, -2.6029,  ..., -2.6018, -2.6160, -2.6160],
         [-2.5963, -2.5963, -2.5969,  ..., -2.6301, -2.6533, -2.6533],
         [-2.5963, -2.5963, -2.5969,  ..., -2.6301, -2.6533, -2.6533]],

        [[-1.8679, -1.8679, -1.8570,  ..., -1.9592, -1.9793, -1.9793],
         [-1.8679, -1.8679, -1.8570,  ..., -1.9592, -1.9793, -1.9793],
         [-1.8700, -1.8700, -1.8571,  ..., -1.9738, -1.9978, -1.9978],
         ...,
         [-1.8668, -1.8668, -1.8737,  ..., -1.9572, -1.9847, -1.9847],
         [-1.8603, -1.8603, -1.8673,  ..., -1.9668, -2.0021, -2.0021],
         [-1.8603, -1.8603, -1.8673,  ..., -1.9668, -2.0021, -2.0021]],

        [[-1.1897, -1.1897, -1.2006,  ..., -1.2364, -1.2436, -1.2436],
         [-1.1897, -1.1897, -1.2006,  ..., -

In [22]:
test_loader = DataLoader(testData, shuffle=False)

In [23]:
# load state_dict from path
state_dict_best = torch.load(best_model_directory, map_location=torch.device('cpu'))

# init pre-trained model class
best_model = model

# load pre-trained models
best_model.load_state_dict(state_dict_best)

<All keys matched successfully>

In [24]:
# set model in evaluation mode
best_model.double()
best_model.eval()

MyResNet(
  (pretrained): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

In [31]:
predictions = []
numbers = []

for i, (images, nums) in enumerate(test_loader):
    # run forward pass through the network
    pred = torch.argmax(best_model(images), dim=1)
    predictions.append(pred.int().item())
    numbers.append(nums.int().item())

tensor([3092])
tensor([292])
tensor([3463])
tensor([2840])
tensor([95])
tensor([3872])
tensor([3139])
tensor([1679])
tensor([3268])
tensor([786])
tensor([2560])
tensor([2001])
tensor([1500])
tensor([4146])
tensor([4169])
tensor([1344])
tensor([1237])
tensor([2490])
tensor([873])
tensor([4213])
tensor([3316])
tensor([1172])
tensor([3148])
tensor([1995])
tensor([4112])
tensor([2762])
tensor([1892])
tensor([2154])
tensor([833])
tensor([1777])
tensor([1973])
tensor([55])
tensor([3136])
tensor([1522])
tensor([856])
tensor([1813])
tensor([2936])
tensor([561])
tensor([864])
tensor([3894])
tensor([3416])
tensor([2582])
tensor([1192])
tensor([3299])
tensor([3747])
tensor([4173])
tensor([1868])
tensor([720])
tensor([3364])
tensor([663])
tensor([3798])
tensor([2158])
tensor([2491])
tensor([907])
tensor([3943])
tensor([4070])
tensor([3167])
tensor([4085])
tensor([2908])
tensor([4036])
tensor([3585])
tensor([2300])
tensor([78])
tensor([3744])
tensor([2666])
tensor([3392])
tensor([3771])
tensor([163

KeyboardInterrupt: 

In [26]:
print(len(predictions))

4232


In [27]:
predClasses = np.vectorize(idx_to_class.get)(predictions)

d = {'test_id': numbers, 'label': predClasses}
predData = pd.DataFrame(data = d)
predData = predData.sort_values(by=['test_id'])
print(predData.head(10))

      test_id                 label
2829        0               SeaLake
2326        1  HerbaceousVegetation
3410        2               SeaLake
2701        3               SeaLake
3763        4                Forest
3752        5                Forest
3271        6               SeaLake
1273        7               SeaLake
1029        8               SeaLake
3926        9               SeaLake


In [28]:
predData.to_csv(os.path.join(model_evaluated,'submission.csv'), index = False)

In [29]:
predData.describe()

Unnamed: 0,test_id
count,4232.0
mean,2115.5
std,1221.817499
min,0.0
25%,1057.75
50%,2115.5
75%,3173.25
max,4231.0
