# Predicting classes for the new test data set

In [5]:
## coding: utf-8

import numpy as np
import time
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

from airbus_dataloader import *
from airbus_train_val_functions import *

import warnings
warnings.filterwarnings('ignore')

class Net2(nn.Module):
    """LeNet-style CNN: two conv/pool stages followed by three linear layers.

    ``fc1`` is sized for a 16 x 45 x 45 feature map, which is what the two
    ``Conv2d(kernel 5) -> MaxPool2d(2, 2)`` stages produce for a
    3 x 192 x 192 input.  The network returns 2 raw scores per image; no
    softmax is applied inside the model.
    """

    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)   # in channels, out channels, kernel size
        self.pool = nn.MaxPool2d(2, 2)    # kernel size, stride (shared by both stages)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*45*45, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        """Return a ``(batch, 2)`` tensor of scores for an image batch ``x``.

        Raises:
            RuntimeError: if the feature map produced for ``x`` does not have
                the 16*45*45 elements per sample that ``fc1`` expects.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension fixed.  A
        # ``view(-1, 16*45*45)`` would let the batch dimension absorb a size
        # mismatch and silently regroup pixels from different images.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


class CNN_32_x4(nn.Module):
    """Two-stage CNN (32 then 64 feature maps) with one linear classifier.

    The classifier is sized for a 64 x 45 x 45 feature map, which is what the
    two ``Conv2d(kernel 5) -> ReLU -> MaxPool2d(2, 2)`` stages produce for a
    3 x 192 x 192 input.

    Args:
        num_classes: number of output scores produced per image.
    """

    def __init__(self, num_classes):
        super(CNN_32_x4, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 5)  # input features, output features, kernel size
        self.act1 = nn.ReLU()
        self.mp1 = nn.MaxPool2d(2, 2)  # kernel size, stride

        self.conv2 = nn.Conv2d(32, 64, 5)  # input features, output features, kernel size
        self.act2 = nn.ReLU()
        self.mp2 = nn.MaxPool2d(2, 2)  # kernel size, stride

        # 45x45 is the spatial resolution the classifier is sized for
        self.fc = nn.Linear(64*45*45, num_classes)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of scores for ``x``.

        Raises:
            RuntimeError: if the feature map produced for ``x`` does not have
                the 64*45*45 elements per sample that ``fc`` expects.
        """
        x = self.mp1(self.act1(self.conv1(x)))
        x = self.mp2(self.act2(self.conv2(x)))
        # Flatten each sample to a vector (the representation needed by the
        # classifier) while keeping the batch dimension fixed, so a wrong
        # input size fails loudly instead of regrouping pixels across images.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
    
class CNN_8_x4(nn.Module):
    """Two-stage CNN (8 then 16 feature maps) with one linear classifier.

    The classifier is sized for a 16 x 45 x 45 feature map, which is what the
    two ``Conv2d(kernel 5) -> ReLU -> MaxPool2d(2, 2)`` stages produce for a
    3 x 192 x 192 input.

    Args:
        num_classes: number of output scores produced per image.
    """

    def __init__(self, num_classes):
        super(CNN_8_x4, self).__init__()

        self.conv1 = nn.Conv2d(3, 8, 5)  # input features, output features, kernel size
        self.act1 = nn.ReLU()
        self.mp1 = nn.MaxPool2d(2, 2)  # kernel size, stride

        self.conv2 = nn.Conv2d(8, 16, 5)  # input features, output features, kernel size
        self.act2 = nn.ReLU()
        self.mp2 = nn.MaxPool2d(2, 2)  # kernel size, stride

        # 45x45 is the spatial resolution the classifier is sized for
        self.fc = nn.Linear(16*45*45, num_classes)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of scores for ``x``.

        Raises:
            RuntimeError: if the feature map produced for ``x`` does not have
                the 16*45*45 elements per sample that ``fc`` expects.
        """
        x = self.mp1(self.act1(self.conv1(x)))
        x = self.mp2(self.act2(self.conv2(x)))
        # Flatten each sample to a vector (the representation needed by the
        # classifier) while keeping the batch dimension fixed, so a wrong
        # input size fails loudly instead of regrouping pixels across images.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


# --- Run configuration ------------------------------------------------------
# Query CUDA availability once and derive the device from that answer.
is_gpu = torch.cuda.is_available()
device = torch.device("cuda" if is_gpu else "cpu")

# Settings handed positionally to AirbusDS below (see airbus_dataloader for
# their exact meaning).
batch_size = 8
workers = 4
path = '../../airbus/'
aug = True
resize_factor = 4
empty_frac = 1
test_size = 0.1

dataset = AirbusDS(is_gpu, batch_size, workers,
                   path, aug, resize_factor, empty_frac, test_size)

# Restore the saved model object rather than building a fresh, untrained
# CNN_8_x4(2).  torch.load unpickles the file, so the model's class must be
# defined in this session and the file must come from a trusted source.
model = torch.load('CNN_8_x4_bal.model', map_location=device)
model = model.to(device)

In [3]:
# Run the loaded model over the test loader with a 0.5 decision threshold.
# The results file named here is read back in the exploration cells below.
predict(
    dataset.test_loader,
    model,
    device,
    predict_file='predictions05_v2.txt',
    threshold=0.5,
)

START PREDICTIONS 2018-10-13 11:05:27
Predicted 0 from 1951	
Predicted 1000 from 1951	
FINISH PREDICTIONS 2018-10-13 11:06:53
PREDICTIONS RUN TIME (s) 86.84315371513367


## Explore the predictions

In [6]:
# Read the tab-separated prediction files (no header row) for both thresholds.
# Missing values are replaced with -1 so every row has a value in each column.
prediction_columns = ['ImageId', 'Label']

pred05 = pd.read_csv('predictions05_v2.txt', sep='\t', names=prediction_columns)
pred05 = pred05.fillna(-1)

pred04 = pd.read_csv('predictions04_v2.txt', sep='\t', names=prediction_columns)
pred04 = pred04.fillna(-1)

In [19]:
# Size of the test set = number of non-missing entries in the Label column.
labels05 = pred05['Label']
test_set_size = labels05.count()

size_message = 'The new test set has {r} images.'.format(r=test_set_size)
print(size_message)

The new test set has 15606 images.


### Threshold 0.5 

In [13]:
# Images labelled 0 at threshold 0.5 are the rejected ones; count their
# non-missing ImageId entries (ImageId is the first column of the frame).
is_rejected05 = pred05['Label'] == 0
reject05 = pred05.loc[is_rejected05, 'ImageId'].count()

In [17]:
# Report rejected vs. remaining images at threshold 0.5, as counts and as
# percentages of the whole test set.
kept05 = test_set_size - reject05
reject05_pct = reject05 / test_set_size * 100
kept05_pct = kept05 / test_set_size * 100

print('We reject {r} images or {d:.2f}%'.format(r=reject05, d=reject05_pct))
print('We left {r} images or {d:.2f}%'.format(r=kept05, d=kept05_pct))

We reject 10192 images or 65.31%
We left 5414 images or 34.69%


### Threshold 0.4 

In [20]:
# Images labelled 0 at threshold 0.4 are the rejected ones; count their
# non-missing ImageId entries (ImageId is the first column of the frame).
is_rejected04 = pred04['Label'] == 0
reject04 = pred04.loc[is_rejected04, 'ImageId'].count()

In [21]:
# Report rejected vs. remaining images at threshold 0.4, as counts and as
# percentages of the whole test set.
kept04 = test_set_size - reject04
reject04_pct = reject04 / test_set_size * 100
kept04_pct = kept04 / test_set_size * 100

print('We reject {r} images or {d:.2f}%'.format(r=reject04, d=reject04_pct))
print('We left {r} images or {d:.2f}%'.format(r=kept04, d=kept04_pct))

We reject 5439 images or 34.85%
We left 10167 images or 65.15%
