In [1]:
from __future__ import print_function
import zipfile
import os

import argparse
from tqdm import tqdm
import os
import PIL.Image as Image

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import copy

import easydict

In [2]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Additional info when using cuda
if device.type == 'cuda':
    # torch.cuda.memory_cached is deprecated in favour of memory_reserved;
    # fall back to the old name only on PyTorch builds that predate the rename.
    _reserved_bytes = getattr(torch.cuda, 'memory_reserved', None)
    if _reserved_bytes is None:
        _reserved_bytes = torch.cuda.memory_cached
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**2,1), 'MB')
    print('Cached:   ', round(_reserved_bytes(0)/1024**2,1), 'MB')

Using device: cuda

GeForce GTX 1070
Memory Usage:
Allocated: 0.0 MB
Cached:    0.0 MB


In [3]:
def initialize_data(folder):
    """Extract the competition archives under ``folder`` and build a validation split.

    Expects ``folder/train_images.zip`` and ``folder/test_images.zip``. Each
    archive is extracted into ``folder`` only when its target directory
    (``train_images`` / ``test_images``) does not exist yet. If
    ``folder/val_images`` is missing, it is created and, for every class
    directory in ``train_images`` whose name starts with ``000``, the files
    whose names start with ``00000``, ``00001`` or ``00002`` are *moved* from
    the training folder into the matching validation class folder.

    Parameters
    ----------
    folder : str
        Directory containing the two zip archives.

    Raises
    ------
    RuntimeError
        If either archive is missing.
    """
    train_zip = folder + '/train_images.zip'
    test_zip = folder + '/test_images.zip'
    if not os.path.exists(train_zip) or not os.path.exists(test_zip):
        raise RuntimeError("Could not find " + train_zip + " and " + test_zip
              + ', please download them from https://www.kaggle.com/c/nyu-cv-fall-2018/data ')

    train_folder = folder + '/train_images'
    test_folder = folder + '/test_images'

    # Extract each archive only if its output directory is not there yet.
    for target_folder, archive in ((train_folder, train_zip), (test_folder, test_zip)):
        if not os.path.isdir(target_folder):
            print(target_folder + ' not found, extracting ' + archive)
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(archive, 'r') as zip_ref:
                zip_ref.extractall(folder)

    # make validation_data by using images 00000*, 00001* and 00002* in each class
    val_folder = folder + '/val_images'
    if not os.path.isdir(val_folder):
        print(val_folder + ' not found, making a validation set')
        os.mkdir(val_folder)
        val_prefixes = ('00000', '00001', '00002')
        for class_dir in os.listdir(train_folder):
            if not class_dir.startswith('000'):
                continue
            os.mkdir(val_folder + '/' + class_dir)
            for image_name in os.listdir(train_folder + '/' + class_dir):
                if image_name.startswith(val_prefixes):
                    # move file to validation folder (it no longer exists in train)
                    os.rename(train_folder + '/' + class_dir + '/' + image_name,
                              val_folder + '/' + class_dir + '/' + image_name)

In [9]:
# Root directory holding the competition archives and the extracted image folders.
address = 'C:/Users/tiany/Assignment_2'
initialize_data(address)

# Preprocessing shared by training, validation and test images:
# resize to 32x32, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std constants are presumably dataset statistics - confirm.
data_transforms = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.3337, 0.3064, 0.3171), (0.2672, 0.2564, 0.2629)),
])

# Hyper-parameters, exposed with attribute access (args.lr, args.batch_size, ...).
args = easydict.EasyDict(dict(
    batch_size=64,
    epochs=100,
    lr=0.00005,
    momentum=0.5,
    seed=1,
    log_interval=100,
    weight_decay=1e-2,
))

# Extra keyword arguments forwarded to every DataLoader.
kwargs = dict(num_workers=12, pin_memory=True)

# Seed PyTorch's RNG before the loaders are built.
torch.manual_seed(args.seed)


def _image_folder_loader(subfolder, shuffle):
    """Return a DataLoader over the ImageFolder at ``address + subfolder``."""
    dataset = datasets.ImageFolder(address + subfolder,
                                   transform=data_transforms)
    return torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=shuffle, **kwargs)


train_loader = _image_folder_loader('/train_images', shuffle=True)

val_loader = _image_folder_loader('/val_images', shuffle=False)


In [5]:
# Number of target classes produced by the classifier heads.
nclasses = 43


class Net(nn.Module):
    """Small three-convolution classifier that returns per-class log-probabilities.

    The first fully connected layer takes 1000 features (250 channels x 2 x 2),
    which is what the three conv + 2x2 max-pool stages produce for a
    3 x 32 x 32 input.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 100, kernel_size=3)
        self.conv2 = nn.Conv2d(100, 150, kernel_size=4)
        self.conv2_drop = nn.Dropout2d()
        self.conv3 = nn.Conv2d(150, 250, kernel_size=3)
        self.conv3_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(1000, 200)
        self.fc2 = nn.Linear(200, nclasses)

    def forward(self, x):
        """Map a batch of images to log-probabilities of shape (batch, nclasses)."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = F.relu(F.max_pool2d(self.conv3_drop(self.conv3(x)), 2))
        # Flatten the 250 x 2 x 2 feature maps into one vector per sample.
        x = x.view(-1, 250*4)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class axis explicitly; omitting ``dim`` is
        # deprecated and emits a warning on every forward pass.
        return F.log_softmax(x, dim=1)

In [6]:
class Inception(nn.Module):
    """Inception block with four parallel branches concatenated on the channel axis.

    Branches: a 1x1 convolution; a 1x1 reduction followed by a 3x3
    convolution; a 1x1 reduction followed by two stacked 3x3 convolutions
    (the "5x5" branch); and a 3x3 max-pool followed by a 1x1 projection.
    Every convolution is followed by batch normalisation and an in-place ReLU.
    The output has ``_1x1_ + _3x3_ + _5x5_ + pool_proj`` channels and the same
    spatial size as the input.

    Reference:
    GoogLeNet: Going Deeper with Convolutions
    https://arxiv.org/abs/1409.4842v1
    Github Reference: https://github.com/xuchaoxi/pytorch-classification/blob/master/models/googlenet.py
    """

    @staticmethod
    def _conv_bn_relu(n_in, n_out, **conv_kwargs):
        """Return the [Conv2d, BatchNorm2d, ReLU] layers of one convolution unit."""
        return [
            nn.Conv2d(n_in, n_out, **conv_kwargs),
            nn.BatchNorm2d(n_out),
            nn.ReLU(True),
        ]

    def __init__(self, in_channels, _1x1_, _3x3reduce_, _3x3_, _5x5reduce_, _5x5_, pool_proj):
        super(Inception, self).__init__()
        unit = self._conv_bn_relu

        # Branch 1: plain 1x1 convolution.
        self.con1x1 = nn.Sequential(*unit(in_channels, _1x1_, kernel_size=1))

        # Branch 2: 1x1 reduction, then a padded 3x3 convolution.
        self.con1x1_3x3 = nn.Sequential(*(
            unit(in_channels, _3x3reduce_, kernel_size=1)
            + unit(_3x3reduce_, _3x3_, kernel_size=3, padding=1)
        ))

        # Branch 3: 1x1 reduction, then two padded 3x3 convolutions.
        self.con1x1_5x5 = nn.Sequential(*(
            unit(in_channels, _5x5reduce_, kernel_size=1)
            + unit(_5x5reduce_, _5x5_, kernel_size=3, padding=1)
            + unit(_5x5_, _5x5_, kernel_size=3, padding=1)
        ))

        # Branch 4: size-preserving 3x3 max-pool, then a 1x1 projection.
        self.mp3x3_con1x1 = nn.Sequential(*(
            [nn.MaxPool2d(kernel_size=3, stride=1, padding=1)]
            + unit(in_channels, pool_proj, kernel_size=1)
        ))

    def forward(self, inputs):
        """Run all four branches on ``inputs`` and concatenate along dim 1."""
        branches = (self.con1x1, self.con1x1_3x3, self.con1x1_5x5, self.mp3x3_con1x1)
        return torch.cat([branch(inputs) for branch in branches], dim=1)


class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier built from nine ``Inception`` blocks.

    A 3x3 convolutional stem feeds inception stages 3a-3b, 4a-4e and 5a-5b,
    with a stride-2 max-pool after stage 3 and after stage 4. An 8x8 average
    pool, dropout and a linear layer produce 43 raw (un-normalised) scores.
    The 8x8 pooling window and the 1024-feature linear layer match
    3 x 32 x 32 inputs.
    """

    def __init__(self):
        super(GoogLeNet, self).__init__()
        stem_channels = 192
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, stem_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(stem_channels),
            nn.ReLU(True),
        )

        # (attribute name, Inception constructor arguments), registered in
        # this order: in_channels, 1x1, 3x3 reduce, 3x3, 5x5 reduce, 5x5, pool proj.
        inception_table = (
            ('_3a', (192,  64,  96, 128, 16,  32,  32)),
            ('_3b', (256, 128, 128, 192, 32,  96,  64)),
            ('_4a', (480, 192,  96, 208, 16,  48,  64)),
            ('_4b', (512, 160, 112, 224, 24,  64,  64)),
            ('_4c', (512, 128, 128, 256, 24,  64,  64)),
            ('_4d', (512, 112, 144, 288, 32,  64,  64)),
            ('_4e', (528, 256, 160, 320, 32, 128, 128)),
            ('_5a', (832, 256, 160, 320, 32, 128, 128)),
            ('_5b', (832, 384, 192, 384, 48, 128, 128)),
        )
        for attr_name, channel_spec in inception_table:
            setattr(self, attr_name, Inception(*channel_spec))

        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
        self.avgpool = nn.AvgPool2d(8, stride=1)
        self.dropout = nn.Dropout2d(0.4)
        self.linear = nn.Linear(1024, 43)

    def forward(self, input):
        """Return raw class scores of shape (batch, 43) for a batch of images."""
        pipeline = (
            self._3a, self._3b, self.maxpool,
            self._4a, self._4b, self._4c, self._4d, self._4e, self.maxpool,
            self._5a, self._5b,
            self.avgpool, self.dropout,
        )
        features = self.pre_layers(input)
        for stage in pipeline:
            features = stage(features)
        # Collapse everything but the batch axis before the linear classifier.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)

In [11]:
# Directory that holds the trained checkpoints (.pth state dicts).
model_loc = 'C:/Users/tiany/Assignment_2/submissions/'


def _load_checkpoint(model_class, checkpoint_name):
    """Instantiate ``model_class`` on ``device`` and restore weights from ``model_loc``.

    ``map_location=device`` remaps the stored tensors onto the device chosen
    for this session, so a checkpoint saved on a GPU also loads on a
    CPU-only machine.
    """
    model = model_class().to(device)
    state_dict = torch.load(model_loc + checkpoint_name, map_location=device)
    model.load_state_dict(state_dict)
    return model


model_GooNet2 = _load_checkpoint(GoogLeNet, "model_GoogLeNet_2-85-3825.pth")
model_GooNet31 = _load_checkpoint(GoogLeNet, "model_GoogLeNet_3_81-3835.pth")
model_GooNet32 = _load_checkpoint(GoogLeNet, "model_GoogLeNet_3_89-3848.pth")
model_GooNet33 = _load_checkpoint(GoogLeNet, "model_GoogLeNet_3_99-3848.pth")
model_Basic = _load_checkpoint(Net, "model_Net_5_89-3756.pth")

<All keys matched successfully>

In [12]:
def generate_kaggle_single_model(filename, model):
    """Write a Kaggle submission CSV with the predictions of a single model.

    Every file in ``address + '/test_images'`` whose name contains ``ppm`` is
    loaded as RGB, passed through ``data_transforms`` and classified. One
    ``<first five characters of the file name>,<predicted class index>`` row
    is written per image, after a ``Filename,ClassId`` header.

    For example: generate_kaggle_single_model("test.csv", model_GooNet2)

    filename: String || the name of the output file
    model: pytorch model (switched to eval mode by this function)
    """
    model.eval()
    test_dir = address + '/test_images'

    def pil_loader(path):
        # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
        with open(path, 'rb') as f:
            with Image.open(f) as img:
                return img.convert('RGB')

    # The ``with`` block closes the CSV even if inference fails part-way.
    with open(filename, "w") as output_file:
        output_file.write("Filename,ClassId\n")
        # Variable(..., volatile=True) no longer disables autograd;
        # torch.no_grad() is what actually skips graph construction.
        with torch.no_grad():
            for f in tqdm(os.listdir(test_dir)):
                if 'ppm' in f:
                    data = data_transforms(pil_loader(test_dir + '/' + f))
                    # Add the leading batch axis: (C, H, W) -> (1, C, H, W).
                    data = data.unsqueeze(0).to(device)
                    output = model(data)
                    # Index of the largest score, as a plain Python int.
                    pred = output.max(1, keepdim=True)[1].item()

                    file_id = f[0:5]
                    output_file.write("%s,%d\n" % (file_id, pred))

    print("Succesfully wrote " + filename + ', you can upload this file to the kaggle '
          'competition at https://www.kaggle.com/c/nyu-cv-fall-2019/')

In [13]:
def generate_kaggle_ensemble_model(filename, models, average_probabilities=False):
    """Write a Kaggle submission CSV with the averaged prediction of several models.

    Every file in ``address + '/test_images'`` whose name contains ``ppm`` is
    loaded as RGB, passed through ``data_transforms`` and fed to each model.
    The model outputs are averaged and the arg-max class is written as a
    ``<first five characters of the file name>,<predicted class index>`` row,
    after a ``Filename,ClassId`` header.

    For example:
    models_test = [model_GooNet2, model_GooNet31, model_GooNet32, model_GooNet33, model_Basic]
    generate_kaggle_ensemble_model('test_ensemble.csv',models_test)
    ====================================================================================
    filename: String || the name of the output file
    models: a non-empty list with pytorch models as items (each is switched to eval mode)
    average_probabilities: Bool || when False (default) the raw model outputs
        are averaged as-is. In this notebook ``Net`` returns log-probabilities
        while ``GoogLeNet`` returns un-normalised scores, so that average mixes
        two different scales. When True, each output is first passed through a
        softmax over the class axis, so that every model contributes a
        probability distribution.

    Raises ValueError if ``models`` is empty.
    """
    models = list(models)
    if not models:
        # Fail before the output file is created instead of inside torch.stack.
        raise ValueError("models must contain at least one model")
    for model in models:
        model.eval()
    test_dir = address + '/test_images'

    def pil_loader(path):
        # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
        with open(path, 'rb') as f:
            with Image.open(f) as img:
                return img.convert('RGB')

    # The ``with`` block closes the CSV even if inference fails part-way.
    with open(filename, "w") as output_file:
        output_file.write("Filename,ClassId\n")
        # Variable(..., volatile=True) no longer disables autograd;
        # torch.no_grad() is what actually skips graph construction.
        with torch.no_grad():
            for f in tqdm(os.listdir(test_dir)):
                if 'ppm' in f:
                    data = data_transforms(pil_loader(test_dir + '/' + f))
                    # Add the leading batch axis: (C, H, W) -> (1, C, H, W).
                    data = data.unsqueeze(0).to(device)
                    outputs = [member(data) for member in models]
                    if average_probabilities:
                        # softmax(log_softmax(x)) == softmax(x), so this is
                        # valid for both logit and log-probability outputs.
                        outputs = [F.softmax(scores, dim=1) for scores in outputs]
                    output = torch.mean(torch.stack(outputs), dim=0)
                    # Index of the largest averaged score, as a plain Python int.
                    pred = output.max(1, keepdim=True)[1].item()

                    file_id = f[0:5]
                    output_file.write("%s,%d\n" % (file_id, pred))

    print("Succesfully wrote " + filename + ', you can upload this file to the kaggle '
          'competition at https://www.kaggle.com/c/nyu-cv-fall-2019/')

In [None]:
# Ensemble members: the four GoogLeNet checkpoints plus the basic Net.
models_test = [model_GooNet2, model_GooNet31, model_GooNet32, model_GooNet33, model_Basic]
# Classify every test image with the ensemble and write the rows to test_ensemble.csv.
generate_kaggle_ensemble_model('test_ensemble.csv',models_test)

  app.launch_new_instance()
 80%|████████████████████████████████████████████████████████████▌               | 10074/12631 [15:06<03:42, 11.47it/s]