# In [None]:
from __future__ import print_function, division
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import pandas as pd
from skimage import io, transform
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, utils, models
from torch.utils.data.sampler import SubsetRandomSampler
import time
import copy

class TrainDataset(Dataset):
    """Training split: the first ``n_rows`` data rows of the solutions CSV.

    Each sample pairs one cropped JPEG with the values of its CSV row.

    Args:
        csv_file: Path of the solutions CSV. Column 0 holds the image id
            (used as the JPEG file name); the remaining columns are targets.
        root_dir: Directory that contains ``<id>.jpg`` for every row.
        n_rows: Number of leading data rows that make up this split.
    """

    def __init__(self, csv_file, root_dir, n_rows=43136):
        # Read the file the caller asked for; the path used to be hard-coded
        # here, which silently ignored ``csv_file``.
        self.data = pd.read_csv(csv_file, nrows=n_rows)
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows (samples) in this split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'img': array, 'p': array}`` for row ``idx``.

        ``img`` is the [100:324, 100:324] crop with axis 2 moved to the front
        (assumes the decoded image is H x W x C). ``p`` has shape
        ``(1, n_targets)`` and float dtype.
        """
        img_path = os.path.join(self.root_dir,
                                str(self.data.iloc[idx, 0]) + '.jpg')
        image = io.imread(img_path)
        img = image[100:324, 100:324].transpose((2, 0, 1))
        probs = np.array([self.data.iloc[idx, 1:].values]).astype('float')
        return {'img': img, 'p': probs}

class ValDataset(Dataset):
    """Validation split: ``n_rows`` data rows following the first ``skip_rows``.

    Each sample pairs one cropped JPEG with the values of its CSV row.

    Args:
        csv_file: Path of the solutions CSV. Column 0 holds the image id
            (used as the JPEG file name); the remaining columns are targets.
        root_dir: Directory that contains ``<id>.jpg`` for every row.
        skip_rows: Number of leading data rows (header excluded) to skip.
        n_rows: Number of data rows that make up this split.
    """

    def __init__(self, csv_file, root_dir, skip_rows=43136, n_rows=12288):
        # Skip data lines 1..skip_rows but keep line 0, so the real header is
        # used. An integer ``skiprows`` would also drop the header and turn a
        # data row into column names; the selected data rows are the same.
        self.data = pd.read_csv(csv_file,
                                skiprows=range(1, skip_rows + 1),
                                nrows=n_rows)
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows (samples) in this split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'img': array, 'p': array}`` for row ``idx``.

        ``img`` is the [100:324, 100:324] crop with axis 2 moved to the front
        (assumes the decoded image is H x W x C). ``p`` has shape
        ``(1, n_targets)`` and float dtype.
        """
        img_path = os.path.join(self.root_dir,
                                str(self.data.iloc[idx, 0]) + '.jpg')
        image = io.imread(img_path)
        img = image[100:324, 100:324].transpose((2, 0, 1))
        probs = np.array([self.data.iloc[idx, 1:].values]).astype('float')
        return {'img': img, 'p': probs}

class TestDataset(Dataset):
    """Held-out split: ``n_rows`` data rows following the first ``skip_rows``.

    Each sample pairs one cropped JPEG with the values of its CSV row.

    Args:
        csv_file: Path of the solutions CSV. Column 0 holds the image id
            (used as the JPEG file name); the remaining columns are targets.
        root_dir: Directory that contains ``<id>.jpg`` for every row.
        skip_rows: Number of leading data rows (header excluded) to skip.
        n_rows: Number of data rows that make up this split.
    """

    def __init__(self, csv_file, root_dir, skip_rows=55424, n_rows=6144):
        # Skip data lines 1..skip_rows but keep line 0, so the real header is
        # used. An integer ``skiprows`` would also drop the header and turn a
        # data row into column names; the selected data rows are the same.
        self.data = pd.read_csv(csv_file,
                                skiprows=range(1, skip_rows + 1),
                                nrows=n_rows)
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of rows (samples) in this split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'img': array, 'p': array}`` for row ``idx``.

        ``img`` is the [100:324, 100:324] crop with axis 2 moved to the front
        (assumes the decoded image is H x W x C). ``p`` has shape
        ``(1, n_targets)`` and float dtype.
        """
        img_path = os.path.join(self.root_dir,
                                str(self.data.iloc[idx, 0]) + '.jpg')
        image = io.imread(img_path)
        img = image[100:324, 100:324].transpose((2, 0, 1))
        probs = np.array([self.data.iloc[idx, 1:].values]).astype('float')
        return {'img': img, 'p': probs}

# The three splits are built from the same solutions CSV and image folder.
transformed_train_dataset = TrainDataset(
    csv_file='training_solutions_rev1/training_solutions_rev1.csv',
    root_dir='images_training_rev1/')
transformed_val_dataset = ValDataset(
    csv_file='training_solutions_rev1/training_solutions_rev1.csv',
    root_dir='images_training_rev1/')
transformed_test_dataset = TestDataset(
    csv_file='training_solutions_rev1/training_solutions_rev1.csv',
    root_dir='images_training_rev1/')

# Reshuffle the training samples every epoch so SGD does not see the same
# batch composition each time; evaluation loaders keep a fixed order.
dataloader_train = DataLoader(transformed_train_dataset,
                              batch_size=32, shuffle=True, num_workers=0)
dataloader_val = DataLoader(transformed_val_dataset,
                            batch_size=32, shuffle=False, num_workers=0)
dataloader_test = DataLoader(transformed_test_dataset,
                             batch_size=32, shuffle=False, num_workers=0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(str(device))

# No pre-pass over the loaders here: each batch is moved to `device` inside
# the training loop. Iterating the loaders up front only read every image
# from disk and threw the resulting batches away.

dataset = {'train': dataloader_train,
           'val': dataloader_val}  # define a composite dataset

def train_model(model, criterion, optimizer, scheduler, num_epochs,
                dataloaders=None):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' phase (gradients on, optimizer stepped per
    batch, scheduler stepped once afterwards) followed by a 'val' phase
    (gradients off). The weights of the epoch with the lowest validation
    loss are restored before returning.

    Args:
        model: Module to optimise; batches are moved to the device of its
            first parameter (CPU if it has none).
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch, after training.
        num_epochs: Number of train+val epochs to run.
        dataloaders: Optional mapping with 'train' and 'val' iterables of
            ``{'img': tensor, 'p': tensor}`` batches. Defaults to the
            module-level ``dataset`` mapping.

    Returns:
        ``model``, with the best validation-epoch state dict loaded.
    """
    if dataloaders is None:
        dataloaders = dataset

    # Follow the model's own device instead of hard-coding CUDA tensor types,
    # so the same loop works on CPU-only machines.
    first_param = next(model.parameters(), None)
    run_device = (first_param.device if first_param is not None
                  else torch.device("cpu"))

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    # Start at +inf so the first validation epoch always becomes the baseline.
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print('\n Epoch {}/{}'.format(epoch + 1, num_epochs))
        print("-"*50)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            n_samples = 0

            for batch in dataloaders[phase]:
                inputs = batch['img'].to(run_device).float()
                labels = batch['p'].to(run_device).float()

                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # Labels arrive as (B, 1, n); align them with the (B, n)
                    # outputs so the loss does not broadcast to (B, B, n).
                    if (labels.shape != outputs.shape
                            and labels.numel() == outputs.numel()):
                        labels = labels.reshape(outputs.shape)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight by the real batch size (the last batch may be short).
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                n_samples += batch_size

            if is_train:
                # Step the schedule after the epoch's optimizer updates.
                scheduler.step()

            epoch_loss = (running_loss / n_samples if n_samples
                          else float('nan'))
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best Error: {:4f}'.format(best_loss))

    model.load_state_dict(best_model_wts)
    return model

# No pre-pass over dataloader_test: test_model() moves each batch to `device`
# itself, and looping here only read every test image and discarded it.

def test_model(model, criterion, dataloader=None):
    """Evaluate ``model`` over every batch of the test loader.

    The model is switched to eval mode and run without gradient tracking.
    Outputs and labels of all batches are concatenated and the criterion is
    applied once to the full set, so the loss covers the whole split rather
    than only the final batch.

    Args:
        model: Module to evaluate; batches are moved to the device of its
            first parameter (CPU if it has none).
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor.
        dataloader: Optional iterable of ``{'img': tensor, 'p': tensor}``
            batches. Defaults to the module-level ``dataloader_test``.

    Returns:
        Tuple ``(loss, outputs, labels)``: the scalar loss tensor plus the
        concatenated predictions and targets.

    Raises:
        ValueError: If the loader yields no batches.
    """
    if dataloader is None:
        dataloader = dataloader_test

    # Follow the model's own device instead of hard-coding CUDA tensor types.
    first_param = next(model.parameters(), None)
    run_device = (first_param.device if first_param is not None
                  else torch.device("cpu"))

    model.eval()  # disable dropout / use running batch-norm statistics
    all_outputs = []
    all_labels = []
    with torch.no_grad():
        for batch in dataloader:
            inputs = batch['img'].to(run_device).float()
            labels = batch['p'].to(run_device).float()
            outputs = model(inputs)
            # Labels arrive as (B, 1, n); align them with the (B, n) outputs
            # so the loss does not broadcast to (B, B, n).
            if (labels.shape != outputs.shape
                    and labels.numel() == outputs.numel()):
                labels = labels.reshape(outputs.shape)
            all_outputs.append(outputs)
            all_labels.append(labels)

        if not all_outputs:
            raise ValueError("test dataloader yielded no batches")

        outputs = torch.cat(all_outputs)
        labels = torch.cat(all_labels)
        loss = criterion(outputs, labels)
    return loss, outputs, labels


# The name matches pytest's ``test_*`` pattern; opt out of test collection.
test_model.__test__ = False

# Backbone: pretrained ResNet-18 whose final layer is replaced by a
# 500-unit linear layer feeding the regression head below.
model_ft = models.resnet18(pretrained=True)

num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 500)

# Head: 500 -> 100 -> 37 outputs, with dropout and a final ReLU.
D_in, H, D_out = 500, 100, 37
model = torch.nn.Sequential(
    model_ft,
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(H, D_out),
    torch.nn.ReLU()
)
model = model.to(device)
criterion = nn.MSELoss()
# Optimise the whole network. Passing only model_ft.parameters() left the two
# head Linear layers at their random initialisation for the entire run.
optimizer_ft = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model = train_model(model, criterion, optimizer_ft, exp_lr_scheduler,
                    num_epochs=8)

loss, outputs, labels = test_model(model, criterion)
print('Test Error = {:4f}'.format(loss.item()))