In [None]:
from __future__ import print_function, division

import copy
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from tqdm.notebook import tqdm

In [None]:
# Image directories (train / test) of the actors dataset.
TRAIN_IMG_PATH, TEST_IMG_PATH = (
    "../input/actorsdataset/train/Train/",
    "../input/actorsdataset/test/Test/",
)
# CSV files: training labels and the submission template.
LABELS_CSV_PATH, SAMPLE_SUB_PATH = (
    "../input/actorsdataset/train.csv",
    "../input/actorsdataset/test.csv",
)

In [None]:
# Integer label encoding for the age classes (class name -> class index);
# these are plain class indices, not one-hot vectors.
agedict = dict(YOUNG=0, MIDDLE=1, OLD=2)
# Inverse lookup (class index -> class name), derived so the two stay in sync.
revdict = {code: name for name, code in agedict.items()}

In [None]:
# Load the labels table and replace the textual 'Class' values by their
# integer codes from `agedict` (values missing from `agedict` become NaN).
df = pd.read_csv(LABELS_CSV_PATH)
df = df.assign(Class=df['Class'].map(agedict))

In [None]:
# Positional 90/10 hold-out: the first 90% of rows are used for training,
# the remaining rows (re-indexed from 0) for validation.
cut = int(0.9 * len(df))
train = df.iloc[:cut]
test = df.iloc[cut:].reset_index(drop=True)

In [None]:
class actorsDataset(Dataset):
    """Image dataset driven by a labels table.

    Every row of ``labels`` provides an image file name (column ``ID``),
    resolved relative to ``img_dir``, and a target value (column ``Class``).
    """

    def __init__(self, img_dir, labels, transform=None):
        """
        Args:
            img_dir: directory that contains the image files.
            labels: DataFrame with ``ID`` and ``Class`` columns, one row per sample.
            transform: optional callable applied to the loaded image.
        """
        self.labels = labels
        self.dir = img_dir
        self.transform = transform

    def __len__(self):
        """Number of samples (rows of the labels table)."""
        return len(self.labels)

    def __getitem__(self, i):
        """Return ``[image, label]`` for the ``i``-th row of the labels table."""
        # Samplers hand out positions 0..len-1, so look rows up by position.
        # Label-based access (`labels.ID[i]`) fails as soon as the frame's
        # index is not a fresh 0..n-1 range (e.g. a slice that was not reset).
        img_path = os.path.join(self.dir, self.labels['ID'].iloc[i])
        label = self.labels['Class'].iloc[i]

        # Force three channels: grayscale / RGBA / palette files would
        # otherwise produce tensors a 3-channel network cannot consume.
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return [image, label]
        

In [None]:
# Preprocessing pipeline: take a random crop of the image resized to
# 128x128, then convert the PIL image to a tensor.
data_transform = transforms.Compose(
    [transforms.RandomResizedCrop(128), transforms.ToTensor()]
)

In [None]:
# Validation must be deterministic: evaluating on random crops makes the
# reported accuracy (and therefore the "best epoch" choice) change from run
# to run. Use a plain resize to the same 128x128 input size instead.
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# Both splits read their images from the training image directory.
train_df = actorsDataset(TRAIN_IMG_PATH, train, data_transform)
test_df = actorsDataset(TRAIN_IMG_PATH, test, eval_transform)

In [None]:
# Datasets keyed by phase name; used to look up the per-phase sample count.
datasets = dict(train=train_df, val=test_df)

#### DataLoader

**While training a model, we typically want to pass samples in “minibatches”, reshuffle the data at every epoch to reduce model overfitting, and use Python’s multiprocessing to speed up data retrieval.**

DataLoader is an iterable that abstracts this complexity for us in an easy API.

In [None]:
# Training batches are reshuffled every epoch.
trainloader = DataLoader(train_df, batch_size=32, shuffle=True)
# Validation only aggregates loss/accuracy over the whole split, so the
# order is irrelevant; keep it fixed instead of drawing a permutation.
testloader = DataLoader(test_df, batch_size=32, shuffle=False)

In [None]:
# Loaders keyed by phase name, iterated by the training loop.
dataloader = dict(train=trainloader, val=testloader)

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase.
    Batches are read from the notebook-level ``dataloader`` dict, per-phase
    sample counts from ``datasets``, and tensors are moved to the
    notebook-level ``device``.

    Args:
        model: ``nn.Module`` to optimise.
        criterion: loss callable invoked as ``criterion(outputs, labels)``;
            its value is weighted by the batch size when accumulated.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of train/val passes.

    Returns:
        The same ``model`` object, carrying the weights of the epoch with the
        highest validation accuracy (its initial weights if no epoch scored
        above 0).
    """
    since = time.time()

    # state_dict() returns references to the live parameter tensors, so a
    # snapshot must be a deep copy or it silently follows later updates.
    best_model = copy.deepcopy(model.state_dict())
    best_accu = 0.0

    for epoch in tqdm(range(num_epochs)):

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-*-'*10)

        since_epoch = time.time()
        for phase in ['train', 'val']:
            # Select the mode BEFORE running the phase: dropout / batch-norm
            # must be in training mode for 'train' and in eval mode for 'val'.
            model.train(phase == 'train')

            run_loss = 0.0
            corr = 0

            for inputs, labels in tqdm(dataloader[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients accumulate across backward passes; reset them.
                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                run_loss += loss.item() * inputs.size(0)
                # Accumulate as a Python int so an empty loader cannot leave
                # a non-tensor behind for the accuracy computation.
                corr += (preds == labels).sum().item()

            if phase == 'train':
                # Without this call the learning rate stays at its initial value.
                scheduler.step()

            n_samples = len(datasets[phase])
            epoch_loss = run_loss / n_samples
            epoch_accu = corr / n_samples

            # Measured from the start of the epoch, so the 'val' line
            # includes the time spent in the 'train' phase.
            time_epoch = time.time() - since_epoch
            print('{} Loss: {:.4f} Acc: {:.4f} in {:.0f}m {:.0f}s'.format(phase, epoch_loss, epoch_accu, time_epoch // 60, time_epoch % 60))

            if phase == 'val' and epoch_accu > best_accu:
                best_accu = epoch_accu
                best_model = copy.deepcopy(model.state_dict())

        print()

    time_elaps = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elaps//60, time_elaps%60))
    print('Best Val Acc: {:4f}'.format(best_accu))

    # Hand back the best checkpoint rather than whatever the last epoch left.
    model.load_state_dict(best_model)
    return model
                        

### optimizer.zero_grad()

**We need to set the gradients to zero before starting backpropagation because PyTorch accumulates gradients on subsequent backward passes.**

##### Default Behavior is Useful for RNNs

**Because of this, you should zero out the gradients at the start of every training iteration so that the parameter update is computed correctly. Otherwise the gradient would point in some other direction than the intended direction towards the minimum.**

### loss.backward() || optimizer.step()

When you call loss.backward(), it computes the gradient of the loss w.r.t. every parameter that has requires_grad = True and stores it in that parameter's parameter.grad attribute.

optimizer.step() updates all the parameters based on parameter.grad

In [None]:
class actorCNN(nn.Module):
    """Frozen pretrained ResNet-18 with a small trainable classification head.

    All backbone parameters are frozen; the backbone's final ``fc`` layer is
    then replaced by a new (trainable) linear layer, followed by ReLU and a
    linear output layer.

    Args:
        num_classes: number of output logits (default 3).
        hidden_dim: width of the layer that replaces the backbone's ``fc``
            (default 512).
    """

    def __init__(self, num_classes=3, hidden_dim=512):

        super(actorCNN, self).__init__()

        self.model = models.resnet18(pretrained=True)

        # Freeze the backbone; layers created below are trainable by default.
        for params in self.model.parameters():
            params.requires_grad = False

        # Read the feature width from the backbone instead of hard-coding it.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, hidden_dim)
        self.l = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

        self.classifier = nn.Sequential(self.model, self.l, self.fc2)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.classifier(x)
    


In [None]:
# Training configuration.
num_epochs, num_classes = 25, 3
# NOTE(review): the DataLoader cell passes batch_size=32 literally and does
# not read this value.
batch_size = 128
learning_rate = 2e-3

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device("cpu")

In [None]:
# Build the network, then move its parameters to the selected device.
model = actorCNN()
model = model.to(device)

In [None]:
# Cross-entropy loss over the class logits.
criterion = nn.CrossEntropyLoss()
# Adam over the model's parameters.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.1 every 5 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, gamma=0.1, step_size=5)

In [None]:
# Run the training loop (default number of epochs) and keep the returned model.
model_ft = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
)

In [None]:
# Submission template listing the test image IDs.
submission_df = pd.read_csv(SAMPLE_SUB_PATH)

# Empty frame with the template's row index and column labels.
output_df = pd.DataFrame(
    columns=submission_df.columns,
    index=submission_df.index,
)

In [None]:
# Copy the image IDs over and fill 'Class' with a placeholder 0 so the frame
# can be fed to the dataset class, which reads a 'Class' column for each row.
output_df = output_df.assign(ID=submission_df['ID'], Class=0)

In [None]:
# Inference must be deterministic: a random crop would make the submitted
# predictions change from run to run, so use a plain resize to 128x128.
submission_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

# Use a dedicated name for the dataset: rebinding `submission_df` (a
# DataFrame) to a Dataset would break re-running the cells that index it.
submission_ds = actorsDataset(TEST_IMG_PATH, output_df, submission_transform)
sub_loader = DataLoader(submission_ds, batch_size=1, shuffle=False)

In [None]:
def test_sub(model):
    
    prediction = []
    model.train(False)
    
    for data in sub_loader:
        inputs, labels = data
        
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = model(inputs)
        _, pred = torch.max(outputs.data, 1)
        prediction.append(int(pred))
        
    return prediction

In [None]:
# Predict a class index for every test image, translate the indices back to
# class names and write the submission file.
class_codes = pd.Series(test_sub(model_ft), index=output_df.index)
output_df['Class'] = class_codes.map(revdict)

output_df.to_csv('submission.csv', index=False)