<a href="https://colab.research.google.com/github/tanish-g/age-detection/blob/master/age_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Report the GPU (if any) attached to this runtime before training starts.
!nvidia-smi

In [None]:
# Download locations for the train archive, test archive and sample submission.
# NOTE(review): url_sample is reassigned to a direct CSV link in a later cell
# before it is first used -- confirm whether this contest-page link is needed.
url_test = "https://datahack-prod.s3.amazonaws.com/test_zip/test_Bh8pGW3.zip"
url_train = "https://datahack-prod.s3.amazonaws.com/train_zip/train_DETg9GD.zip"
url_sample = "https://datahack.analyticsvidhya.com/contest/practice-problem-age-detection/download/sample-submission"

In [None]:
# Fetch the sample submission from its direct S3 link.
# urllib.request is imported here because this cell runs before the cell that
# originally imported it; without this line a fresh-kernel "Run All" stops
# with a NameError.
import urllib.request

url_sample='https://datahack-prod.s3.amazonaws.com/sample_submission/sample_submission_sDO3m7O.csv'
urllib.request.urlretrieve(url_sample,'sample_submission.csv')

In [None]:
import urllib.request

# Download the training archive, the test archive and the sample submission
# into the working directory (same order as before).
urllib.request.urlretrieve(url=url_train, filename='train.zip')
urllib.request.urlretrieve(url=url_test, filename='test.zip')
urllib.request.urlretrieve(url=url_sample, filename='sample_submission.csv')

In [None]:
# Extract both archives quietly (-q) into the working directory.
!unzip -q train.zip
!unzip -q test.zip
# The archives are no longer needed once extracted; delete them to free disk.
!rm train.zip test.zip

In [None]:
import os
import cv2
from PIL import Image
import time
import copy
import warnings
import random
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.nn import functional as F
import torchvision
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from matplotlib import pyplot as plt
import torchvision.transforms as transforms
from albumentations import (HorizontalFlip,VerticalFlip, ShiftScaleRotate, Normalize, Resize, Compose, GaussNoise,RandomRotate90,Transpose,RandomBrightnessContrast,RandomCrop)
from albumentations.pytorch import ToTensor
import albumentations as albu
import matplotlib.image as mpi
from sklearn.metrics import f1_score
# Silence library warnings so the training log stays readable.
warnings.filterwarnings("ignore")

# Seed every random number generator the notebook relies on.
seed = 69
random.seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
np.random.seed(seed)
# torch.manual_seed seeds the default (CPU) generator, which drives layer
# initialisation and DataLoader shuffling; the original only seeded CUDA.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
# Ask cuDNN for deterministic kernels so GPU results are repeatable.
torch.backends.cudnn.deterministic = True

In [None]:
# Load the sample submission; later cells read its ID and Class columns to
# build the test dataset and the output frame.
df5 = pd.read_csv('sample_submission.csv')

In [None]:
# Display the column labels of the sample submission.
df5.keys()

In [None]:
# Load the training labels; the next cell reads its "ID" and "Class" columns.
df = pd.read_csv("train.csv")
# Display the column labels as a quick schema check.
df.keys()

In [None]:
# One-hot encode the age class and put the image ID back in front, giving a
# frame whose first column is the ID followed by one indicator column per class.
# NOTE: this overwrites `df`, so the cell cannot be re-run without reloading it.
df2 = df["ID"]
df1 = pd.get_dummies(df["Class"])
df = pd.concat([df2, df1], axis=1)
df.head()

In [None]:
# Hold out 20% of the rows for validation; random_state=42 keeps the split
# identical between runs.
df_train,df_val = train_test_split(df,test_size=0.2,random_state=42)

In [None]:
class Agedetection(Dataset):
  """Image dataset for the age-detection problem.

  Column 0 of ``df`` is the image file name and the remaining columns hold
  the label; ``__getitem__`` returns the argmax over those columns, which for
  a one-hot row is the index of the active class.

  Args:
    df: DataFrame with the image ID first and the label columns after it.
    root: Directory containing the image files named in ``df``.
    phase: ``"train"`` selects the augmenting pipeline, ``"val"`` the
      resize + normalise pipeline.

  Raises:
    ValueError: If ``phase`` is neither ``"train"`` nor ``"val"``.
  """

  def __init__(self,df,root,phase):
    # Fail fast: previously an unknown phase left self.transforms unset and
    # only surfaced as an AttributeError inside __getitem__ (often in a
    # DataLoader worker, where it is harder to diagnose).
    if phase not in ("train", "val"):
        raise ValueError(
            "phase must be 'train' or 'val', got {!r}".format(phase))
    self.df = df
    self.length = df.shape[0]
    self.root = root
    if phase=="train":
        # NOTE(review): RandomBrightness is applied twice in addition to
        # RandomBrightnessContrast, and may be deprecated in newer
        # albumentations releases -- kept as-is to preserve augmentation.
        self.transforms = albu.Compose([
            albu.Resize(256,256),
            albu.HorizontalFlip(p=0.5),
            albu.RandomBrightness(),
            albu.RandomBrightnessContrast(),
            albu.HueSaturationValue(-1,1),
            albu.RandomBrightness(),
            albu.RGBShift(),
            albu.Rotate(limit=(-45,45)),
            albu.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225)),
        ])
    else:
        # Validation/test: deterministic resize + normalisation only.
        self.transforms = albu.Compose([
            albu.Resize(256,256),
            albu.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225)),
        ])

  def __getitem__(self,index):
    """Return ``(image, label)`` for row ``index``.

    ``image`` is a float32 tensor in channel-first layout; ``label`` is the
    argmax over the label columns of that row.
    """
    label = self.df.iloc[index,1:]
    label = label.to_numpy()
    image_id = self.df.iloc[index,0]
    path = os.path.join(self.root,str(image_id))
    img = plt.imread(path)
    img = self.transforms(image=np.array(img))
    img = img['image']
    # albumentations returns HWC; the model expects CHW.
    img = np.transpose(img,(2,0,1)).astype(np.float32)
    img = torch.tensor(img, dtype = torch.float)
    label = np.argmax(label)
    return img,label

  def __len__(self):
    """Number of rows in the backing DataFrame."""
    return self.length
  
 

In [None]:
# Wrap the train/validation frames; both read images from the "Train" folder.
traindata = Agedetection(df_train,root = "Train", phase="train")
valdata = Agedetection(df_val,root = "Train", phase="val")
# shuffle=True for training: without it every epoch replays the samples in
# the same order. Validation order does not affect the metrics.
trainloader = DataLoader(traindata,batch_size = 48,shuffle=True,num_workers=4)
valloader = DataLoader(valdata,batch_size = 48,shuffle=False,num_workers=4)

In [None]:
# Pull one batch to sanity-check shapes and preview the augmentations.
# The builtin next() is used because DataLoader iterators in current PyTorch
# releases do not provide a .next() method.
dataiter = iter(trainloader)
image,label = next(dataiter)

In [None]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
def show_img(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalised channel-first image tensor (or image grid).

    The previous version undid a (0.5, 0.5) normalisation, but the dataset
    normalises with the per-channel mean/std passed to albu.Normalize, so the
    preview colours were wrong. The defaults here mirror those values.

    Args:
        img: Float tensor laid out as (channels, height, width).
        mean: Per-channel mean that was subtracted during normalisation.
        std: Per-channel standard deviation that was divided out.
    """
    plt.figure(figsize=(18,15))
    img = img.detach().cpu()
    # Reshape the statistics to (C, 1, 1) so they broadcast over H and W.
    channel_mean = torch.as_tensor(mean, dtype=img.dtype).view(-1, 1, 1)
    channel_std = torch.as_tensor(std, dtype=img.dtype).view(-1, 1, 1)
    img = img * channel_std + channel_mean
    npimg = img.numpy()
    # Clip so augmentation overshoot does not trigger imshow range warnings.
    npimg = np.clip(npimg, 0., 1.)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

show_img(torchvision.utils.make_grid(image))

In [None]:
# Sanity check of the batch layout. With batch_size=48, Resize(256, 256) and
# the HWC->CHW transpose this is expected to be (48, 3, 256, 256) for RGB
# inputs -- TODO confirm against the printed value.
print(image.shape)

In [None]:
!pip install timm
import timm

In [None]:
from torchvision import models

# Build a pretrained EfficientNet-B3 from timm with a 3-class head, then move
# it to the selected device. The variable is called `resnet` even though the
# architecture is EfficientNet; later cells refer to it by that name.
resnet = timm.models.efficientnet_b3(pretrained=True, num_classes=3)
resnet = resnet.to(device)

In [None]:
# Freeze the pretrained weights: requires_grad=False excludes them from
# gradient updates (set it to True instead to fine-tune the whole network).
for param in resnet.parameters():
    param.requires_grad=False

# timm's EfficientNet keeps its classification layer in `.classifier`;
# torchvision ResNets use `.fc`. Looking the name up avoids the AttributeError
# that `resnet.fc` raises on the EfficientNet built above.
head_name = "classifier" if hasattr(resnet, "classifier") else "fc"
fc_inputs = getattr(resnet, head_name).in_features
# A freshly created layer has requires_grad=True, so only the head trains.
setattr(resnet, head_name, nn.Linear(fc_inputs,3).to(device))

In [None]:
!pip install pytorch_ranger
from pytorch_ranger import Ranger

In [None]:
from torch.optim import lr_scheduler

# Loss, optimiser and learning-rate schedule used by the training loop.
criterion = nn.CrossEntropyLoss()
optimizer = Ranger(
    resnet.parameters(),
    lr=0.001,
    weight_decay=0.001,
)
# mode="min": the monitored quantity is expected to decrease (e.g. a loss).
scheduler = ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.33,
    patience=3,
)

In [None]:
from tqdm import tqdm

In [None]:
def train_model(dataloaders,model, criterion, optimizer, scheduler, num_epochs):
    """Train ``model`` and restore the weights with the best validation accuracy.

    Args:
        dataloaders: Mapping with ``'train'`` and ``'val'`` DataLoaders.
        model: Network to optimise; it is updated in place.
        criterion: Loss, called as ``criterion(outputs, labels)``.
        optimizer: Optimiser, stepped once per training batch.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            validation phase (ReduceLROnPlateau receives the validation
            loss). May be ``None`` to disable scheduling.
        num_epochs: Number of passes over the training data.

    Returns:
        ``(model, acc_val, acc_train, loss_train, loss_val)``: the model with
        the best weights loaded, and four lists holding one Python float per
        epoch.
    """
    since = time.time()
    dataset_sizes = {'train': len(dataloaders['train'].dataset),
                     'val': len(dataloaders['val'].dataset)}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    acc_train = []
    acc_val = []
    loss_train = []
    loss_val = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables dropout/batch-norm updates, train(False) == eval().
            model.train(is_train)

            current_loss = 0.0
            current_corrects = 0
            tk = tqdm(dataloaders[phase], total=len(dataloaders[phase]), position=0, leave=True)
            for inputs, labels in tk:
                inputs = inputs.to(device)
                labels = labels.to(device)
                if is_train:
                    optimizer.zero_grad()
                # Only build the autograd graph while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by batch size so the epoch
                # average is exact even when the last batch is smaller.
                current_loss += loss.item() * inputs.size(0)
                current_corrects += torch.sum(preds == labels).item()

            epoch_loss = current_loss / dataset_sizes[phase]
            # Plain floats (not device tensors) so the history can be plotted
            # or serialised directly.
            epoch_acc = current_corrects / dataset_sizes[phase]
            if is_train:
                acc_train.append(epoch_acc)
                loss_train.append(epoch_loss)
            else:
                acc_val.append(epoch_acc)
                loss_val.append(epoch_loss)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if not is_train:
                # The scheduler was previously never stepped, so the learning
                # rate stayed constant for the whole run.
                if scheduler is not None:
                    if isinstance(scheduler, ReduceLROnPlateau):
                        scheduler.step(epoch_loss)
                    else:
                        scheduler.step()
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_since = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_since // 60, time_since % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    model.load_state_dict(best_model_wts)


    return model,acc_val,acc_train,loss_train,loss_val

In [None]:
# Make sure the whole network (including the replaced head) is on the training
# device, then run the training loop.
resnet = resnet.to(device)
num_epochs = 25
dataloaders = dict(train=trainloader, val=valloader)
start_time = time.time()
model, acc_val, acc_train, loss_train, loss_val = train_model(
    dataloaders,
    resnet,
    criterion,
    optimizer,
    scheduler,
    num_epochs=num_epochs,
)

In [None]:
# Derive the x-axis from the recorded history instead of a hard-coded 12, so
# it always matches the number of epochs that were actually trained.
epoch = list(range(len(loss_train)))
plt.plot(epoch,loss_train,label = 'TrainLoss')
plt.plot(epoch,loss_val,label = 'ValLoss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Bundle model, optimiser and scheduler state into a single checkpoint file.
# NOTE(review): the scheduler key uses a dot ('scheduler.state_dict') unlike
# the other two keys, and the file is named res152 although the model built
# in this notebook is EfficientNet-B3 -- both kept unchanged for compatibility.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler.state_dict': scheduler.state_dict(),
}
torch.save(checkpoint, 'res152.pth')

In [None]:
# Build the empty submission frame directly from the sample submission (df5).
# The original read `sub`, which is only defined in a later cell, so a
# top-to-bottom run failed here with NameError.
output = pd.DataFrame(index=df5.index,columns = ["ID","Class"])
output['ID'] = df5['ID']

In [None]:
# Select the submission columns from the sample submission (df5); the original
# indexed `sub` before it had been defined anywhere in the notebook.
sub=df5[["ID","Class"]]
sub.head()

In [None]:
# Display the sample submission frame.
# NOTE(review): this renders the whole frame; df5.head() would keep the
# notebook output shorter.
df5

In [None]:
# Keep only the ID and Class columns; `sub` backs the test dataset below.
sub=df5[['ID','Class']]

In [None]:
# Test images live in the "Test" folder and go through the validation
# pipeline (resize + normalise, no augmentation). No shuffling, so the
# predictions stay aligned with the rows of `sub`.
testdata = Agedetection(df=sub, root="Test", phase="val")
testloader = DataLoader(dataset=testdata, batch_size=16, num_workers=2)

In [None]:
def test_submission(model, class_names=None):
    """Predict every test image and write the predicted label into ``output``.

    Reads the module-level ``testloader`` and ``device`` and fills the
    ``Class`` column of the module-level ``output`` frame in place.

    The original assigned a 3-column probability array through
    ``output.loc[:, 1:]``, which mixes positional slicing into a label-based
    indexer and cannot fit into the single ``Class`` column. This version
    stores the most probable class name instead.
    NOTE(review): assumes the submission expects class labels (the frame has
    only ID and Class columns) -- confirm against the contest format.

    Args:
        model: Trained network producing one logit per class.
        class_names: Class labels in logit order. Defaults to the label
            columns of the one-hot training frame ``df`` (every column after
            the ID column).

    Returns:
        numpy array of softmax probabilities, one row per test image.
    """
    since = time.time()
    if class_names is None:
        class_names = list(df.columns[1:])
    model.eval()
    batch_logits = []
    # no_grad: inference only, so skip autograd bookkeeping and save memory.
    with torch.no_grad():
        for inputs, _ in testloader:
            inputs = inputs.to(device)
            batch_logits.append(model(inputs).cpu())
    # torch.softmax is numerically stable, unlike a hand-rolled exp/sum.
    sub_output = torch.softmax(torch.cat(batch_logits).double(), dim=1).numpy()
    output['Class'] = [class_names[i] for i in sub_output.argmax(axis=1)]
    print()
    time_elapsed = time.time() - since
    print('Run complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return sub_output
        

In [None]:
#model = model.to(device)
# train_model loads the best weights into the module it was given and returns
# that same object, so (once the training cell has run) `resnet` and `model`
# refer to the same network.
test_submission(resnet)

In [None]:
# Preview the first rows of the submission frame before writing it to disk.
output.head()

In [None]:
# Write the submission without the DataFrame index column.
# NOTE(review): the file is named res152 although the model built in this
# notebook is EfficientNet-B3 -- confirm the intended name.
output.to_csv("res152.csv", index=False)