In [2]:
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
import helper

ModuleNotFoundError: No module named 'torch'

In [None]:
# Configs

In [None]:
# Dataset locations, relative to the notebook's working directory.
TRAIN_IMG_DIR = 'data/training/'
TEST_IMG_DIR = 'data/test/'

TRAIN_CSV = 'data/training_frames_keypoints.csv'
TEST_CSV = 'data/test_frames_keypoints.csv'

# Device on which the model and the batches are placed.
DEVICE = 'cpu'

# Data-loading settings.
BATCH_SIZE = 16
IMG_SIZE = 140  # images are resized to IMG_SIZE x IMG_SIZE by the augmentation pipelines

# Optimisation settings.
LR = 0.001
EPOCHS = 2
# Backbone name handed to timm.create_model. The name must be spelled MODEL_NAME
# because FacialKeyModel uses it as the default value of its `model_name` argument.
MODEL_NAME = 'resnet18'

In [None]:
# Understand Facial Keypoint dataset

In [None]:
# Read the training and test keypoint tables in one pass, then preview the training frame.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))

train_df.head()

In [None]:
# Preview one training sample: load its image and overlay the annotated keypoints.
idx = 2

# The first CSV column ('Unnamed: 0') holds the image file name.
img_name = train_df['Unnamed: 0'].iloc[idx]
img_path = TRAIN_IMG_DIR + img_name
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("Could not read image: {}".format(img_path))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB

# Every column after the file name is a coordinate; pair them up as (x, y) rows.
key = train_df.iloc[idx][1:].to_numpy().reshape(-1, 2)
print(key)

plt.imshow(img)
plt.scatter(key[:,0], key[:,1], s = 4, c = 'r');

In [None]:
# Hold out 20% of the training rows for validation; random_state = 42 fixes the split.
# NOTE: train_df is reassigned from itself, so re-running only this cell splits the
# already-reduced frame again. Re-run the CSV-loading cell first.
train_df, valid_df = train_test_split(train_df, test_size = 0.2, random_state = 42)

In [None]:
# Create Custom Facial Keypoint Dataset

In [None]:
import albumentations as A

In [None]:
class FacialKeyDataset(torch.utils.data.Dataset):
    """Dataset of (image, keypoints) pairs described by a keypoint DataFrame.

    Args:
        df: DataFrame whose 'Unnamed: 0' column holds the image file name and
            whose remaining columns (from position 1 on) hold the keypoint
            coordinates as alternating x, y values.
        img_data_dir: Directory prefix that is concatenated with the file name.
        augmentations: Optional callable invoked as
            ``augmentations(image=..., keypoints=...)`` that returns a mapping
            with 'image' and 'keypoints' entries. When None, the raw image and
            keypoints are returned unchanged.
    """

    def __init__(self, df, img_data_dir, augmentations = None):
        self.df = df
        self.img_data_dir = img_data_dir
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, keypoints)`` for the row at position ``idx``.

        Returns:
            image: float tensor laid out as (channel, height, width).
            keypoints: flat float tensor of the x, y coordinates.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        row = self.df.iloc[idx]
        img_path = self.img_data_dir + row['Unnamed: 0']

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when the file cannot be read.
            raise FileNotFoundError("Could not read image: {}".format(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # The row mixes a string (file name) with numbers, so its values are
        # objects; cast explicitly so the keypoints are a numeric (N, 2) array.
        key = row.iloc[1:].to_numpy(dtype = np.float32).reshape(-1, 2)

        if self.augmentations is not None:
            augmented_data = self.augmentations(image = img, keypoints = key)
            img = augmented_data['image']
            key = augmented_data['keypoints']

        # Convert to tensors on both paths so a dataset built without
        # augmentations returns tensors too, rather than failing on .permute.
        img = torch.from_numpy(np.ascontiguousarray(img)).float()
        key = torch.from_numpy(np.asarray(key, dtype = np.float32))

        # (height, width, channel) -> (channel, height, width); (N, 2) -> (2N,)
        return img.permute(2, 0, 1), key.reshape(-1)

In [None]:
def _resize_normalize_pipeline():
    """Build the Resize -> Normalize pipeline, which also transforms 'xy' keypoints."""
    steps = [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(),
    ]
    keypoint_cfg = A.KeypointParams(format='xy', remove_invisible = False)
    return A.Compose(steps, keypoint_params=keypoint_cfg)


# Training and validation/test use the same deterministic pipeline;
# A.HorizontalFlip(p=0.5) is not enabled in either of them.
train_augs = _resize_normalize_pipeline()
valid_test_augs = _resize_normalize_pipeline()

In [None]:
# Train/validation samples both live in TRAIN_IMG_DIR (they come from the same CSV split).
trainset = FacialKeyDataset(train_df, TRAIN_IMG_DIR, train_augs)
validset = FacialKeyDataset(valid_df, TRAIN_IMG_DIR, valid_test_augs)
# The test set must be indexed by test_df: its file names refer to images in TEST_IMG_DIR.
testset = FacialKeyDataset(test_df, TEST_IMG_DIR, valid_test_augs)

In [None]:
# Report how many samples ended up in each split.
n_train, n_valid = len(trainset), len(validset)
print(f"Total no. of examples present in trainset : {n_train}")
print(f"Total no. of examples present in validset : {n_valid}")

In [None]:
# Fetch one augmented (image, keypoints) sample from the training set and pass it to
# helper.imshow_with_key (presumably draws the image with its keypoints; see helper.py).
img, key = trainset[13]
helper.imshow_with_key(img, key)

# Load Dataset into Batches 

In [None]:
# Shuffle the training batches every epoch so the optimizer does not see the
# samples in the same order each time; validation order is kept fixed.
trainloader = torch.utils.data.DataLoader(trainset, batch_size = BATCH_SIZE, shuffle = True)
validloader = torch.utils.data.DataLoader(validset, batch_size = BATCH_SIZE)

In [None]:
# Number of batches each loader yields per epoch.
n_train_batches, n_valid_batches = len(trainloader), len(validloader)
print(f"Total no. batches in trainloader : {n_train_batches}")
print(f"Total no. batches in validloader : {n_valid_batches}")

In [None]:
# Take only the first batch from the training loader: unpack it into `image` and
# `key`, then stop iterating immediately.
for data in trainloader:
    image, key = data
    break;

In [None]:
# Inspect the batch fetched from trainloader (`image`, `key`) instead of
# repeating the batch counts that were already printed.
print("Shape of one batch of images : {}".format(image.shape))
print("Shape of one batch of keypoints : {}".format(key.shape))

# Create Model 

In [None]:
import timm

In [None]:
class FacialKeyModel(torch.nn.Module):
    """Keypoint regressor: a timm backbone whose head outputs 136 values per image.

    Args:
        model_name: Name of the timm architecture to create (pretrained weights
            are requested).
    """

    def __init__(self, model_name = MODEL_NAME):
        super(FacialKeyModel, self).__init__()

        print("Loading Backbone : {}".format(model_name))
        # num_classes = 136 sizes the final layer to emit one value per coordinate.
        self.backbone = timm.create_model(model_name, pretrained = True, num_classes = 136)

    def forward(self, images, key = None):
        """Run the backbone on ``images``.

        Args:
            images: Batch of images accepted by the backbone.
            key: Optional target keypoints with the same shape as the output.

        Returns:
            ``logits`` when ``key`` is None, otherwise ``(logits, mse_loss)``.
        """
        logits = self.backbone(images)

        # Identity check: comparing a tensor to None with `!=` only works through
        # a comparison fallback and is not a reliable "was a target given" test.
        if key is not None:
            return logits, torch.nn.functional.mse_loss(logits, key)

        return logits  # return only outputs

In [None]:
model = FacialKeyModel()
model.to(DEVICE);

# Smoke-test the output shape with a random batch sized from the config and placed
# on the model's device. eval() + no_grad() keep this throwaway forward pass from
# updating normalisation running statistics or building an autograd graph;
# train_fn switches the model back to train mode before training.
model.eval()
with torch.no_grad():
    dummy_out = model(torch.rand(BATCH_SIZE, 3, IMG_SIZE, IMG_SIZE, device = DEVICE))

dummy_out.shape

# Trainer and Evaluator

In [None]:
def train_fn(model, trainloader, optimizer):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        model: Module whose ``model(images, keys)`` call returns ``(output, loss)``.
        trainloader: Iterable of ``(images, keys)`` batches that supports ``len()``.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The mean of the per-batch losses over the pass.
    """
    train_loss = 0.0

    model.train()

    for data in tqdm(trainloader):   # tqdm shows the progress of the loop

        images, keys = data
        images, keys = images.to(DEVICE), keys.to(DEVICE)

        output, loss = model(images, keys)

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # compute gradients (dw, db)
        optimizer.step()       # update parameters: w = w - lr*dw, b = ...

        train_loss += loss.item()

    return train_loss / len(trainloader)

In [None]:
def eval_fn(model, validloader):
    """Compute the mean per-batch loss of ``model`` over ``validloader``.

    The model is put in eval mode and gradients are disabled for the whole pass.
    ``model(images, keys)`` is expected to return ``(output, loss)``.
    """
    model.eval()  # inference behaviour for layers such as dropout

    batch_losses = []
    with torch.no_grad():
        for batch in tqdm(validloader):  # tqdm shows the progress of the loop
            images, keys = batch
            _, loss = model(images.to(DEVICE), keys.to(DEVICE))
            batch_losses.append(loss.item())

    return sum(batch_losses, 0.0) / len(validloader)

# Training model 

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
# np.inf is the supported spelling; the np.Inf alias was removed in NumPy 2.0.
best_valid_loss = np.inf

for i in range(EPOCHS): # only two because one will take a lot of time

    avg_train_loss = train_fn(model, trainloader, optimizer)
    avg_valid_loss = eval_fn(model, validloader)

    # Checkpoint only when the validation loss improves on the best seen so far.
    if avg_valid_loss < best_valid_loss:
        torch.save(model.state_dict(), 'FacialKeyModel.pt')
        best_valid_loss = avg_valid_loss

    print("Epoch : {} Train_loss : {}".format(i+1, avg_train_loss))
    print("Epoch : {} Valid_loss : {}".format(i+1, avg_valid_loss))

# Visualizing Predictions

In [None]:
# Pick one test sample and compare its annotated keypoints with the model's prediction.
index = 85
image, key = testset[index]

# NOTE(review): this loads '[Colab]FacialKeyModel.pt', whereas the training cell saves
# 'FacialKeyModel.pt'. Presumably this is a checkpoint trained separately on Colab;
# confirm the file is present next to the notebook.
model.load_state_dict(torch.load('[Colab]FacialKeyModel.pt', map_location = DEVICE))
model.eval()
with torch.no_grad():
    image = image.to(DEVICE)
    out_key = model(image.unsqueeze(0)) # the model expects (batch size, channel, height, width) but the image is (channel, height, width), so unsqueeze(0) adds the batch dimension
    helper.compare_keypoints(image, key, out_key)

# Optional Task 