In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the available data files are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#import the libraries
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import torch
import PIL

In [None]:
# Load the training keypoint annotations CSV into a DataFrame and preview
# its first rows (the bare expression is what the cell displays).
key = pd.read_csv('../input/input-key/training_frames_keypoints.csv')
key.head()


In [None]:
class ToTensor(object):
    """Convert the ndarrays of a sample dict into torch Tensors.

    The image is returned channel-first (C x H x W); a 2-D image first gets
    a singleton channel axis. Keypoints are converted as they are.
    """

    def __call__(self, sample):
        """Return a new ``{'image', 'keypoints'}`` dict holding tensors."""
        pixels = sample['image']
        points = sample['keypoints']

        # A 2-D (H x W) array has no channel axis yet: append one -> H x W x 1.
        if pixels.ndim == 2:
            pixels = pixels.reshape(pixels.shape + (1,))

        # numpy image layout is H x W x C, torch expects C x H x W.
        channel_first = pixels.transpose((2, 0, 1))

        return {
            'image': torch.from_numpy(channel_first),
            'keypoints': torch.from_numpy(points),
        }
class Normalize(object):
    """Convert the image to grayscale in [0, 1] and standardise the keypoints.

    Args:
        mean: value subtracted from every keypoint coordinate (default 100).
        std: value the centred coordinates are divided by (default 50).

    With the defaults the keypoints become ``(pts - 100) / 50``, i.e. roughly
    centred on 0 with a range of about [-1, 1]. Code that undoes the
    normalisation must use the same two values.
    """

    def __init__(self, mean=100.0, std=50.0):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        """Return a new sample dict; the input arrays are not modified."""
        image, key_pts = sample['image'], sample['keypoints']

        if image.ndim == 2:
            # Already grayscale: cv2.cvtColor(RGB2GRAY) would reject it.
            gray = image
        elif image.shape[2] == 1:
            # Single-channel H x W x 1 image: just drop the channel axis.
            gray = image[:, :, 0]
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Scale colour range from [0, 255] to [0, 1]; the division allocates
        # a new array, so no explicit copy of the input is needed.
        gray = gray / 255.0

        # Centre and scale the keypoints (also produces a new array).
        norm_pts = (np.asarray(key_pts) - self.mean) / self.std

        return {'image': gray, 'keypoints': norm_pts}
class Rescale(object):
    """Resize the image of a sample and scale its keypoints to match.

    Args:
        output_size (int or tuple): a tuple is used directly as the target
            ``(height, width)``. An int sets the shorter image side to that
            length and scales the other side by the same factor.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        """Return a new sample dict with the resized image and keypoints."""
        image = sample['image']
        key_pts = sample['keypoints']
        height, width = image.shape[:2]
        target = self.output_size

        if not isinstance(target, int):
            out_h, out_w = target
        elif height > width:
            # Portrait: width is the shorter side and becomes `target`.
            out_h, out_w = target * height / width, target
        else:
            out_h, out_w = target, target * width / height

        out_h = int(out_h)
        out_w = int(out_w)

        # cv2.resize takes the size as (width, height).
        resized = cv2.resize(image, (out_w, out_h))

        # Keypoints are (x, y): x scales with the width, y with the height.
        scaled_pts = key_pts * [out_w / width, out_h / height]

        return {'image': resized, 'keypoints': scaled_pts}
class RandomCrop(object):
    """Crop a random window from the image and shift the keypoints with it.

    Args:
        output_size (int or tuple): desired crop size. An int produces a
            square crop; a tuple is used as ``(height, width)``.

    Raises:
        ValueError: when called on an image smaller than the crop size.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        """Return a new sample dict holding the cropped image and keypoints."""
        image, key_pts = sample['image'], sample['keypoints']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        if new_h > h or new_w > w:
            raise ValueError(
                'crop size {} is larger than the image size {}'.format(
                    (new_h, new_w), (h, w)))

        # randint's upper bound is exclusive, so add 1: this makes the last
        # valid offset reachable and allows an image that is already exactly
        # the crop size (offset 0 is then the only choice).
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h,
                      left: left + new_w]

        # Keypoints are (x, y), so subtract (left, top).
        key_pts = key_pts - [left, top]

        return {'image': image, 'keypoints': key_pts}



In [None]:
# The 1st column contains the image file name and all the other columns
# contain the x and y coordinates of the keypoints,
# so we will separate them.
# Create a Dataset class; a sample of our dataset will be a dictionary {'image': image, 'keypoints': key_pts}
from torch.utils.data import DataLoader , Dataset
from torchvision import transforms, utils

class facialdataset(Dataset):
    """Facial keypoints dataset backed by an annotation CSV and an image folder.

    Each CSV row holds the image file name in its first column followed by
    the keypoint coordinates, which are reshaped into (x, y) pairs.

    Args:
        csv_file: path of the annotation CSV.
        root_dir: directory the image file names are relative to.
        transform: optional callable applied to every sample dict.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.key_csv = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Number of annotated images (rows of the CSV)."""
        return len(self.key_csv)

    def __getitem__(self, idx):
        """Return ``{'image': ndarray, 'keypoints': (N, 2) float ndarray}``.

        The dict is passed through ``self.transform`` when one was given.
        """
        # Some samplers hand over tensor indices; pandas wants plain ints.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Root dir + file name from the first CSV column locate the image.
        image_name = os.path.join(self.root_dir, self.key_csv.iloc[idx, 0])
        image = mpimg.imread(image_name)

        # Drop the alpha channel of 4-channel images. The ndim guard keeps
        # 2-D grayscale images from raising IndexError on shape[2].
        if image.ndim == 3 and image.shape[2] == 4:
            image = image[:, :, 0:3]

        # Remaining columns are the flat coordinates -> rows of (x, y).
        key_cords = self.key_csv.iloc[idx, 1:].to_numpy()
        key_cords = key_cords.astype('float').reshape(-1, 2)
        sample = {'image': image, 'keypoints': key_cords}

        if self.transform:
            sample = self.transform(sample)
        return sample
# Preprocessing pipeline applied to every sample.
# Order matters: rescale first so that the smaller 224-pixel random crop is
# taken from the 250-pixel image, then normalise, then convert to tensors.
train_transforms = transforms.Compose([
    Rescale(250),
    RandomCrop(224),
    Normalize(),
    ToTensor(),
])

In [None]:
# Build the train and test datasets from their annotation CSVs and image
# folders; every sample comes back as a transformed {'image', 'keypoints'} dict.
# NOTE(review): the test set also goes through train_transforms, which
# contains RandomCrop, so evaluation samples vary between runs - confirm
# this is intended.
face_dataset_train = facialdataset(
    csv_file='/kaggle/input/input-key/training_frames_keypoints.csv',
    root_dir='/kaggle/input/input-key/training/training/',
    transform=train_transforms,
)
face_dataset_test = facialdataset(
    csv_file='../input/input-key/test_frames_keypoints.csv',
    root_dir='../input/input-key/test/test/',
    transform=train_transforms,
)

In [None]:
# Report how many samples (CSV rows) each dataset contains.
print('length of train data' , len(face_dataset_train))
print('length of test data' , len(face_dataset_test))

In [None]:
# Print the dataset object itself (its repr), not its contents.
print("train" , face_dataset_train)

In [None]:
# Sanity check: dataset sizes, then the tensor shapes of test samples 1..4
# after the transform pipeline has been applied (index 0 is not inspected).
print('number of images in train' , len(face_dataset_train))
print('number of images in test' , len(face_dataset_test))
for i in range(1,5):
    sample = face_dataset_test[i]
    print(i , sample['image'].size() , sample['keypoints'].size() )

In [None]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
#TO INITIALIZE WEIGHTS OF NETWORK
import torch.nn.init as I



In [None]:
from torch import nn
import torch.nn.functional as F
class Net(nn.Module):
    """CNN regressing 136 values (68 (x, y) keypoints) from a grayscale image.

    Three stages of two ELU-activated convolutions, each followed by max
    pooling and dropout, feed three fully connected layers. With the layer
    settings below a 1 x 224 x 224 input reaches the flatten step as
    256 x 4 x 4 = 4096 features, which is what ``fc1`` expects.
    """

    def __init__(self):
        super().__init__()
        # Convolutions: 1 grayscale input channel -> 256 feature maps.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(5, 5), stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(4, 4), stride=2, padding=1)
        self.conv3 = nn.Conv2d(64, 256, kernel_size=(3, 3), stride=1, padding=1)
        self.conv4 = nn.Conv2d(256, 384, kernel_size=(3, 3), stride=1, padding=1)
        self.conv5 = nn.Conv2d(384, 512, kernel_size=(3, 3), stride=1, padding=0)
        self.conv6 = nn.Conv2d(512, 256, kernel_size=(3, 3), stride=1, padding=0)
        # One max-pool layer shared by all three stages.
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        # Fully connected head ending in the 136 keypoint coordinates.
        self.fc1 = nn.Linear(4096, 2048)
        self.fc2 = nn.Linear(2048, 512)
        self.fc3 = nn.Linear(512, 136)
        # Dropout layers against overfitting (dropout6 is not used in forward).
        self.dropout2 = nn.Dropout(p=0.2)
        self.dropout4 = nn.Dropout(p=0.4)
        self.dropout6 = nn.Dropout(p=0.6)

    def forward(self, x):
        """Map a batch of images (N x 1 x H x W) to N x 136 predictions."""
        conv_stages = (
            (self.conv1, self.conv2),
            (self.conv3, self.conv4),
            (self.conv5, self.conv6),
        )
        for first, second in conv_stages:
            x = F.elu(second(F.elu(first(x))))
            x = self.dropout2(self.pool(x))

        # Flatten everything but the batch dimension.
        x = x.view(x.size(0), -1)

        x = self.dropout4(F.elu(self.fc1(x)))
        x = self.dropout2(F.elu(self.fc2(x)))
        return self.fc3(x)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Instantiate the network on that device and print its layer summary.
face_model = Net().to(device)
print(face_model)

In [None]:
# Mini-batch size shared by both loaders.
batch_size = 64

# Both loaders reshuffle their dataset on every pass.
train_loader = DataLoader(face_dataset_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(face_dataset_test, batch_size=batch_size, shuffle=True)
print(train_loader)

In [None]:
import torch.optim as optim

# Regression criterion: Smooth L1 between predicted and target keypoints.
loss = nn.SmoothL1Loss()

# Adam over every parameter of the network, learning rate 1e-3.
opt = optim.Adam(face_model.parameters(), lr=1e-3)

In [None]:
 def train_mod(epochs):
     """Train ``face_model`` on ``train_loader`` for ``epochs`` passes.

     Uses the notebook-level ``face_model``, ``train_loader``, ``loss`` and
     ``opt`` objects. Batches are moved to whatever device the model lives
     on, so the loop runs on GPU and on the CPU fallback alike.

     Args:
         epochs: number of full passes over ``train_loader``.

     Returns:
         list of float: the loss of every processed batch, in order.
     """
     # Follow the model's device instead of hard-coding .cuda().
     device = next(face_model.parameters()).device

     face_model.train()
     training_loss = []
     for epoch in range(epochs):
         running_loss = 0.0
         for i, data in enumerate(train_loader):
             # Regression loss needs float32 inputs and targets.
             images = data['image'].to(device=device, dtype=torch.float32)
             cords = data['keypoints'].to(device=device, dtype=torch.float32)

             # Flatten the keypoints to match the network output (N x 136).
             cords_final = cords.reshape(cords.size(0), -1)

             # Forward pass and loss.
             out = face_model(images)
             loss_ = loss(out, cords_final)

             # Reset gradients, backpropagate, update the weights.
             opt.zero_grad()
             loss_.backward()
             opt.step()

             # Record the per-batch loss before any reset of the running
             # sum, so the returned history has no artificial zeros.
             batch_loss = loss_.item()
             training_loss.append(batch_loss)
             running_loss += batch_loss

             if i % 10 == 9:    # print every 10 batches
                 print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, i+1, running_loss/10))
                 running_loss = 0.0

     print('Finished Training')
     return training_loss

In [None]:
# Train for 60 epochs and keep the loss history list returned by train_mod.
training_loss = train_mod(60)

In [None]:
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists (a no-op when it is already there).
os.makedirs('/kaggle/working/saved_models/', exist_ok=True)

# Bundle the model object, its weights and the optimizer state. The 'model'
# and 'state_dict' keys are the ones load_checkpoint reads back.
checkpoint = {
    'model': face_model,
    'state_dict': face_model.state_dict(),
    'optimizer': opt.state_dict(),
}

# Save the checkpoint.
torch.save(checkpoint, '/kaggle/working/saved_models/' + 'model0.pth')
print("Model saved")

In [None]:
#testing the model
def test_model():
    """Run ``face_model`` on the first batch of ``val_loader``.

    Only one batch is evaluated; the function returns as soon as it has
    been processed. Batches are moved to the device the model lives on.

    Returns:
        tuple: ``(images, pred_key, key_)`` - the float32 input images, the
        predictions reshaped to ``batch x 68 x 2`` and the ground-truth
        keypoints of that batch.

    Raises:
        ValueError: if ``val_loader`` yields no batch.
    """
    # Follow the model's device instead of hard-coding .cuda().
    device = next(face_model.parameters()).device

    face_model.eval()
    with torch.no_grad():
        for sample in val_loader:
            # Sample data: images and ground-truth keypoints.
            images = sample['image'].to(device=device, dtype=torch.float32)
            key_ = sample['keypoints'].to(device)

            # Forward pass, then reshape to batch_size x 68 x 2 points.
            pred_key = face_model(images)
            pred_key = pred_key.view(pred_key.size()[0], 68, -1)

            # Stop after the first batch.
            return images, pred_key, key_

    raise ValueError('val_loader produced no batches')

# Evaluate one validation batch and keep its images, predictions and targets.
val_img , pred_key , true_key = test_model()

# Print the shapes of the three tensors as a sanity check.
print(val_img.data.size())
print(pred_key.data.size())
print(true_key.size())
#print(val_img , pred_key , true_key)

In [None]:
def show_all_keypoints(image, predicted_key_pts, gt_pts=None):
    """Draw a grayscale image with its keypoints on the current axes.

    Args:
        image: 2-D image array, shown with the gray colormap.
        predicted_key_pts: array of (x, y) rows, drawn in magenta.
        gt_pts: optional array of (x, y) rows, drawn in green.
    """
    plt.imshow(image, cmap='gray')

    pred_x, pred_y = predicted_key_pts[:, 0], predicted_key_pts[:, 1]
    plt.scatter(pred_x, pred_y, s=20, marker='.', c='m')

    # Nothing more to draw when no ground truth was supplied.
    if gt_pts is None:
        return
    plt.scatter(gt_pts[:, 0], gt_pts[:, 1], s=15, marker='.', c='g')



def visualize_output(test_images, test_outputs, gt_pts=None, batch_size=5):
    """Plot a batch of images with predicted (and optional true) keypoints.

    Args:
        test_images: batch of image tensors, each C x H x W.
        test_outputs: batch of predicted keypoint tensors, each N x 2, in
            normalised coordinates.
        gt_pts: optional batch of ground-truth keypoints (tensors or
            arrays) in the same normalised coordinates.
        batch_size: maximum number of images to draw; fewer are drawn when
            the batch is smaller.

    Keypoints are mapped back to pixel coordinates with ``pts * 50 + 100``,
    the inverse of the ``(pts - 100) / 50`` scaling used by ``Normalize``.
    """
    # Never index past the end of the batch.
    n_images = min(batch_size, len(test_images), len(test_outputs))
    if n_images <= 0:
        return

    # One figure for the whole batch, laid out as a grid of up to 10 columns.
    n_cols = min(n_images, 10)
    n_rows = -(-n_images // n_cols)   # ceiling division
    plt.figure(figsize=(3 * n_cols, 3 * n_rows))

    for i in range(n_images):
        plt.subplot(n_rows, n_cols, i + 1)

        # Un-transform the image: tensor (C x H x W) -> numpy (H x W x C).
        image = test_images[i].detach().cpu().numpy()
        image = np.transpose(image, (1, 2, 0))

        # Un-transform the predicted keypoints and undo their normalisation.
        predicted_key_pts = test_outputs[i].detach().cpu().numpy()
        predicted_key_pts = predicted_key_pts * 50.0 + 100

        # Same treatment for the ground truth, when it is given.
        ground_truth_pts = None
        if gt_pts is not None:
            ground_truth_pts = gt_pts[i]
            if torch.is_tensor(ground_truth_pts):
                ground_truth_pts = ground_truth_pts.detach().cpu().numpy()
            ground_truth_pts = ground_truth_pts * 50.0 + 100

        show_all_keypoints(np.squeeze(image), predicted_key_pts, ground_truth_pts)

        plt.axis('off')

    plt.show()
    
# Visualise the predicted keypoints for the first 30 images of the
# validation batch.
visualize_output(val_img, pred_key, batch_size=30)

In [None]:
def test_mod(epochs):
    """Compute the validation loss of ``face_model`` over ``val_loader``.

    Uses the notebook-level ``face_model``, ``val_loader`` and ``loss``
    objects, runs in eval mode without gradient tracking, and moves every
    batch to the device the model lives on.

    Args:
        epochs: number of full passes over ``val_loader``.

    Returns:
        list of float: the loss of every evaluated batch, in order.
    """
    # Follow the model's device instead of assuming CUDA.
    device = next(face_model.parameters()).device

    face_model.eval()
    all_val_loss = []
    with torch.no_grad():
        for epoch in range(epochs):
            running_loss = 0.0
            for i, data in enumerate(val_loader):
                # Regression loss needs float32 inputs and targets.
                images = data['image'].to(device=device, dtype=torch.float32)
                cords = data['keypoints'].to(device=device, dtype=torch.float32)

                # Flatten the keypoints to match the network output.
                cords_final = cords.reshape(cords.size(0), -1)

                # Forward pass and loss.
                out = face_model(images)
                batch_loss = loss(out, cords_final).item()

                all_val_loss.append(batch_loss)
                running_loss += batch_loss

                if i % 10 == 9:    # print every 10 batches
                    print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, i+1, running_loss/10))
                    running_loss = 0.0

    return all_val_loss

In [None]:
# Evaluate on the validation loader; 10 is the number of passes requested
# from test_mod, and its return value is kept as validation_loss.
validation_loss = test_mod(10)

In [None]:
import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists (a no-op when it is already there).
os.makedirs('/kaggle/working/models/', exist_ok=True)

# Bundle the model object, its weights and the optimizer state. The 'model'
# and 'state_dict' keys are the ones load_checkpoint reads back.
checkpoint = {
    'model': face_model,
    'state_dict': face_model.state_dict(),
    'optimizer': opt.state_dict(),
}

# Save the checkpoint.
torch.save(checkpoint, '/kaggle/working/models/' + 'model1.pth')
print("Model saved")

In [None]:
def load_checkpoint(filepath, map_location=None):
    """Load a saved checkpoint and return its model, frozen, in eval mode.

    The checkpoint must be a dict with a pickled model under ``'model'`` and
    its weights under ``'state_dict'``.

    SECURITY: such a checkpoint is unpickled in full, which can execute
    arbitrary code. Only load files you created or otherwise trust.

    Args:
        filepath: path of the checkpoint file.
        map_location: forwarded to ``torch.load``. When omitted and no GPU
            is available, tensors are mapped to the CPU so checkpoints
            saved on a GPU still load.

    Returns:
        The model with ``requires_grad`` disabled on every parameter, in
        evaluation mode.
    """
    import inspect

    if map_location is None and not torch.cuda.is_available():
        map_location = torch.device('cpu')

    load_kwargs = {'map_location': map_location}
    # Newer PyTorch releases default to weights_only=True, which refuses the
    # pickled model object stored under 'model'. Older releases do not know
    # the keyword at all, so only pass it when torch.load accepts it.
    if 'weights_only' in inspect.signature(torch.load).parameters:
        load_kwargs['weights_only'] = False
    checkpoint = torch.load(filepath, **load_kwargs)

    model = checkpoint['model']
    model.load_state_dict(checkpoint['state_dict'])

    # The model is only used for testing, so no gradients are needed.
    for parameter in model.parameters():
        parameter.requires_grad = False

    # eval() does not affect gradient computation; it switches layers such
    # as dropout and batchnorm to their inference behaviour.
    return model.eval()


#filepath =  + str(best_epoch) + ".pth"
#loading the model for testing
#loaded_model = load_checkpoint(filepath)