In [0]:
'''
My implementation of the network from the paper https://arxiv.org/pdf/1710.00977.pdf

Input : 1x224x224 image (single channel)
Output: 68 facial keypoints
'''


In [0]:
# Mount Google Drive into this Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import sys
# Put the Drive project folder on the import path; the helper modules imported
# further down (model_NaimishNet_3, data_load) are presumably stored there -- verify.
sys.path.append("/content/drive/My Drive/key_point")

In [0]:
!mkdir /data
!wget -P /data/ https://s3.amazonaws.com/video.udacity-data.com/topher/2018/May/5aea1b91_train-test-data/train-test-data.zip
!unzip -n /data/train-test-data.zip -d /data

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as I

import matplotlib.pyplot as plt
import numpy as np

#from model_NaimishNet import Net
from model_NaimishNet_3 import Net


# Instantiate the keypoint network and print its module structure.
net = Net()
print(net)

In [0]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# the dataset we created in Notebook 1 is copied in the helper file `data_load.py`
from data_load import FacialKeypointsDataset
# the transforms we defined in Notebook 1 are in the helper file `data_load.py`
from data_load import Rescale, RandomCrop, Normalize, ToTensor


# Pre-processing pipeline applied to every sample.
# Order matters: rescale first, then take the smaller crop, then normalize,
# and finally convert to tensors.
# (Author's note: there is no cropping in the paper.)
data_transform = transforms.Compose([
    Rescale(250),
    RandomCrop(224),
    Normalize(),
    ToTensor(),
])

# sanity check that a transform pipeline has been defined
assert data_transform is not None, 'Define a data_transform'

In [0]:
# Training dataset; data_transform is handed to the dataset as its transform.
transformed_dataset = FacialKeypointsDataset(
    csv_file='/data/training_frames_keypoints.csv',
    root_dir='/data/training/',
    transform=data_transform,
)

print('Number of images: ', len(transformed_dataset))

# Peek at the first four samples: index, image size and keypoints size.
for i in range(4):
    sample = transformed_dataset[i]
    image_size = sample['image'].size()
    keypoints_size = sample['keypoints'].size()
    print(i, image_size, keypoints_size)

In [0]:
# Inspect the column labels of the dataset's key_pts_frame table
# (presumably the parsed keypoints CSV -- verify in data_load.py).
transformed_dataset.key_pts_frame.columns

In [0]:
# number of samples per batch, shared by the train and test loaders
batch_size = 30

# Shuffled, batched loader over the training samples.
train_loader = DataLoader(
    transformed_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

# Test split: built like the training dataset and using the same transform pipeline.
test_dataset = FacialKeypointsDataset(
    csv_file='/data/test_frames_keypoints.csv',
    root_dir='/data/test/',
    transform=data_transform,
)

# Batched loader over the test samples (also shuffled, so each pass starts
# with a different batch).
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [0]:
# test the model on a batch of test images

def net_sample_output():
    """Run the network on the first batch drawn from ``test_loader``.

    Uses the notebook-level ``net`` and ``test_loader``. The forward pass is
    done in evaluation mode and without gradient tracking; the network's
    previous train/eval mode is restored before returning.

    Returns:
        A tuple ``(images, output_pts, key_pts)``: the float image batch that
        was fed to the network, the predictions viewed as ``batch x 68 x -1``,
        and the ground-truth keypoints taken from the sample.
        ``None`` when ``test_loader`` yields no batch.
    """
    # Remember the current mode so that sampling between training runs does
    # not silently leave the network in eval mode.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for sample in test_loader:
                # sample data: images (as FloatTensors) and ground truth keypoints
                images = sample['image'].type(torch.FloatTensor)
                key_pts = sample['keypoints']
                print ("images.shape", images.shape)

                # forward pass to get net output
                output_pts = net(images)
                print ("output_pts.shape", output_pts.shape)

                # reshape to batch_size x 68 x 2 pts
                output_pts = output_pts.view(output_pts.size()[0], 68, -1)

                # only the first batch is needed
                return images, output_pts, key_pts
    finally:
        net.train(was_training)

    return None
            

In [0]:
#net_sample_output()

In [0]:
# run one test batch through the network and keep images, predictions and ground truth
test_images, test_outputs, gt_pts = net_sample_output()

# print out the dimensions of the data to see if they make sense
print(test_images.data.size())
print(test_outputs.data.size())
print(gt_pts.size())

In [0]:
def show_all_keypoints(image, predicted_key_pts, gt_pts=None):
    """Draw an image with its keypoints on the current matplotlib axes.

    Args:
        image: image to display; rendered with the gray colormap.
        predicted_key_pts: array indexed as ``[:, 0]`` (x) and ``[:, 1]`` (y);
            drawn as magenta dots.
        gt_pts: optional ground-truth points with the same layout; drawn as
            green crosses when given.
    """
    plt.imshow(image, cmap='gray')

    pred_x = predicted_key_pts[:, 0]
    pred_y = predicted_key_pts[:, 1]
    plt.scatter(pred_x, pred_y, s=20, marker='.', c='m')

    # nothing more to draw without ground truth
    if gt_pts is None:
        return

    plt.scatter(gt_pts[:, 0], gt_pts[:, 1], s=20, marker='x', c='g')


In [0]:
# visualize the output
# by default this shows a batch of 10 images
def visualize_output(test_images, test_outputs, gt_pts=None, batch_size=10):
    """Plot a row of test images with predicted (and optional true) keypoints.

    Args:
        test_images: batch of image tensors; each is converted to numpy and
            transposed from (C, H, W) to (H, W, C) for plotting.
        test_outputs: batch of predicted keypoints, one entry per image.
        gt_pts: optional batch of ground-truth keypoints.
        batch_size: maximum number of images to show. Fewer are shown when
            the batch holds fewer images.
    """
    # never index past the end of the batch
    n_images = min(batch_size, len(test_images), len(test_outputs))
    if n_images <= 0:
        return

    # One figure for the whole row. Creating the figure inside the loop would
    # open a separate, mostly empty figure for every image.
    plt.figure(figsize=(20,10))

    for i in range(n_images):
        plt.subplot(1, n_images, i+1)

        # un-transform the image data: tensor -> numpy, torch layout -> image layout
        image = test_images[i].data.numpy()
        image = np.transpose(image, (1, 2, 0))

        # un-transform the predicted key_pts data and undo the keypoint normalization
        # (scale 50, offset 100 -- presumably the inverse of Normalize in data_load.py; verify)
        predicted_key_pts = test_outputs[i].data.numpy()
        predicted_key_pts = predicted_key_pts*50.0+100

        # ground truth points for comparison, if they exist
        ground_truth_pts = None
        if gt_pts is not None:
            ground_truth_pts = gt_pts[i]*50.0+100

        show_all_keypoints(np.squeeze(image), predicted_key_pts, ground_truth_pts)

        plt.axis('off')

    plt.show()
    
    
# plot the sampled test batch: predictions against ground truth (default: 10 images)
visualize_output(test_images, test_outputs, gt_pts)

In [0]:
import torch.optim as optim

# regression loss: mean squared error between the network output and the target keypoints
criterion = nn.MSELoss()

# Adam over all parameters of the network
optimizer = optim.Adam(net.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-06)

In [0]:
def train_net(n_epochs):
    """Train the notebook-level ``net`` for ``n_epochs`` passes over ``train_loader``.

    Uses the notebook-level ``criterion`` and ``optimizer``. The running loss
    is averaged and printed once every 10 batches. Batches left over at the
    end of an epoch (fewer than 10) are not reported.

    Args:
        n_epochs: number of passes over the training data.
    """
    log_every = 10  # number of batches folded into each printed average

    # prepare the net for training
    net.train()

    for epoch in range(n_epochs):  # loop over the dataset multiple times

        running_loss = 0.0

        # train on batches of data, assumes you already have train_loader
        for batch_i, data in enumerate(train_loader):
            # input images and their keypoints, as floats for the regression loss
            images = data['image'].type(torch.FloatTensor)
            key_pts = data['keypoints']

            # flatten pts to one row per sample
            key_pts = key_pts.view(key_pts.size(0), -1).type(torch.FloatTensor)

            # zero the parameter (weight) gradients
            optimizer.zero_grad()

            # forward pass and loss between predicted and target keypoints
            output_pts = net(images)
            loss = criterion(output_pts, key_pts)

            # backward pass to calculate the weight gradients, then update the weights
            loss.backward()
            optimizer.step()

            # Loss statistics: report only when a full window of `log_every`
            # batches has accumulated, so the printed value is a true average.
            running_loss += loss.item()
            if (batch_i + 1) % log_every == 0:
                print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(epoch + 1, batch_i+1, running_loss/log_every))
                running_loss = 0.0

    print('Finished Training')


In [0]:
# train your network
n_epochs =20 # start small; increase once the model structure and hyperparameters are settled (author's note: should be 300)

# `active_session()` is a Workspaces-specific context manager that keeps the
# connection alive while training; it is not part of pytorch and is left disabled here
#with active_session():
train_net(n_epochs)

In [0]:
# get a sample of test data again (a fresh batch, now that training has run)
test_images, test_outputs, gt_pts = net_sample_output()

# print the dimensions of images, predictions and ground truth
print(test_images.data.size())
print(test_outputs.data.size())
print(gt_pts.size())

In [0]:
# visualize the test output with the same function that was used before training

visualize_output(test_images, test_outputs, gt_pts)

In [0]:
## TODO: change the name to something unique for each new model
# NOTE(review): the name below says "epocs10" while the training cell sets n_epochs = 20 --
# confirm that the name matches the run being saved
model_dir = "/content/drive/My Drive/key_point/"
model_name = 'keypoints_model_20190927_7x7_epocs10_btc30_lr0001.pt'

# after training, save the model parameters (state_dict) to model_dir
torch.save(net.state_dict(), model_dir+model_name)

In [0]:
# Get the weights in the first conv layer, "conv1"
# if necessary, change this to reflect the name of your first conv layer
weights1 = net.conv1.weight.data

# numpy copy of the weights for indexing and plotting
w = weights1.numpy()

# which filter of conv1 to inspect
filter_index = 0

# print that filter's kernel (first index along the second axis) and the kernel's shape
print(w[filter_index][0])
print(w[filter_index][0].shape)

# display the filter weights
plt.imshow(w[filter_index][0], cmap='gray')


In [0]:
import cv2 as cv


In [0]:
##TODO: load in and display any image from the transformed test dataset

## TODO: Using cv's filter2D function,
## apply a specific set of filter weights (like the one displayed above) to the test image


def w_filter(img, kernel):
    """Apply ``kernel`` to ``img`` with OpenCV's ``filter2D``.

    Args:
        img: input image array.
        kernel: filter kernel array.

    Returns:
        The filtered image; ``ddepth=-1`` keeps the depth of the input.
    """
    return cv.filter2D(img, -1, kernel)
  

  
# Apply one learned conv1 kernel to a test image and show kernel, input and result.
img_num = 9
img    = test_images[img_num].data.numpy()

# cv.filter2D reads a 3-D array as rows x cols x channels. Passing the
# 1 x H x W tensor layout directly would make it a one-row image with W
# channels, so take the single channel as a proper 2-D H x W image first.
img_2d = img[0]

# first kernel of conv1 (first input channel), scaled down by 0.1
kernel = net.conv1.weight.data.numpy()[0][0] *0.1

#kernel = np.ones((3,3),np.float32)  #testing on simple kernel
#kernel[1,1] = 8

filtered_img = w_filter(img_2d,kernel)

# kernel, original image and filtered image side by side
plt.figure(figsize=(20,10))
imgs = [kernel, img_2d, filtered_img]
for i in range(len(imgs)):
  plt.subplot(1, 3, i+1)
  plt.imshow(imgs[i], cmap='gray')
  plt.axis('off')
plt.show()


# shapes of the three panels
for i in range(len(imgs)):
  print (imgs[i].shape)

In [0]:
# Enlarged view of one sample (index 5) from the last sampled test batch.
# NOTE: `batch_size` here rebinds the notebook-level name that the DataLoader cell also sets.
i = 5
batch_size = 7
plt.figure(figsize=(75, 25))
ax = plt.subplot(1, batch_size, i + 1)

# un-transform the image data: tensor -> numpy array, torch layout -> numpy image layout
image = np.transpose(test_images[i].data.numpy(), (1, 2, 0))

# un-transform the predicted key_pts data and undo the keypoint normalization
predicted_key_pts = test_outputs[i].data.numpy()
predicted_key_pts = predicted_key_pts * 50.0 + 100

# ground truth points for comparison, if they exist
ground_truth_pts = gt_pts[i] * 50.0 + 100 if gt_pts is not None else None

# draw the image with both sets of points
show_all_keypoints(np.squeeze(image), predicted_key_pts, ground_truth_pts)

#plt.axis('off')

plt.show()