In [1]:
import datetime
import os
import subprocess

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageDraw, ImageFont
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Width and height (in pixels) that every encoded image is expected to have;
# used when validating and decoding the pixel strings.
IMAGE_SIZE_WIDTH = 96
IMAGE_SIZE_HEIGHT = 96

In [3]:
# Training hyperparameters.
BATCH_SIZE = 256  # batch size passed to the train/validation DataLoaders
LEARNING_RATE = 0.001  # referenced by the (currently commented-out) Adam optimiser setup
EPOCHS = 20  # referenced by the (currently commented-out) train(...) call

In [4]:
def create_folder(path: str):
    """Create the directory `path`, including any missing parent directories.

    Does nothing if the directory already exists (same semantics as
    ``mkdir -p``). The directory is created with ``os.makedirs`` rather than
    by concatenating `path` into a shell command, so paths containing spaces
    or shell metacharacters are handled literally and no shell is required.

    Args:
        path: directory to create (relative or absolute).

    Raises:
        OSError: if the directory cannot be created, e.g. because of missing
            permissions or because a regular file already exists at `path`.
    """
    os.makedirs(path, exist_ok=True)

In [5]:
# Load the four CSV tables from the local data/ directory.
# NOTE(review): "dample" looks like a typo for "sample"; the name is kept
# because the display cell below refers to it.
df_dample_submissions = pd.read_csv("data/SampleSubmission.csv")
df_id_lookup_table = pd.read_csv("data/IdLookupTable.csv")
df_train = pd.read_csv("data/training.csv")
df_test = pd.read_csv("data/test.csv")

## Explore tables

Display each of the loaded tables to get a feel for their columns and contents.

In [6]:
df_dample_submissions

Unnamed: 0,RowId,Location
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
27119,27120,0
27120,27121,0
27121,27122,0
27122,27123,0


In [7]:
df_id_lookup_table

Unnamed: 0,RowId,ImageId,FeatureName,Location
0,1,1,left_eye_center_x,
1,2,1,left_eye_center_y,
2,3,1,right_eye_center_x,
3,4,1,right_eye_center_y,
4,5,1,left_eye_inner_corner_x,
...,...,...,...,...
27119,27120,1783,right_eye_center_y,
27120,27121,1783,nose_tip_x,
27121,27122,1783,nose_tip_y,
27122,27123,1783,mouth_center_bottom_lip_x,


In [8]:
df_train

Unnamed: 0,left_eye_center_x,left_eye_center_y,right_eye_center_x,right_eye_center_y,left_eye_inner_corner_x,left_eye_inner_corner_y,left_eye_outer_corner_x,left_eye_outer_corner_y,right_eye_inner_corner_x,right_eye_inner_corner_y,...,nose_tip_y,mouth_left_corner_x,mouth_left_corner_y,mouth_right_corner_x,mouth_right_corner_y,mouth_center_top_lip_x,mouth_center_top_lip_y,mouth_center_bottom_lip_x,mouth_center_bottom_lip_y,Image
0,66.033564,39.002274,30.227008,36.421678,59.582075,39.647423,73.130346,39.969997,36.356571,37.389402,...,57.066803,61.195308,79.970165,28.614496,77.388992,43.312602,72.935459,43.130707,84.485774,238 236 237 238 240 240 239 241 241 243 240 23...
1,64.332936,34.970077,29.949277,33.448715,58.856170,35.274349,70.722723,36.187166,36.034723,34.361532,...,55.660936,56.421447,76.352000,35.122383,76.047660,46.684596,70.266553,45.467915,85.480170,219 215 204 196 204 211 212 200 180 168 178 19...
2,65.057053,34.909642,30.903789,34.909642,59.412000,36.320968,70.984421,36.320968,37.678105,36.320968,...,53.538947,60.822947,73.014316,33.726316,72.732000,47.274947,70.191789,47.274947,78.659368,144 142 159 180 188 188 184 180 167 132 84 59 ...
3,65.225739,37.261774,32.023096,37.261774,60.003339,39.127179,72.314713,38.380967,37.618643,38.754115,...,54.166539,65.598887,72.703722,37.245496,74.195478,50.303165,70.091687,51.561183,78.268383,193 192 193 194 194 194 193 192 168 111 50 12 ...
4,66.725301,39.621261,32.244810,38.042032,58.565890,39.621261,72.515926,39.884466,36.982380,39.094852,...,64.889521,60.671411,77.523239,31.191755,76.997301,44.962748,73.707387,44.227141,86.871166,147 148 160 196 215 214 216 217 219 220 206 18...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7044,67.402546,31.842551,29.746749,38.632942,,,,,,,...,67.029093,,,,,,,50.426637,79.683921,71 74 85 105 116 128 139 150 170 187 201 209 2...
7045,66.134400,38.365501,30.478626,39.950198,,,,,,,...,66.626011,,,,,,,50.287397,77.983023,60 60 62 57 55 51 49 48 50 53 56 56 106 89 77 ...
7046,66.690732,36.845221,31.666420,39.685042,,,,,,,...,67.515161,,,,,,,49.462572,78.117120,74 74 74 78 79 79 79 81 77 78 80 73 72 81 77 1...
7047,70.965082,39.853666,30.543285,40.772339,,,,,,,...,66.724988,,,,,,,50.065186,79.586447,254 254 254 254 254 238 193 145 121 118 119 10...


In [9]:
df_test

Unnamed: 0,ImageId,Image
0,1,182 183 182 182 180 180 176 169 156 137 124 10...
1,2,76 87 81 72 65 59 64 76 69 42 31 38 49 58 58 4...
2,3,177 176 174 170 169 169 168 166 166 166 161 14...
3,4,176 174 174 175 174 174 176 176 175 171 165 15...
4,5,50 47 44 101 144 149 120 58 48 42 35 35 37 39 ...
...,...,...
1778,1779,101 101 101 100 100 97 97 98 102 149 214 206 1...
1779,1780,201 191 171 158 145 140 136 130 123 115 108 10...
1780,1781,28 28 29 30 31 32 33 34 39 44 46 46 49 54 61 7...
1781,1782,104 95 71 57 46 52 65 70 70 67 76 72 69 69 72 ...


Show a sample image and define helper functions for visualization.

In [10]:
def split(a, n):
    """Split `a` into `n` consecutive chunks whose sizes differ by at most one.

    When ``len(a)`` is not a multiple of `n`, the first ``len(a) % n`` chunks
    receive one extra element, so no element is ever dropped. When it is a
    multiple of `n`, every chunk has exactly ``len(a) // n`` elements.

    Args:
        a: a sliceable sequence (list, tuple, string, ...).
        n: number of chunks to produce; must be non-zero.

    Returns:
        A list of `n` slices of `a`, in order; concatenating them yields `a`.

    Raises:
        ZeroDivisionError: if `n` is 0.
    """
    k, m = divmod(len(a), n)
    # min(i, m) shifts the chunk boundaries so the remainder is spread over
    # the first m chunks instead of being cut off.
    return [a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]


def get_image(encoded_image: str):
    """Decode a space-separated string of grayscale pixel values into a PIL image.

    The values are interpreted row by row: the first IMAGE_SIZE_WIDTH values
    form the top row, and there are IMAGE_SIZE_HEIGHT rows in total. PIL
    describes image sizes as (width, height) while arrays are indexed
    [row][column]; building the image from a (height, width) array keeps the
    two conventions from being mixed up, which matters as soon as the image is
    not square.

    Args:
        encoded_image: pixel intensities separated by single spaces.

    Returns:
        A PIL image in mode "L" (8-bit grayscale).

    Raises:
        ValueError: if a token is not an integer or the number of values is
            not IMAGE_SIZE_HEIGHT * IMAGE_SIZE_WIDTH.
    """
    pixel_values = np.array([int(value) for value in encoded_image.split(' ')])
    pixel_rows = pixel_values.reshape(IMAGE_SIZE_HEIGHT, IMAGE_SIZE_WIDTH)
    # clip before the cast so out-of-range values saturate instead of wrapping
    return Image.fromarray(np.clip(pixel_rows, 0, 255).astype(np.uint8))


def check_length(encoded_image: str):
    """Assert that an encoded image holds the expected number of pixel values.

    Pixel values are separated by single spaces, so an image with
    IMAGE_SIZE_HEIGHT * IMAGE_SIZE_WIDTH values contains exactly one
    separator fewer than that.

    Raises:
        AssertionError: if the number of separators does not match.
    """
    expected_separators = IMAGE_SIZE_HEIGHT * IMAGE_SIZE_WIDTH - 1
    found_separators = encoded_image.count(' ')
    assert found_separators == expected_separators, "image size don't equal expected size"


def draw_points(img, points: list, color=(255, 0, 0)):
    """Return an RGB copy of `img` with a small cross drawn at every point.

    Args:
        img: PIL image; it is converted to RGB and left unmodified.
        points: iterable of (x, y) pairs in pixel coordinates. Each coordinate
            must be convertible with ``float()``. A pair is skipped when
            either coordinate is NaN (missing keypoint).
        color: RGB tuple used for the crosses; defaults to red.

    Returns:
        A new RGB PIL image with the crosses drawn on it.
    """
    rgb_img = img.convert('RGB')

    d = ImageDraw.Draw(rgb_img)
    for point in points:
        x, y = float(point[0]), float(point[1])
        # int(nan) raises ValueError, so both coordinates have to be checked
        if np.isnan(x) or np.isnan(y):
            continue

        cx, cy = int(x), int(y)
        # a 5-pixel-wide cross centred on the point
        d.line([(cx - 2, cy), (cx + 2, cy)], color)
        d.line([(cx, cy - 2), (cx, cy + 2)], color)

    return rgb_img


def draw_training_record(record):
    """Draw the facial keypoints of one training record onto its image and show it.

    Args:
        record: a row of the training table (pandas Series) whose last entry
            is the encoded image and whose preceding entries alternate
            x, y, x, y, ... keypoint coordinates.

    The image is opened with ``Image.show()``; nothing is returned.
    """
    values = record.to_list()
    coordinates, encoded_image = values[:-1], values[-1]

    # Pair the coordinates by position. Pairing on the truthiness of the
    # pending x value would treat an x coordinate of 0 as "no x yet" and
    # shift every following pair.
    points = list(zip(coordinates[0::2], coordinates[1::2]))

    img = get_image(encoded_image)
    img_points = draw_points(img, points)

    img_points.show()


def draw_predicted_target(img_arr, prediction, target):
    """Return the image with target (red) and predicted (blue) keypoints drawn on it.

    Args:
        img_arr: 2-D array-like of grayscale pixel intensities indexed
            [row][column]; anything ``np.asarray`` accepts (nested lists,
            numpy arrays, CPU torch tensors).
        prediction: iterable of predicted (x, y) points, drawn in blue.
        target: iterable of correct (x, y) points, drawn in red.

    Returns:
        An RGB PIL image with both sets of points drawn.
    """
    # Build the image straight from the (height, width) array. PIL sizes are
    # (width, height), so creating the image from the array avoids mixing up
    # the two conventions for non-square images.
    pixel_rows = np.asarray(img_arr)
    img = Image.fromarray(np.clip(pixel_rows, 0, 255).astype(np.uint8))

    # draw points to image
    img = draw_points(img, target)
    img = draw_points(img, prediction, color=(0, 0, 255))

    return img

In [11]:
# Visual sanity check: draw the keypoints of one arbitrarily chosen training
# record (positional row 1005) on top of its image.
record = df_train.iloc[1005]
draw_training_record(record)

## Checking datasets

### Training dataset

Check that every encoded image has the expected number of pixel values.

In [12]:
# Validate every encoded image of both tables; check_length raises an
# AssertionError on the first image whose size is not the expected one.
for frame in (df_train, df_test):
    for encoded_image in frame["Image"]:
        check_length(encoded_image)

In [13]:
df_train.describe()

Unnamed: 0,left_eye_center_x,left_eye_center_y,right_eye_center_x,right_eye_center_y,left_eye_inner_corner_x,left_eye_inner_corner_y,left_eye_outer_corner_x,left_eye_outer_corner_y,right_eye_inner_corner_x,right_eye_inner_corner_y,...,nose_tip_x,nose_tip_y,mouth_left_corner_x,mouth_left_corner_y,mouth_right_corner_x,mouth_right_corner_y,mouth_center_top_lip_x,mouth_center_top_lip_y,mouth_center_bottom_lip_x,mouth_center_bottom_lip_y
count,7039.0,7039.0,7036.0,7036.0,2271.0,2271.0,2267.0,2267.0,2268.0,2268.0,...,7049.0,7049.0,2269.0,2269.0,2270.0,2270.0,2275.0,2275.0,7016.0,7016.0
mean,66.359021,37.651234,30.306102,37.976943,59.159339,37.944752,73.330478,37.707008,36.652607,37.989902,...,48.374186,62.715884,63.285735,75.970714,32.900404,76.179768,47.975412,72.919443,48.569472,78.970146
std,3.448233,3.152926,3.08323,3.033621,2.690354,2.307332,3.405852,2.881438,2.350268,2.311907,...,4.280038,5.720567,4.079063,4.579724,3.964552,4.417814,3.32306,5.2225,4.269961,5.407844
min,22.763345,1.616512,0.686592,4.091264,19.064954,27.190098,27.571879,26.250023,5.751046,26.250023,...,12.944699,17.932414,22.923362,57.023258,2.245766,56.690208,12.605173,56.719043,12.536477,25.850503
25%,65.082895,35.900451,28.783339,36.327681,58.039339,36.626283,71.716503,36.085523,35.506126,36.766783,...,46.60237,59.292697,61.257143,72.875707,30.797646,73.256472,46.494571,69.396748,46.573703,75.549715
50%,66.497566,37.528055,30.251378,37.813273,59.304615,37.87804,73.247824,37.639015,36.651562,37.944075,...,48.424713,63.451421,63.178286,75.778689,32.982305,76.000592,47.90549,72.605173,48.594978,78.696326
75%,68.024752,39.258449,31.768334,39.566729,60.51981,39.260348,75.015645,39.373489,37.753571,39.192917,...,50.330437,66.492686,65.376243,78.879123,35.101302,78.956357,49.296488,76.221644,50.677631,82.233993
max,94.68928,80.502649,85.039381,81.270911,84.440991,66.562559,95.25809,64.61823,70.714966,69.808803,...,89.438592,95.935644,84.767123,94.673637,74.017993,95.510678,83.989082,94.548359,89.438592,95.808983


In [14]:
df_train.isnull().sum()

left_eye_center_x              10
left_eye_center_y              10
right_eye_center_x             13
right_eye_center_y             13
left_eye_inner_corner_x      4778
left_eye_inner_corner_y      4778
left_eye_outer_corner_x      4782
left_eye_outer_corner_y      4782
right_eye_inner_corner_x     4781
right_eye_inner_corner_y     4781
right_eye_outer_corner_x     4781
right_eye_outer_corner_y     4781
left_eyebrow_inner_end_x     4779
left_eyebrow_inner_end_y     4779
left_eyebrow_outer_end_x     4824
left_eyebrow_outer_end_y     4824
right_eyebrow_inner_end_x    4779
right_eyebrow_inner_end_y    4779
right_eyebrow_outer_end_x    4813
right_eyebrow_outer_end_y    4813
nose_tip_x                      0
nose_tip_y                      0
mouth_left_corner_x          4780
mouth_left_corner_y          4780
mouth_right_corner_x         4779
mouth_right_corner_y         4779
mouth_center_top_lip_x       4774
mouth_center_top_lip_y       4774
mouth_center_bottom_lip_x      33
mouth_center_b

Round the keypoint coordinates to whole numbers.

In [15]:
# Round every keypoint column (all columns except the last one, "Image") to
# the nearest whole number, then re-attach the untouched image column.
df_train = df_train.iloc[:, :-1].round().join(df_train["Image"])
df_train

Unnamed: 0,left_eye_center_x,left_eye_center_y,right_eye_center_x,right_eye_center_y,left_eye_inner_corner_x,left_eye_inner_corner_y,left_eye_outer_corner_x,left_eye_outer_corner_y,right_eye_inner_corner_x,right_eye_inner_corner_y,...,nose_tip_y,mouth_left_corner_x,mouth_left_corner_y,mouth_right_corner_x,mouth_right_corner_y,mouth_center_top_lip_x,mouth_center_top_lip_y,mouth_center_bottom_lip_x,mouth_center_bottom_lip_y,Image
0,66.0,39.0,30.0,36.0,60.0,40.0,73.0,40.0,36.0,37.0,...,57.0,61.0,80.0,29.0,77.0,43.0,73.0,43.0,84.0,238 236 237 238 240 240 239 241 241 243 240 23...
1,64.0,35.0,30.0,33.0,59.0,35.0,71.0,36.0,36.0,34.0,...,56.0,56.0,76.0,35.0,76.0,47.0,70.0,45.0,85.0,219 215 204 196 204 211 212 200 180 168 178 19...
2,65.0,35.0,31.0,35.0,59.0,36.0,71.0,36.0,38.0,36.0,...,54.0,61.0,73.0,34.0,73.0,47.0,70.0,47.0,79.0,144 142 159 180 188 188 184 180 167 132 84 59 ...
3,65.0,37.0,32.0,37.0,60.0,39.0,72.0,38.0,38.0,39.0,...,54.0,66.0,73.0,37.0,74.0,50.0,70.0,52.0,78.0,193 192 193 194 194 194 193 192 168 111 50 12 ...
4,67.0,40.0,32.0,38.0,59.0,40.0,73.0,40.0,37.0,39.0,...,65.0,61.0,78.0,31.0,77.0,45.0,74.0,44.0,87.0,147 148 160 196 215 214 216 217 219 220 206 18...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7044,67.0,32.0,30.0,39.0,,,,,,,...,67.0,,,,,,,50.0,80.0,71 74 85 105 116 128 139 150 170 187 201 209 2...
7045,66.0,38.0,30.0,40.0,,,,,,,...,67.0,,,,,,,50.0,78.0,60 60 62 57 55 51 49 48 50 53 56 56 106 89 77 ...
7046,67.0,37.0,32.0,40.0,,,,,,,...,68.0,,,,,,,49.0,78.0,74 74 74 78 79 79 79 81 77 78 80 73 72 81 77 1...
7047,71.0,40.0,31.0,41.0,,,,,,,...,67.0,,,,,,,50.0,80.0,254 254 254 254 254 238 193 145 121 118 119 10...


### Test dataset

...

## Prepare data

Many facial keypoints are missing from the training data, so I will train a separate model for each facial keypoint.
That way every record can be used for the keypoints it does contain (I don't know how to backpropagate null target values).

In [None]:
def encoded_images_to_2darray(encoded_images):
    """Decode space-separated pixel strings into nested lists of integers.

    Args:
        encoded_images: iterable of strings, each holding the pixel values of
            one image separated by single spaces, listed row by row.

    Returns:
        A list with one entry per image; each entry is a list of
        IMAGE_SIZE_HEIGHT rows (rows hold IMAGE_SIZE_WIDTH values when the
        string has the expected length).

    Raises:
        ValueError: if a token cannot be parsed as an integer.
    """
    images = []
    for encoded_image in encoded_images:
        pixel_values = [int(value) for value in encoded_image.split(' ')]
        # the number of chunks is the number of ROWS, i.e. the image height
        images.append(split(pixel_values, IMAGE_SIZE_HEIGHT))
    return images
    
# Build one train/validation DataLoader pair per facial keypoint.
# Keypoint columns come in consecutive (<name>_x, <name>_y) pairs and
# "Image" is the last column of df_train.
SPLIT_SEED = 42  # fixed so the train/validation split is the same on every run

data = {}
keypoint_columns = df_train.columns[:-1]
for x_col, y_col in zip(keypoint_columns[0::2], keypoint_columns[1::2]):
    feature_name = x_col[:-2]  # strip the "_x" suffix

    # keep only records where BOTH coordinates of this keypoint are present;
    # a NaN in either column cannot be converted to int below
    df_train_data = df_train.dropna(subset=[x_col, y_col])
    X = df_train_data["Image"].tolist()
    y = df_train_data[[x_col, y_col]].to_numpy()

    # split to train and validation sets
    X_train, X_validation, y_train, y_validation = train_test_split(
        X, y, test_size=0.3, train_size=0.7, random_state=SPLIT_SEED
    )

    # decode the images and convert the targets to whole-pixel coordinates
    X_train = np.array(encoded_images_to_2darray(X_train))
    X_validation = np.array(encoded_images_to_2darray(X_validation))
    y_train = np.array(y_train).astype(int)
    y_validation = np.array(y_validation).astype(int)

    # save data as datasets
    print(X_train.shape, X_validation.shape, y_train.shape, y_validation.shape)
    dataset_train = TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train))
    dataset_validation = TensorDataset(torch.Tensor(X_validation), torch.Tensor(y_validation))
    data[feature_name] = {
        "dataloader_train": DataLoader(dataset_train, BATCH_SIZE),
        "dataloader_validation": DataLoader(dataset_validation, BATCH_SIZE)
    }

    print(feature_name, "done")


(4927, 96, 96) (2112, 96, 96) (4927, 2) (2112, 2)
left_eye_center done
(4925, 96, 96) (2111, 96, 96) (4925, 2) (2111, 2)
right_eye_center done
(1589, 96, 96) (682, 96, 96) (1589, 2) (682, 2)
left_eye_inner_corner done
(1586, 96, 96) (681, 96, 96) (1586, 2) (681, 2)
left_eye_outer_corner done
(1587, 96, 96) (681, 96, 96) (1587, 2) (681, 2)
right_eye_inner_corner done


## PyTorch implementation

create CNN architecture

In [None]:
class CNN(nn.Module):
    """Small convolutional network mapping a 1-channel image to two outputs.

    Three convolution stages (1 -> 4 -> 16 -> 32 channels), each followed by
    ReLU and 2x2 max pooling, feed a single linear layer with two outputs,
    followed by a final ReLU. The linear layers take 32*12*12 input features,
    which corresponds to a 96x96 input halved three times by the pooling.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 3x3 convolution -> ReLU -> 2x2 max-pool stage."""
        convolution = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        return nn.Sequential(convolution, nn.ReLU(), nn.MaxPool2d(kernel_size=2))

    def __init__(self):
        super().__init__()
        # Attribute names and construction order are kept stable: they
        # determine the state_dict keys and the order of random initialisation.
        self.conv1 = self._conv_stage(1, 4)
        self.conv2 = self._conv_stage(4, 16)
        self.conv3 = self._conv_stage(16, 32)
        self.flatten = nn.Flatten()

        flat_features = 32 * 12 * 12  # = 4608
        # linear1 and linear2 are registered (so they appear in the
        # state_dict) but forward() only uses `linear`.
        self.linear1 = nn.Linear(flat_features, 32)
        self.linear2 = nn.Linear(32, 2)
        self.linear = nn.Linear(flat_features, 2)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for a batch `x` of shape (N, 1, H, W)."""
        features = x
        for stage in (self.conv1, self.conv2, self.conv3):
            features = stage(features)
        flat = self.flatten(features)
        return self.relu(self.linear(flat))

define training function

In [None]:
def train_single_epoch(model, train_dataloader, loss_fn, optimiser, device, validation_dataloader):
    """Run one training pass over `train_dataloader`, then evaluate on `validation_dataloader`.

    Args:
        model: the ``nn.Module`` to train; it must accept batches shaped
            (batch, 1, height, width).
        train_dataloader: iterable of (X, y) tensor batches used for training,
            with X shaped (batch, height, width).
        loss_fn: callable ``loss_fn(prediction, y)`` returning a scalar tensor.
        optimiser: optimiser that updates the parameters of `model`.
        device: device the batches are moved to before the forward pass.
        validation_dataloader: iterable of (X, y) tensor batches used only for
            computing the validation loss.

    Returns:
        ``(train_loss, validation_loss)``: the per-batch losses averaged over
        each loader. A loader that yields no batches gives ``nan`` instead of
        raising ZeroDivisionError.

    The model is left in evaluation mode when the function returns.
    """
    # TRAIN LOSS + BACKPROPAGATION
    model.train()  # an earlier predict()/validation pass may have left eval mode on
    train_loss = 0
    num_batches = 0
    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        # Convolution layers expect (batch, channels, height, width); the
        # batches come without a channel axis, so insert one at dim 1.
        X = torch.unsqueeze(X, dim=1)

        # calculate loss
        prediction = model(X)
        _loss = loss_fn(prediction, y)
        train_loss += _loss.item()

        # reset gradient
        optimiser.zero_grad()

        # backpropagation
        _loss.backward()

        # update parameters
        optimiser.step()

        num_batches += 1
    train_loss = train_loss / num_batches if num_batches else float("nan")
    print(f"train loss: {train_loss}")

    # VALIDATION LOSS
    model.eval()
    validation_loss = 0
    num_batches = 0
    # no gradients are needed for validation; skipping them saves memory and time
    with torch.no_grad():
        for X, y in validation_dataloader:
            X, y = X.to(device), y.to(device)

            # same channel axis as in the training loop
            X = torch.unsqueeze(X, dim=1)

            # calculate loss
            prediction = model(X)
            validation_loss += loss_fn(prediction, y).item()

            num_batches += 1
    validation_loss = validation_loss / num_batches if num_batches else float("nan")
    print(f"validation loss: {validation_loss}")

    return (train_loss, validation_loss)
    
def train(model, train_dataloader, loss_fn, optimiser, device, epochs, validation_dataloader):
    """Train `model` for `epochs` epochs and collect the losses of every epoch.

    Each epoch is delegated to ``train_single_epoch``; a header and separator
    line are printed around it.

    Returns:
        A list with one ``(train_loss, validation_loss)`` tuple per epoch, in
        chronological order.
    """
    loss_history = []
    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        epoch_losses = train_single_epoch(
            model,
            train_dataloader,
            loss_fn,
            optimiser,
            device,
            validation_dataloader,
        )
        print("--------------------------")
        loss_history.append(epoch_losses)

    print("==========================")
    print("Training done")

    return loss_history

define predictions

In [None]:
def predict(model, x):
    """Return the model output for the first item of the batch `x`.

    The model is switched to evaluation mode (and left in it) and the forward
    pass runs without gradient tracking.
    """
    model.eval()
    with torch.no_grad():
        first_output = model(x)[0]
    return first_output

Train one model per facial keypoint (the training code is currently commented out) or load previously trained models from disk.

In [None]:
#################### TRAIN MODELS ########################

# if torch.cuda.is_available():
#     device = "cuda"
# else:
#     device = "cpu"
# print(f"Using {device}", "\n")

# # prepare folder for models
# folder_path = "models/" + datetime.datetime.now().strftime("%y:%m:%d:%H:%M:%S")
# create_folder(folder_path)

# models = {}
# loss_history = {}

# for f in data.keys():
#     print("=======================================")
#     print("TRAINING", f.upper())
#     print("=======================================")
#     # construct model and assign it to device
#     models[f] = CNN().to(device)

# #     print("========= Architecture =========")
# #     print(models[f])
# #     print("================================")

#     # initialize loss function + optimiser
#     loss_fn = nn.MSELoss()
#     optimiser = torch.optim.Adam(models[f].parameters(), lr=LEARNING_RATE)

#     # train model
#     loss_history[f] = train(models[f], data[f]["dataloader_train"], loss_fn, optimiser, device, EPOCHS, data[f]["dataloader_validation"])
    
#     # save model
#     file_path = folder_path + "/" + f + ".pth"
#     torch.save(models[f].state_dict(), file_path)
    
#     # save loss history
#     file_content = "\n".join(list(map(lambda x: x[0] + " " + x[1], np.array(loss_history[f]).astype(str))))
#     file_path = folder_path + "/" + "loss_history_" + f + ".txt"
#     with open(file_path, "w") as f:
#         f.write(file_content)

################# LOAD MODELS ##################

# directory where models are stored
models_path = "models/22:12:07:16:20:13/"

# one trained CNN per facial keypoint, keyed by the file name without ".pth"
models = {}

for (dirpath, dirnames, filenames) in os.walk(models_path):
    for filename in filenames:
        if filename.endswith('.pth'):
            # get model name
            modelname = filename[:-4]

            # Rebuild the architecture, then restore the saved weights.
            # os.walk may descend into sub-directories, so the path is built
            # from dirpath; map_location lets checkpoints saved on a GPU load
            # on a CPU-only machine.
            models[modelname] = CNN()
            state_dict = torch.load(os.path.join(dirpath, filename), map_location="cpu")
            models[modelname].load_state_dict(state_dict)
            models[modelname].eval()


Get a sample prediction from the validation dataset for every model.

In [None]:
# idx = 1
# dataset = "validation"
# # ----------------------
# predictions = []
# targets = []
# for f in models.keys():
#     validation = next(iter(data[f][f"dataloader_{dataset}"]))
#     image = validation[0][idx]
#     target = validation[1][idx]
#     prediction = predict(models[f], torch.Tensor(np.expand_dims(np.expand_dims(image, axis=0), axis=0)))
#     predictions.append(prediction)
#     targets.append(target)
    
    
idx = 1
dataset = "validation"
# ----------------------
for f in models.keys():
    # take the idx-th (image, target) pair of the first batch
    validation = next(iter(data[f][f"dataloader_{dataset}"]))
    image = validation[0][idx]
    target = validation[1][idx]
    # add the batch and channel axes the model expects: (H, W) -> (1, 1, H, W)
    prediction = predict(models[f], image[None, None])
    # The returned image has to be displayed explicitly: inside a loop the
    # value of the expression is otherwise thrown away and nothing is shown.
    display(draw_predicted_target(image, [prediction], [target]))

Draw the prediction (blue) and the target (red) on the image.

In [None]:
# draw_predicted_target(image, predictions, targets)