In [None]:
import os
import pandas as pd
import scipy.io
import numpy as np 
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
import xgboost
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

%matplotlib inline

# **Data based on MPII dataset**

## Here we only consider the annotations (labels) for the eye-rectangle images

## For more context check out: https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/appearance-based-gaze-estimation-in-the-wild

### Each person has one folder per recording day; each "day" folder contains the labels for the images taken on that day

#### Now let us look at our data 

In [None]:
# Root folder of the dataset; each entry directly below it is treated as one participant.
data_dir = "../input/mpii-modified-dataset/MPII_Augmented_Dataset"
# Sorted so that participant indices are stable between runs.
people = sorted(os.listdir(data_dir))
print(people)

#### Iterate over the dataset and build four dataframes: the gaze vectors plus screen size (`final_df_in`), the gaze position on the screen (`final_df_out`), the eye (pupil) coordinates (`df_pupil_coords`), and the gaze vectors alone (`df_gaze_vectors`)

In [None]:
# Build the four dataframes used by the rest of the notebook:
#   final_df_in      -- gaze vector (gt1..gt3) plus the participant's screen size
#   final_df_out     -- gaze position on the screen (screen_pos1, screen_pos2)
#   df_pupil_coords  -- right/left eye coordinates (reye_*, leye_*)
#   df_gaze_vectors  -- gaze vector only (gt1..gt3)
#
# Per-day frames are collected in lists and concatenated once at the end.
# `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on
# every call, which made the original loop quadratic.

# (participant index, day index) pairs that are excluded. Both indices refer to
# the *sorted* listings, and the day index counts the "Calibration" entry too.
# NOTE(review): the reason for excluding these days is not recorded here -- TODO document.
SKIPPED_DAYS = {(2, 29), (9, 8), (10, 6)}

GAZE_VECTOR_COLS = ["gt1", "gt2", "gt3"]
SCREEN_POS_COLS = ["screen_pos1", "screen_pos2"]
PUPIL_COLS = ["reye_x", "reye_y", "reye_z", "leye_x", "leye_y", "leye_z"]


def _concat_frames(frames):
    """Concatenate `frames` row-wise; return an empty DataFrame when the list is empty."""
    # pd.concat raises ValueError on an empty list, so guard that case explicitly.
    return pd.concat(frames) if frames else pd.DataFrame()


in_frames = []
out_frames = []
pupil_frames = []
vector_frames = []

for i, person in enumerate(people):

    current_path = os.path.join(data_dir, person)
    days = sorted(os.listdir(current_path))

    # Screen resolution is stored once per participant in the calibration folder.
    screen_size = scipy.io.loadmat(os.path.join(current_path, "Calibration", "screenSize.mat"))
    screen_height = screen_size['height_pixel'][0][0]
    screen_width = screen_size['width_pixel'][0][0]

    for j, day in enumerate(days):

        if day == "Calibration" or (i, j) in SKIPPED_DAYS:
            continue

        df = pd.read_csv(os.path.join(current_path, day, "annotation.csv"))
        gaze_vectors = df[GAZE_VECTOR_COLS]

        # `assign` returns a new frame, so the gaze-only frame stays free of screen columns.
        in_frames.append(gaze_vectors.assign(screen_height=screen_height, screen_width=screen_width))
        out_frames.append(df[SCREEN_POS_COLS])
        pupil_frames.append(df[PUPIL_COLS])
        vector_frames.append(gaze_vectors)

# The original row index of every annotation file is kept (no ignore_index),
# matching what repeated `append` calls produced.
final_df_in = _concat_frames(in_frames)
final_df_out = _concat_frames(out_frames)
df_pupil_coords = _concat_frames(pupil_frames)
df_gaze_vectors = _concat_frames(vector_frames)

#### Let us describe the dataframes

In [None]:
# Summary statistics of the gaze-vector + screen-size frame.
final_df_in.describe()

In [None]:
# First rows of the on-screen gaze positions (screen_pos1, screen_pos2).
final_df_out.head()

In [None]:
# Summary statistics of the right/left eye coordinate columns.
df_pupil_coords.describe()

In [None]:
# First rows of the gaze vectors (gt1, gt2, gt3).
df_gaze_vectors.head()

#### Now let us convert the data in our data frame to a tensor in PyTorch

In [None]:
# Eye coordinates are the network input, gaze vectors the target.
# Both are cast to float32 before being wrapped as tensors.
input_array = df_pupil_coords.to_numpy().astype(np.float32)
output_array = df_gaze_vectors.to_numpy().astype(np.float32)

# torch.from_numpy shares memory with the numpy arrays above (no copy is made).
input_tensor, output_tensor = (
    torch.from_numpy(array) for array in (input_array, output_array)
)

In [None]:
# Plain numpy arrays for the two gradient-boosting stages.
# Stage 1: eye coordinates -> gaze vector.
pupil_to_gaze_x = df_pupil_coords.to_numpy()
pupil_to_gaze_y = df_gaze_vectors.to_numpy()

# Stage 2: gaze vector + screen size -> position on the screen.
gaze_to_screen_x = final_df_in.to_numpy()
gaze_to_screen_y = final_df_out.to_numpy()

### Using gradient boosting

### First Regressor

In [None]:
# created nn for now, have to see what happens next

In [None]:
# One independent XGBoost regressor is fitted per gaze-vector component.
pupil_to_gaze_base = xgboost.XGBRegressor(
    objective = 'reg:squarederror',
    eval_metric = 'rmse',
)
pupil_to_gaze_xgb = MultiOutputRegressor(pupil_to_gaze_base)
pupil_to_gaze_xgb.fit(pupil_to_gaze_x, pupil_to_gaze_y)

### Model Performance (mean squared error per output, measured on the training data)

In [None]:
# Per-output mean squared error.
# NOTE(review): this is computed on the same rows the model was fitted on,
# so it is an in-sample error, not an estimate of generalisation.
pupil_to_gaze_pred = pupil_to_gaze_xgb.predict(pupil_to_gaze_x)
pupil_to_gaze_sq_err = (pupil_to_gaze_pred - pupil_to_gaze_y) ** 2
print(pupil_to_gaze_sq_err.mean(axis=0))

# Sanity check: prediction for the first sample only.
first_pupil_sample = [pupil_to_gaze_x[0]]
print(pupil_to_gaze_xgb.predict(first_pupil_sample))

### Second Regressor

In [None]:
# One independent XGBoost regressor is fitted per screen coordinate.
gaze_to_screen_base = xgboost.XGBRegressor(
    objective = 'reg:squarederror',
    eval_metric = 'rmse',
)
gaze_to_screen_xgb = MultiOutputRegressor(gaze_to_screen_base)
gaze_to_screen_xgb.fit(gaze_to_screen_x, gaze_to_screen_y)

In [None]:
# Per-output mean squared error.
# NOTE(review): this is computed on the same rows the model was fitted on,
# so it is an in-sample error, not an estimate of generalisation.
gaze_to_screen_pred = gaze_to_screen_xgb.predict(gaze_to_screen_x)
gaze_to_screen_sq_err = (gaze_to_screen_pred - gaze_to_screen_y) ** 2
print(gaze_to_screen_sq_err.mean(axis=0))

# Sanity check: prediction for the first sample only.
first_gaze_sample = [gaze_to_screen_x[0]]
print(gaze_to_screen_xgb.predict(first_gaze_sample))

### Using a Neural Network

#### Now let us partition this dataset into training and validation datasets

In [None]:
# Pair every input row with its target row.
ds = TensorDataset(input_tensor, output_tensor)

# Hold out 20% of the samples for validation; the remainder is used for training.
n_samples = len(ds)
val_size = int(0.20 * n_samples)
train_size = n_samples - val_size

# Seed the split so that "Restart & Run All" yields the same train/validation
# partition every time; without a generator random_split uses the global RNG.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_ds, val_ds = random_split(ds, [train_size, val_size], generator=split_generator)

batch_size = 512

#### Creating the Dataloaders

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
# Validation keeps the default order and uses batches twice as large as training.
val_loader = DataLoader(
    val_ds,
    batch_size=batch_size * 2,
    num_workers=4,
    pin_memory=True,
)

### Defining and creating our model

In [None]:
class GazeModel(nn.Module):
    """Single linear layer mapping 6 input features to 3 outputs, scored with MSE."""

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(6, 3)

    def forward(self, xb):
        """Apply the linear layer to a batch of inputs."""
        return self.layer1(xb)

    def _mse_on_batch(self, batch):
        """Run the model on an (inputs, targets) pair and return the MSE loss."""
        features, labels = batch
        return F.mse_loss(self(features), labels)

    def training_step(self, batch):
        """Return the MSE loss of one training batch (still attached to the graph)."""
        return self._mse_on_batch(batch)

    def validation_step(self, batch):
        """Return the detached MSE loss of one validation batch under 'val_loss'."""
        return {'val_loss': self._mse_on_batch(batch).detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into a single Python float."""
        stacked = torch.stack([entry['val_loss'] for entry in outputs])
        return {'val_loss': stacked.mean().item()}

    def epoch_end(self, epoch, result):
        """Print the validation loss reached at the end of `epoch`."""
        print("Epoch [{}], val_loss: {:.4f}".format(epoch, result['val_loss']))

In [None]:
def evaluate(model, val_loader):
    """Compute the model's aggregated validation result over `val_loader`.

    Calls `model.validation_step` on every batch and passes the collected
    outputs to `model.validation_epoch_end`, whose return value is returned.

    The loop runs in eval mode and under `torch.no_grad()` so no autograd graph
    is built for validation batches. The model's previous train/eval mode is
    restored afterwards, so calling this in the middle of training is safe.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = [model.validation_step(batch) for batch in val_loader]
    finally:
        # Restore whatever mode the caller had set, even if a step raised.
        model.train(was_training)
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.Adam):
    """Train `model` for `epochs` epochs and return the per-epoch validation results.

    A fresh optimizer is built from `opt_func(model.parameters(), lr)` on every
    call. After each pass over `train_loader` the model is evaluated on
    `val_loader`; the result is reported through `model.epoch_end` and appended
    to the returned list.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_idx in range(epochs):
        # Training phase: one optimizer update per batch.
        for train_batch in train_loader:
            model.training_step(train_batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, epoch_result)
        history.append(epoch_result)
    return history

#### Training the model

In [None]:
# Fresh model; its validation loss before any training serves as the baseline
# that is later prepended to the plotted loss curve.
model = GazeModel()
result = evaluate(model, val_loader)
result

In [None]:
# First training stage: 3 epochs with learning rate 1.
history = fit(3, 1, model, train_loader, val_loader)

In [None]:
# Next training stage: 5 epochs with learning rate 0.5.
# NOTE(review): this reassigns `history`, discarding results from any earlier fit() call.
history = fit(5, 5 * 1e-1, model, train_loader, val_loader)

In [None]:
# Final training stage: 5 epochs with learning rate 0.1.
# NOTE(review): this reassigns `history`, discarding results from any earlier fit() call.
history = fit(5, 1e-1, model, train_loader, val_loader)

In [None]:
# Validation-loss curve: the untrained baseline followed by the epochs in `history`.
# NOTE(review): `history` is reassigned by every training cell, so only the
# epochs of the most recent fit() call appear after the baseline point.
losses = [result['val_loss']] + [entry['val_loss'] for entry in history]

fig, ax = plt.subplots()
ax.plot(losses, '-x')
ax.set_xlabel('epoch')
ax.set_ylabel('val_loss')
ax.set_title('val_loss vs. epochs');