# (Code we modified) Mounting our drive folder, which contains the dataset

In [None]:
from pathlib import Path
from google.colab import drive
# Mount Google Drive so the project folder (code and dataset archives) is reachable.
drive.mount('/content/drive')

# Set to False if we want to use the small dataset
use_large_dataset = True

project_folder = './drive/MyDrive/CSC413Project'
code_folder = f'{project_folder}/code'

if use_large_dataset:
  # Load and unzip the 10GB dataset file (if applicable).
  # The extracted folder is looked up in the current working directory.
  if not Path("./data10gb_proc_2").is_dir():
    # NOTE(review): this path has no 'data/' component, unlike the archive
    # path used by the unzip command below, and data_folder stays unset when
    # the extracted folder already exists -- confirm intent.
    data_folder = f'{project_folder}/data10gb.zip'
    !unzip './drive/MyDrive/CSC413Project/data/data10gb.zip'
  else:
    print("Large dataset already loaded")

else:
  # Small dataset: it is read directly from the Drive data folder.
  data_folder = f'{project_folder}/data'

Streaming output truncated to the last 5000 lines.
  inflating: data10gb_proc_2/00247/appleLeftEye/01907.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01734.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01052.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01046.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00358.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01720.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00416.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01708.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00370.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00364.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00402.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01293.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00833.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/00827.jpg  
  inflating: data10gb_proc_2/00247/appleLeftEye/01287.jpg  
  inflating: data10gb_proc_2/00247/

# ITrackerData Preprocessing

In [None]:

import torch.utils.data as data
import scipy.io as sio
from PIL import Image
import os
import os.path
import torchvision.transforms as transforms
import torch
import numpy as np
import re

'''
Data loader for the iTracker.
Use prepareDataset.py to convert the dataset from http://gazecapture.csail.mit.edu/ to proper format.

Author: Petr Kellnhofer ( pkel_lnho (at) gmai_l.com // remove underscores and spaces), 2018. 

Website: http://gazecapture.csail.mit.edu/

Cite:

Eye Tracking for Everyone
K.Krafka*, A. Khosla*, P. Kellnhofer, H. Kannan, S. Bhandarkar, W. Matusik and A. Torralba
IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016

@inproceedings{cvpr2016_gazecapture,
Author = {Kyle Krafka and Aditya Khosla and Petr Kellnhofer and Harini Kannan and Suchendra Bhandarkar and Wojciech Matusik and Antonio Torralba},
Title = {Eye Tracking for Everyone},
Year = {2016},
Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}
}

'''

# Folder from which ITrackerData loads the mean-image .mat files.
MEAN_PATH = './drive/MyDrive/CSC413Project/code/'
def loadMetadata(filename, silent = False):
    """Load a MATLAB .mat file and return its contents.

    Args:
        filename: Path of the .mat file to read.
        silent: When True, the "Reading metadata" progress line is not printed.

    Returns:
        The object returned by ``scipy.io.loadmat`` (called with
        ``squeeze_me=True, struct_as_record=False``), or ``None`` when the
        file could not be read. A failure message is printed in that case.
    """
    # http://stackoverflow.com/questions/6273634/access-array-contents-from-a-mat-file-loaded-using-scipy-io-loadmat-python
    if not silent:
        print('\tReading metadata from %s...' % filename)
    try:
        metadata = sio.loadmat(filename, squeeze_me=True, struct_as_record=False)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        print('\tFailed to read the meta file "%s"!' % filename)
        return None
    return metadata

class SubtractMean(object):
    """Transform that subtracts a fixed mean image from a tensor image.
    """

    def __init__(self, meanImg):
        # The mean image is divided by 255 before the tensor conversion;
        # presumably it is stored on a 0-255 scale -- TODO confirm.
        toTensor = transforms.ToTensor()
        scaledMean = meanImg / 255
        self.meanImg = toTensor(scaledMean)

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
        Returns:
            Tensor: The input with the stored mean image subtracted.
        """
        return tensor - self.meanImg


class ITrackerData(data.Dataset):
    """Dataset of face / eye crops, face-grid masks and gaze targets for one split.

    ``metadata.mat`` is read from ``dataPath`` and the three mean images are
    read from ``MEAN_PATH`` at construction time; the images themselves are
    opened lazily in ``__getitem__``.

    Args:
        dataPath: Folder containing ``metadata.mat`` and the per-recording
            image folders.
        split: ``'test'`` or ``'val'`` select those subsets; any other value
            selects the training subset.
        imSize: Size handed to ``transforms.Resize`` for every image.
        gridSize: Face-grid dimensions; the mask returned by ``makeGrid`` has
            ``gridSize[0] * gridSize[1]`` cells.

    Raises:
        RuntimeError: If ``metadata.mat`` or one of the mean-image files is
            missing or cannot be read.
    """

    # Metadata key holding the mask of each explicitly named split; every
    # other split name falls back to 'labelTrain'.
    _SPLIT_LABELS = {'test': 'labelTest', 'val': 'labelVal'}

    def __init__(self, dataPath, split = 'train', imSize=(224,224), gridSize=(25, 25)):

        self.dataPath = dataPath
        self.imSize = imSize
        self.gridSize = gridSize

        print('Loading iTracker dataset...')
        metaFile = os.path.join(dataPath, 'metadata.mat')
        if not os.path.isfile(metaFile):
            raise RuntimeError('There is no such file %s! Provide a valid dataset path.' % metaFile)
        self.metadata = loadMetadata(metaFile)
        if self.metadata is None:
            raise RuntimeError('Could not read metadata file %s! Provide a valid dataset path.' % metaFile)

        self.faceMean = self._loadMean('mean_face_224.mat')
        self.eyeLeftMean = self._loadMean('mean_left_224.mat')
        self.eyeRightMean = self._loadMean('mean_right_224.mat')

        self.transformFace = self._makeTransform(self.faceMean)
        self.transformEyeL = self._makeTransform(self.eyeLeftMean)
        self.transformEyeR = self._makeTransform(self.eyeRightMean)

        mask = self.metadata[self._SPLIT_LABELS.get(split, 'labelTrain')]

        # Positions of the non-zero mask entries, i.e. the metadata rows that
        # belong to the requested split.
        self.indices = np.argwhere(mask)[:,0]
        # TODO: Commented out, since we are only testing in this colab
        # print('Loaded iTracker dataset split "%s" with %d records...' % (split, len(self.indices)))

    def _loadMean(self, filename):
        """Return the 'image_mean' entry of a mean-image file found in MEAN_PATH."""
        meanFile = os.path.join(MEAN_PATH, filename)
        meanData = loadMetadata(meanFile)
        if meanData is None:
            # loadMetadata signals failure with None; fail with a clear
            # message instead of an opaque subscript error.
            raise RuntimeError('Could not read mean image file %s!' % meanFile)
        return meanData['image_mean']

    def _makeTransform(self, meanImg):
        """Build the resize -> tensor -> mean-subtraction pipeline for one image stream."""
        return transforms.Compose([
            transforms.Resize(self.imSize),
            transforms.ToTensor(),
            SubtractMean(meanImg=meanImg),
        ])

    def loadImage(self, path):
        """Open the image at ``path`` and convert it to RGB.

        Raises:
            RuntimeError: If the file cannot be opened or decoded (OSError).
        """
        try:
            im = Image.open(path).convert('RGB')
        except OSError as err:
            raise RuntimeError('Could not read image: ' + path) from err

        return im


    def makeGrid(self, params):
        """Return the flattened binary face-grid mask for ``params``.

        Args:
            params: Indexable as (x, y, width, height): cells with
                ``x <= column < x + width`` and ``y <= row < y + height``
                are set to 1.

        Returns:
            A float32 array of length ``gridSize[0] * gridSize[1]``; cell
            ``i`` sits in row ``i // gridSize[0]``, column ``i % gridSize[0]``.
        """
        gridLen = self.gridSize[0] * self.gridSize[1]
        grid = np.zeros([gridLen,], np.float32)

        cells = np.arange(gridLen)
        indsY = cells // self.gridSize[0]
        indsX = cells % self.gridSize[0]
        condX = np.logical_and(indsX >= params[0], indsX < params[0] + params[2])
        condY = np.logical_and(indsY >= params[1], indsY < params[1] + params[3])
        cond = np.logical_and(condX, condY)

        grid[cond] = 1
        return grid

    def __getitem__(self, index):
        """Return ``(row, imFace, imEyeL, imEyeR, faceGrid, gaze)`` for one sample.

        ``index`` is a position within the split; ``row`` is the matching
        metadata row wrapped in a one-element LongTensor.
        """
        index = self.indices[index]

        # All three crops of a frame share the recording number and frame index.
        recNum = self.metadata['labelRecNum'][index]
        frameIndex = self.metadata['frameIndex'][index]
        imFacePath = os.path.join(self.dataPath, '%05d/appleFace/%05d.jpg' % (recNum, frameIndex))
        imEyeLPath = os.path.join(self.dataPath, '%05d/appleLeftEye/%05d.jpg' % (recNum, frameIndex))
        imEyeRPath = os.path.join(self.dataPath, '%05d/appleRightEye/%05d.jpg' % (recNum, frameIndex))

        imFace = self.transformFace(self.loadImage(imFacePath))
        imEyeL = self.transformEyeL(self.loadImage(imEyeLPath))
        imEyeR = self.transformEyeR(self.loadImage(imEyeRPath))

        gaze = np.array([self.metadata['labelDotXCam'][index], self.metadata['labelDotYCam'][index]], np.float32)

        faceGrid = self.makeGrid(self.metadata['labelFaceGrid'][index,:])

        # to tensor
        row = torch.LongTensor([int(index)])
        faceGrid = torch.FloatTensor(faceGrid)
        gaze = torch.FloatTensor(gaze)

        return row, imFace, imEyeL, imEyeR, faceGrid, gaze


    def __len__(self):
        """Number of samples in the selected split."""
        return len(self.indices)


# ITrackerModel Code

In [None]:

import argparse
import os
import shutil
import time, math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
import torch.utils.model_zoo as model_zoo
from torch.autograd.variable import Variable

'''
Pytorch model for the iTracker.

Author: Petr Kellnhofer ( pkel_lnho (at) gmai_l.com // remove underscores and spaces), 2018. 

Website: http://gazecapture.csail.mit.edu/

Cite:

Eye Tracking for Everyone
K.Krafka*, A. Khosla*, P. Kellnhofer, H. Kannan, S. Bhandarkar, W. Matusik and A. Torralba
IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016

@inproceedings{cvpr2016_gazecapture,
Author = {Kyle Krafka and Aditya Khosla and Petr Kellnhofer and Harini Kannan and Suchendra Bhandarkar and Wojciech Matusik and Antonio Torralba},
Title = {Eye Tracking for Everyone},
Year = {2016},
Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}
}

'''


class ItrackerImageModel(nn.Module):
    """Convolutional feature extractor.

    Used for both eyes (with shared weights) and the face (with unique weights).
    """

    def __init__(self):
        super().__init__()
        # The layers keep their positions inside the Sequential so parameter
        # names (features.0, features.4, ...) stay the same.
        conv_stack = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 64, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_stack)

    def forward(self, x):
        # Run the conv stack, then flatten everything except the batch axis.
        feature_maps = self.features(x)
        batch = feature_maps.size(0)
        return feature_maps.view(batch, -1)

class FaceImageModel(nn.Module):
    """Face pathway: convolutional features followed by two fully connected layers (64 outputs)."""

    def __init__(self):
        super().__init__()
        self.conv = ItrackerImageModel()
        fc_layers = [
            nn.Linear(12*12*64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
        ]
        self.fc = nn.Sequential(*fc_layers)

    def forward(self, x):
        # Flattened conv features go straight into the FC head.
        return self.fc(self.conv(x))

class FaceGridModel(nn.Module):
    """Model for the face grid pathway.

    Takes a ``gridSize * gridSize`` mask per sample (any shape that flattens
    to that length) and produces 128 features.
    """

    def __init__(self, gridSize = 25):
        super().__init__()
        fc_layers = [
            nn.Linear(gridSize * gridSize, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
        ]
        self.fc = nn.Sequential(*fc_layers)

    def forward(self, x):
        # Flatten everything except the batch axis before the FC layers.
        flat = x.view(x.size(0), -1)
        return self.fc(flat)



class ITrackerModel(nn.Module):
    """Full iTracker network combining eye, face and face-grid pathways into a 2-value output."""

    def __init__(self):
        super().__init__()
        self.eyeModel = ItrackerImageModel()
        self.faceModel = FaceImageModel()
        self.gridModel = FaceGridModel()
        # Joining both eyes
        eyes_layers = [
            nn.Linear(2*12*12*64, 128),
            nn.ReLU(inplace=True),
        ]
        self.eyesFC = nn.Sequential(*eyes_layers)
        # Joining everything: 128 (eyes) + 64 (face) + 128 (grid) features
        joint_layers = [
            nn.Linear(128+64+128, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 2),
        ]
        self.fc = nn.Sequential(*joint_layers)

    def forward(self, faces, eyesLeft, eyesRight, faceGrids):
        # Eye nets: the same module (shared weights) processes both eyes.
        eye_features = (self.eyeModel(eyesLeft), self.eyeModel(eyesRight))
        # Cat and FC
        xEyes = self.eyesFC(torch.cat(eye_features, 1))

        # Face net and face-grid net
        xFace = self.faceModel(faces)
        xGrid = self.gridModel(faceGrids)

        # Cat all
        combined = torch.cat((xEyes, xFace, xGrid), 1)
        return self.fc(combined)


# (Code we modified) Models that represent different branches in the iTracker model

In [None]:

class LeftEyeBranch(nn.Module):
    """Stand-alone model that produces a 2-value output from the left-eye image only."""

    def __init__(self):
        super().__init__()
        # Layer positions are kept so parameter names (features.N / fc.N)
        # do not change.
        conv_layers = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 64, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Linear(12*12*64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 2),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, faces, eyesLeft, eyesRight, faceGrids):
        # Only eyesLeft is read; the other arguments are accepted so every
        # model can be called with the same four inputs.
        feature_maps = self.features(eyesLeft)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat)


class RightEyeBranch(nn.Module):
    """Stand-alone model that produces a 2-value output from the right-eye image only."""

    def __init__(self):
        super().__init__()
        # Layer positions are kept so parameter names (features.N / fc.N)
        # do not change.
        conv_layers = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 64, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Linear(12*12*64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 2),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, faces, eyesLeft, eyesRight, faceGrids):
        # Only eyesRight is read; the other arguments are accepted so every
        # model can be called with the same four inputs.
        feature_maps = self.features(eyesRight)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat)


class JointEyeBranch(nn.Module):
    """Model that produces a 2-value output from both eye images, using one shared eye network."""

    def __init__(self):
        super().__init__()
        self.eyeModel = ItrackerImageModel()

        # Joining both eyes
        joint_layers = [
            nn.Linear(2*12*12*64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 2),
        ]
        self.eyesFC = nn.Sequential(*joint_layers)

    def forward(self, faces, eyesLeft, eyesRight, faceGrids):
        # Eye nets (using shared weights); faces and faceGrids are not read.
        eye_features = (self.eyeModel(eyesLeft), self.eyeModel(eyesRight))
        # Cat and FC
        return self.eyesFC(torch.cat(eye_features, 1))


class FaceBranch(nn.Module):
    """Stand-alone model that produces a 2-value output from the face image only."""

    def __init__(self):
        super().__init__()
        # Layer positions are kept so parameter names (features.N / fc.N)
        # do not change.
        conv_layers = [
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.CrossMapLRN2d(size=5, alpha=0.0001, beta=0.75, k=1.0),
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 64, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_layers)
        head_layers = [
            nn.Linear(12*12*64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, faces, eyesLeft, eyesRight, faceGrids):
        # Only faces is read; the other arguments are accepted so every
        # model can be called with the same four inputs.
        feature_maps = self.features(faces)
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat)


# (Code we modified) Specifying which model to run using argparser


In [None]:

import argparse

def str2bool(v):
    """Convert a command-line style boolean to ``bool``.

    Args:
        v: A string such as 'yes'/'no', 'true'/'false', 't'/'f', 'y'/'n',
            '1'/'0' (case-insensitive), or an actual ``bool``, which is
            returned unchanged.

    Returns:
        The corresponding ``bool``.

    Raises:
        argparse.ArgumentTypeError: If the string is not a recognised spelling.
    """
    if isinstance(v, bool):
        # Already parsed (e.g. when called directly with a Python bool).
        return v
    text = v.lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

parser = argparse.ArgumentParser(description='iTracker-pytorch-Trainer.')

# Settings shared by every boolean flag: the value is optional, a bare flag
# means True and an absent flag means False.
_BOOL_FLAG = dict(type=str2bool, nargs='?', const=True, default=False)

parser.add_argument(
    '--data_path',
    help="Path to processed dataset. It should contain metadata.mat. Use prepareDataset.py.",
)
parser.add_argument('--code_path', help="Path to project code.")
parser.add_argument(
    '--sink',
    help="Just sink and terminate (i.e, run a small test example)",
    **_BOOL_FLAG,
)
parser.add_argument('--reset', help="Start from scratch (do not load).", **_BOOL_FLAG)
parser.add_argument('--model', help="Which model to use")
parser.add_argument(
    '--prepopulate',
    help="Prepopulate weights from full model into branch (branch models only).",
    **_BOOL_FLAG,
)
parser.add_argument(
    '--train_last_fc_only',
    help="If we want to only train the last FC layer.",
    **_BOOL_FLAG,
)
parser.add_argument('--model_name', help="Name of the model (used when saving checkpoints).")


# The large dataset is read from the local extracted folder, the small one
# from the data folder.
if use_large_dataset:
    data_path = './data10gb_proc_2'
else:
    data_path = f"{data_folder}/data_small/"

# Options to select different models to train

# Option to re-train the full model
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'full', '--model_name', 'full_trained'])

# Options to create a branch model, prepopulate it with weights from the pretrained full model, and train its final FC layer only
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'left_eye', '--prepopulate', 'true', '--train_last_fc_only', 'true', '--model_name', 'left_eye_transfer_lastFCtrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'right_eye', '--prepopulate', 'true', '--train_last_fc_only', 'true', '--model_name', 'right_eye_transfer_lastFCtrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'joint_eye', '--prepopulate', 'true', '--train_last_fc_only', 'true', '--model_name', 'joint_eye_transfer_lastFCtrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'face', '--prepopulate', 'true', '--train_last_fc_only', 'true', '--model_name', 'face_transfer_lastFCtrained'])


# Options that are the same as above (create branch, prepopulate it with weights), except we re-train all layers of the branch (for fine-tuning)
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'left_eye', '--prepopulate', 'true', '--train_last_fc_only', 'false', '--model_name', 'left_eye_transfer_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'right_eye', '--prepopulate', 'true', '--train_last_fc_only', 'false', '--model_name', 'right_eye_transfer_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'joint_eye', '--prepopulate', 'true', '--train_last_fc_only', 'false', '--model_name', 'joint_eye_transfer_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'face', '--prepopulate', 'true', '--train_last_fc_only', 'false', '--model_name', 'face_transfer_fulltrained'])


# Options to create a branch model, and retrain it fully (no transfer learning)
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'left_eye',  '--model_name', 'left_eye_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'right_eye', '--model_name', 'right_eye_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'joint_eye', '--model_name', 'joint_eye_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'false', '--reset', 'true', '--model', 'face', '--model_name', 'face_fulltrained'])


# Options to load a pre-trained branch model, and test it
# Active configuration: load the saved left-eye branch model and only test it.
args = parser.parse_args(args=[
    '--data_path', data_path,
    '--code_path', f'{code_folder}',
    '--sink', 'true',
    '--reset', 'false',
    '--model', 'left_eye',
    '--model_name', 'left_eye_transfer_fulltrained',
])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'true', '--reset', 'false', '--model', 'right_eye',  '--model_name', 'right_eye_transfer_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'true', '--reset', 'false', '--model', 'face',  '--model_name', 'face_transfer_fulltrained'])
# args = parser.parse_args(args=['--data_path', data_path, '--code_path', f'{code_folder}', '--sink', 'true', '--reset', 'false', '--model', 'full', '--model_name', 'full_pretrained'])


# Training and validation loops

In [None]:


import math, shutil, os, time, argparse
import numpy as np
import scipy.io as sio

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from tqdm.notebook import tqdm

'''
Train/test code for iTracker.

Author: Petr Kellnhofer ( pkel_lnho (at) gmai_l.com // remove underscores and spaces), 2018. 

Website: http://gazecapture.csail.mit.edu/

Cite:

Eye Tracking for Everyone
K.Krafka*, A. Khosla*, P. Kellnhofer, H. Kannan, S. Bhandarkar, W. Matusik and A. Torralba
IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016

@inproceedings{cvpr2016_gazecapture,
Author = {Kyle Krafka and Aditya Khosla and Petr Kellnhofer and Harini Kannan and Suchendra Bhandarkar and Wojciech Matusik and Antonio Torralba},
Title = {Eye Tracking for Everyone},
Year = {2016},
Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}
}

'''

# Change these flags to control what happens.

doTrainLastFC = args.train_last_fc_only # Train only the last FC layer (branch models only).
doPrepopulate = args.prepopulate # Prepopulate weights from full model into branch (branch models only).
doLoad = not args.reset # Load checkpoint at the beginning
doTest = args.sink # Only run test, no training

workers = 16
epochs = 1 # TODO: Play around with this value
# 100 samples per visible CUDA device; this evaluates to 0 when no CUDA device is available.
batch_size = torch.cuda.device_count()*100 # Change if out of cuda memory

base_lr = 0.0001
momentum = 0.9
weight_decay = 1e-4
print_freq = 10  # not read by the train/validate loops in this cell
prec1 = 0
best_prec1 = 1e20  # presumably the best (lowest) validation error so far -- TODO confirm against the training driver
lr = base_lr

count_test = 0  # declared global in validate() but not modified there
count = 0  # incremented once per training batch in train()



def main():
    # Stub: the body only declares module-level names as global and performs
    # no work of its own in this cell.
    global args, best_prec1, weight_decay, momentum
    


def train(train_loader, model, criterion,optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Each batch is moved to the GPU, passed through ``model`` and scored with
    ``criterion``; the optimizer then takes one step. A progress line (timing
    and running loss) is printed after every batch and the module-level
    ``count`` is incremented once per batch.

    Args:
        train_loader: Sized iterable yielding
            ``(row, imFace, imEyeL, imEyeR, faceGrid, gaze)`` batches.
        model: Called as ``model(imFace, imEyeL, imEyeR, faceGrid)``.
        criterion: Callable returning a scalar loss from ``(output, gaze)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
            around ``loss.backward()``.
        epoch: Only used in the progress message.
    """
    global count
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()

    num_batches = len(train_loader)
    print(num_batches)
    for i, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in tqdm(enumerate(train_loader), total=num_batches):

        # measure data loading time
        data_time.update(time.time() - end)

        # Plain tensors are enough for autograd; the inputs are deliberately
        # not marked as requiring gradients, since only the model parameters
        # are optimized and input gradients would just add backward work.
        imFace = imFace.cuda()
        imEyeL = imEyeL.cuda()
        imEyeR = imEyeR.cuda()
        faceGrid = faceGrid.cuda()
        gaze = gaze.cuda()

        # compute output
        output = model(imFace, imEyeL, imEyeR, faceGrid)

        loss = criterion(output, gaze)

        # Weight the running average by the number of samples in the batch.
        losses.update(loss.item(), imFace.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        count=count+1

        print('Epoch (train): [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, i, num_batches, batch_time=batch_time,
                   data_time=data_time, loss=losses))

def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` without updating it.

    For every batch the criterion loss and the mean Euclidean distance
    between prediction and target are computed; a progress line is printed
    after each batch.

    Args:
        val_loader: Sized iterable yielding
            ``(row, imFace, imEyeL, imEyeR, faceGrid, gaze)`` batches.
        model: Called as ``model(imFace, imEyeL, imEyeR, faceGrid)``.
        criterion: Callable returning a scalar loss from ``(output, gaze)``.
        epoch: Only used in the progress message.

    Returns:
        The batch-size-weighted average of the per-batch mean Euclidean error.
    """
    global count_test
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    lossesLin = AverageMeter()

    # switch to evaluate mode
    model.eval()
    end = time.time()

    # NOTE: len(val_loader) is the number of batches the loader yields.
    print("Number of test samples:", len(val_loader))
    print("Batch size:", batch_size)

    for i, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in enumerate(val_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        imFace = imFace.cuda()
        imEyeL = imEyeL.cuda()
        imEyeR = imEyeR.cuda()
        faceGrid = faceGrid.cuda()
        gaze = gaze.cuda()

        # No gradients are needed for evaluation, so both the forward pass
        # and the metrics are computed with autograd disabled.
        with torch.no_grad():
            output = model(imFace, imEyeL, imEyeR, faceGrid)

            loss = criterion(output, gaze)

            # Euclidean distance per sample, averaged over the batch.
            diff = output - gaze
            lossLin = torch.mean(torch.sqrt(torch.sum(torch.mul(diff, diff), 1)))

        losses.update(loss.item(), imFace.size(0))
        lossesLin.update(lossLin.item(), imFace.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print('Epoch (val): [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Error L2 {lossLin.val:.4f} ({lossLin.avg:.4f})\t'.format(
                    epoch, i, len(val_loader), batch_time=batch_time,
                   loss=losses,lossLin=lossesLin))

    return lossesLin.avg

# Folder that checkpoints are read from and written to (the --code_path argument).
CHECKPOINTS_PATH = f'{args.code_path}'

def load_checkpoint(filename='checkpoint.pth.tar'):
    """Load a checkpoint stored under CHECKPOINTS_PATH.

    The resolved path is always printed. Returns whatever ``torch.load``
    yields for the file, or ``None`` (after printing a notice) when the file
    does not exist.
    """
    checkpoint_file = os.path.join(CHECKPOINTS_PATH, filename)
    print(checkpoint_file)
    if os.path.isfile(checkpoint_file):
        return torch.load(checkpoint_file)
    print(f"no such filename {checkpoint_file}")
    return None

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Write ``state`` to CHECKPOINTS_PATH/filename with ``torch.save``.

    The folder is created first when missing. When ``is_best`` is true the
    saved file is additionally copied to ``'best_' + filename`` in the same
    folder.
    """
    if not os.path.isdir(CHECKPOINTS_PATH):
        os.makedirs(CHECKPOINTS_PATH, 0o777)
    best_target = os.path.join(CHECKPOINTS_PATH, 'best_' + filename)
    target = os.path.join(CHECKPOINTS_PATH, filename)
    torch.save(state, target)
    if not is_best:
        return
    shutil.copyfile(target, best_target)


class AverageMeter(object):
    """Keeps the most recent value together with a weighted running sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Clear every statistic back to zero.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # Record `val` as the latest value, counted `n` times in the average.
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch, initial_lr=None):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs.

    Args:
        optimizer: optimizer whose parameter groups are updated in place.
        epoch: zero-based epoch number the rate is computed for.
        initial_lr: starting learning rate; when ``None`` the module-level
            ``base_lr`` is used.
    """
    if initial_lr is None:
        initial_lr = base_lr
    lr = initial_lr * (0.1 ** (epoch // 30))
    # Write to the live param_groups: optimizer.state_dict() hands back copies
    # of the groups, so assigning 'lr' there never reaches the optimizer.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr



# (Code we modified) Loading our specified model

In [None]:

# Choose the branch architecture named by args.model. The 'full' model is not
# built here (the following cell creates it); unknown names are rejected.
branch_class = None
if args.model == 'left_eye':
  print("Creating left eye model.")
  branch_class = LeftEyeBranch
elif args.model == 'right_eye':
  print("Creating right eye model.")
  branch_class = RightEyeBranch
elif args.model == 'joint_eye':
  print("Creating joint eye model.")
  branch_class = JointEyeBranch
elif args.model == 'face':
  print("Creating face model.")
  branch_class = FaceBranch
elif args.model != 'full':
  raise Exception("Model not recognized")

if branch_class is not None:
  # Every branch is wrapped in DataParallel and moved to the GPU the same way.
  model_branch = torch.nn.DataParallel(branch_class()).cuda()
  model = model_branch

Creating left eye model.


RuntimeError: ignored

# (Code we modified) Preloading the full iTracker weights into our specific individual branches


In [None]:

# Build the full iTracker network when it is either the model under study or
# the source of pretrained weights for a branch.
if doPrepopulate or args.model == 'full':
  print("Loading full iTracker model.")
  model_full = torch.nn.DataParallel(ITrackerModel()).cuda()
  model = model_full


# Restore the full model's weights from its checkpoint.
if doPrepopulate or (doLoad and args.model == 'full'):
  saved = load_checkpoint("checkpoint_full.pth.tar")
  if not saved:
    raise Exception('Tried to read iTracker checkpoint, but failed!')

  print('Loading pretrained full iTracker model')
  print('Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...' % (saved['epoch'], saved['best_prec1']))
  state = saved['state_dict']
  try:
    # Checkpoint keys without the DataParallel 'module.' prefix
    model_full.module.load_state_dict(state)
  except RuntimeError:
    # load_state_dict reports key/shape mismatches as RuntimeError; retry on
    # the wrapper, whose keys carry the 'module.' prefix.
    model_full.load_state_dict(state)
  epoch = saved['epoch']
  best_prec1 = saved['best_prec1']

def _strip_module_prefix(name):
  """Remove the leading 'module' component from a parameter name."""
  return ".".join(name.split('.')[1:])


def _eye_full_name(name):
  """Map a single-eye branch parameter name to its name in the full model."""
  return f"module.eyeModel.{_strip_module_prefix(name)}"


def _joint_full_name(name):
  """Joint-eye branch parameters carry the same names as in the full model."""
  return name


def _face_full_name(name):
  """Map a face branch parameter name to its name in the full model."""
  name = _strip_module_prefix(name)
  return f"module.faceModel.conv.{name}" if "features" in name else f"module.faceModel.{name}"


def _copy_full_weights(branch_model, full_state, skip_token, to_full_name, allowed_dims, retrain):
  """Overwrite branch parameters in place with the trained full-model weights.

  Args:
    branch_model: model whose parameters are overwritten.
    full_state: mapping from full-model parameter name to tensor.
    skip_token: parameters whose name contains this text are left untouched
      (the last FC layers).
    to_full_name: maps a branch parameter name to the full-model name.
    allowed_dims: tensor ranks that are expected for the copied parameters.
    retrain: when true, copied parameters are made trainable again.

  Raises:
    AssertionError: a parameter has a rank outside ``allowed_dims``.
  """
  for name, branch_param in branch_model.named_parameters():
    # Don't want to overwrite the last FC layers.
    if skip_token in name:
      continue

    # Can only overwrite on non-backpropagating nodes
    branch_param.requires_grad = False
    full_param = full_state[to_full_name(name)]
    if len(np.shape(full_param)) not in allowed_dims:
      raise AssertionError("Unexpected param dim")

    # Begin overwriting the branch weights with the trained full ones
    branch_param[...] = full_param

    if retrain:
      # In this case, we will want to re-train this preloaded layer
      branch_param.requires_grad = True


if doPrepopulate:
  # (skip token, name mapping, allowed ranks, branch check key, full check key)
  if args.model == 'left_eye' or args.model == 'right_eye':
    prepopulate_plan = ('fc', _eye_full_name, (1, 4),
                        'module.features.4.weight',
                        'module.eyeModel.features.4.weight')
  elif args.model == 'joint_eye':
    prepopulate_plan = ('eyesFC.2', _joint_full_name, (1, 4, 2),
                        'module.eyeModel.features.8.weight',
                        'module.eyeModel.features.8.weight')
  elif args.model == 'face':
    prepopulate_plan = ('fc.4', _face_full_name, (1, 4, 2),
                        'module.features.8.weight',
                        'module.faceModel.conv.features.8.weight')
  else:
    prepopulate_plan = None

  if prepopulate_plan is not None:
    skip_token, to_full_name, allowed_dims, branch_key, full_key = prepopulate_plan
    print(f"Setting {args.model} model with full pretrained weights")

    # Snapshot the full model's state once instead of once per parameter.
    full_state = dict(model_full.state_dict())
    _copy_full_weights(model_branch, full_state, skip_token, to_full_name,
                       allowed_dims, retrain=not doTrainLastFC)

    # Verify that overwriting succeeded
    if not (dict(model_branch.state_dict())[branch_key][0,0,0,0] == full_state[full_key][0,0,0,0]):
      raise AssertionError("Branch weights do not match the full model after overwriting")
    model = model_branch

# (Code we modified) Load **our** pretrained branch models

In [None]:

# Restore our own best checkpoint for the selected branch model.
if doLoad and args.model != 'full':
  saved = load_checkpoint(f"best_checkpoint_{args.model_name}.pth.tar")
  if not saved:
    raise Exception('Tried to read iTracker checkpoint, but failed!')

  print(f'Loading our pretrained {args.model_name} model')
  print('Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...' % (saved['epoch'], saved['best_prec1']))
  state = saved['state_dict']
  try:
    # Checkpoint keys without the DataParallel 'module.' prefix
    model_branch.module.load_state_dict(state)
  except RuntimeError:
    # load_state_dict reports key/shape mismatches as RuntimeError; retry on
    # the wrapper, whose keys carry the 'module.' prefix.
    model_branch.load_state_dict(state)
  epoch = saved['epoch']
  best_prec1 = saved['best_prec1']
  model = model_branch


# (Code we modified)  Specify train / val sets

In [None]:

imSize = (224, 224)
cudnn.benchmark = True
# NOTE(review): this resets any epoch restored from a checkpoint by earlier cells — confirm intended.
epoch = 0

# Both modes read the same two splits; only the log line differs.
print("Loading test dataset" if doTest else "Loading train dataset")
dataTrain = ITrackerData(dataPath=args.data_path, split='train', imSize=imSize)
dataVal = ITrackerData(dataPath=args.data_path, split='test', imSize=imSize)
if doTest and not use_large_dataset:
  # In our sample dataset, we want the train files to be tested.
  dataVal = dataTrain


# Running the train and test loops

In [None]:

# Neither loader shuffles, so batch number k always holds the same samples.
# NOTE(review): the DeepDream / SmoothGrad cells address images by
# (batch, index) and presumably rely on this — confirm before enabling shuffle.
train_loader = torch.utils.data.DataLoader(
    dataTrain,
    batch_size=batch_size, shuffle=False,
    num_workers=workers, pin_memory=True)

val_loader = torch.utils.data.DataLoader(
    dataVal,
    batch_size=batch_size, shuffle=False,
    num_workers=workers, pin_memory=True)


criterion = nn.MSELoss().cuda()

optimizer = torch.optim.SGD(model.parameters(), lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

# Runs the training / test loops
if doTest:
  print("In test!")
  validate(val_loader, model, criterion, epoch)
else:
  print("In train!")
  # Replays the schedule up to the starting epoch (a no-op when epoch is 0).
  for epoch in range(0, epoch):
    adjust_learning_rate(optimizer, epoch)

  print(epoch)
  for epoch in range(epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set first, so the checkpoint records this
    # epoch's error rather than a stale (or not yet defined) value
    prec1 = validate(val_loader, model, criterion, epoch)

    # every epoch's checkpoint is also copied to the 'best_' file
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': prec1,
    }, True, filename=f'checkpoint_{args.model_name}.pth.tar')

print('DONE')

# (Code we modified) DeepDream

In [None]:
import matplotlib.pyplot as plt

# Reference gaze vectors for the four directions; get_best_indices measures
# squared distance to them and the DeepDream run cell passes them as targets.
# NOTE(review): presumably [x, y] in the units of the dataset's gaze labels — confirm.
LEFT = [25., 0.]
RIGHT = [-25., 0.]
UP = [0., 25.]
DOWN = [0., -25.]


def normalize(image):
  """Linearly rescale ``image`` so its minimum maps to 0 and its maximum to 1.

  Args:
    image: array-like of numbers.

  Returns:
    Array of the same shape. A constant input yields all zeros instead of
    the NaNs a 0/0 division would produce.
  """
  im_min = np.min(image)
  im_max = np.max(image)
  span = im_max - im_min
  if span == 0:
    # Constant image: the numerator is already all zeros, so divide by one.
    span = 1
  return (image - im_min) / span


def _closest_gaze_index(gazes, target):
  """Return the index of the first row of ``gazes`` nearest to ``target``, or None if empty."""
  count = len(gazes)
  if count == 0:
    return None
  errors = ((gazes - target) ** 2).reshape(count, -1).sum(dim=1).tolist()
  # min() keeps the first of equal errors, like a strict '<' scan does.
  return min(range(count), key=errors.__getitem__)


def get_best_indices(data_loader, K=(0,), I=100):
  """Find, per selected batch, the samples whose gaze is closest to each direction.

  Args:
    data_loader: iterable of (row, imFace, imEyeL, imEyeR, faceGrid, gaze) batches.
    K: batch numbers to inspect; any order, adjacent numbers allowed.
    I: only the first ``I`` samples of a batch are considered (fewer when
      the batch is smaller).

  Returns:
    Four dicts (left, right, up, down) mapping batch number to the index of
    the sample whose gaze has the smallest squared distance to LEFT, RIGHT,
    UP and DOWN respectively.
  """
  best_left, best_right, best_up, best_down = {}, {}, {}, {}
  wanted = set(K)
  if not wanted:
    return best_left, best_right, best_up, best_down
  last_batch = max(wanted)

  # Build the target tensors once rather than once per sample.
  searches = (
    (best_left, torch.tensor(LEFT)),
    (best_right, torch.tensor(RIGHT)),
    (best_up, torch.tensor(UP)),
    (best_down, torch.tensor(DOWN)),
  )

  for k, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in enumerate(data_loader):
    if k > last_batch:
      break
    if k not in wanted:
      continue
    print(k)
    candidates = gaze[:min(I, len(gaze))]
    for best, target in searches:
      best[k] = _closest_gaze_index(candidates, target)

  return best_left, best_right, best_up, best_down


def _save_deepdream_plots(results_folder, branch, label, k, i, start_img, learned_img):
  """Save the orig / learned / logdiff / diff pictures for one input branch."""
  eps = 1e-5  # keeps the log finite where the image did not change
  squared_change = (learned_img - start_img) ** 2
  panels = (
    ("orig", start_img),
    ("learned", learned_img),
    ("logdiff", np.log(squared_change + eps)),
    ("diff", squared_change),
  )
  for tag, picture in panels:
    plt.imshow(normalize(picture))
    plt.axis('off')
    plt.savefig(results_folder+f"results_deepdream_{branch}_{label}_{tag}_{k},{i}.png")


def deep_dream(model, data_loader, target, K, indices, image_type, label, diffs_list=None):
  """Optimise selected input images so the model's output moves toward ``target``.

  For every batch number in ``K`` the sample ``indices[k]`` is taken, its face,
  eye and face-grid inputs are updated by 1000 SGD steps (lr 1e-3) on the
  squared error between the model output and ``target``, and the results are
  optionally plotted.

  Args:
    model: network called as model(imFace, imEyeL, imEyeR, faceGrid).
    data_loader: iterable of (row, imFace, imEyeL, imEyeR, faceGrid, gaze) batches.
    target: desired output vector, e.g. one of LEFT / RIGHT / UP / DOWN.
    K: batch numbers to process; any order, adjacent numbers allowed.
    indices: dict mapping batch number to the sample index inside that batch.
    image_type: 'face', 'lefteye', 'righteye' or 'full' selects which images
      are saved; any other value saves nothing.
    label: text embedded in the saved file names.
    diffs_list: optional dict with 'left', 'right' and 'face' lists; when
      args.model is 'full' the summed normalised squared change of each input
      is appended to them.

  Raises:
    KeyError: a batch number in ``K`` has no entry in ``indices``.
  """
  results_folder = "./drive/MyDrive/CSC413Project/results/"
  target = torch.tensor([target]).cuda()
  wanted = set(K)
  last_batch = max(wanted) if wanted else -1

  for k, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in enumerate(data_loader):
    if k > last_batch:
      break
    if k not in wanted:
      continue
    i = indices[k]
    print("Start", f"k={k}", f"i={i}")

    # Keep untouched HWC copies of the starting images for later comparison
    start_imFace = imFace[i].numpy().copy().transpose(1,2,0)
    start_imEyeL = imEyeL[i].numpy().copy().transpose(1,2,0)
    start_imEyeR = imEyeR[i].numpy().copy().transpose(1,2,0)

    # The inputs themselves are the trainable variables
    test_imFace = imFace[i].detach().clone().requires_grad_(True)
    test_imEyeL = imEyeL[i].detach().clone().requires_grad_(True)
    test_imEyeR = imEyeR[i].detach().clone().requires_grad_(True)
    test_faceGrid = faceGrid[i].detach().clone().requires_grad_(True)
    opt = torch.optim.SGD([test_imFace, test_imEyeL, test_imEyeR, test_faceGrid], lr=1e-3)

    # Train
    print("Train", f"k={k}", f"i={i}")
    model.eval()
    n_epoch = 1000
    for n in range(n_epoch):
      opt.zero_grad()
      output = model(test_imFace.unsqueeze(0), test_imEyeL.unsqueeze(0), test_imEyeR.unsqueeze(0), test_faceGrid.unsqueeze(0))
      loss = torch.sum((output - target) ** 2)
      loss.backward()
      opt.step()

    learned_imFace = test_imFace.detach().numpy().transpose(1,2,0)
    learned_imEyeL = test_imEyeL.detach().numpy().transpose(1,2,0)
    learned_imEyeR = test_imEyeR.detach().numpy().transpose(1,2,0)

    # Only the full model reports per-input change sums, and only on request
    if args.model == 'full' and diffs_list is not None:
      diffs_list['left'].append(np.sum(normalize((learned_imEyeL - start_imEyeL) ** 2)))
      diffs_list['right'].append(np.sum(normalize((learned_imEyeR - start_imEyeR) ** 2)))
      diffs_list['face'].append(np.sum(normalize((learned_imFace - start_imFace) ** 2)))

    # Plot results
    print("Plot", f"k={k}", f"i={i}")
    branches = (
      ('face', start_imFace, learned_imFace),
      ('lefteye', start_imEyeL, learned_imEyeL),
      ('righteye', start_imEyeR, learned_imEyeR),
    )
    for branch, start_img, learned_img in branches:
      if image_type == branch or image_type == 'full':
        _save_deepdream_plots(results_folder, branch, label, k, i, start_img, learned_img)

In [None]:
# Batch numbers to process: every 15th batch, 0 through 285.
K = [k * 15 for k in range(20)]

# Each table maps a batch number from K to the index of one sample inside that
# batch (deep_dream reads it as indices[k]).
# NOTE(review): the original note credited 'get_max_indices', but the
# dict-per-batch layout matches what get_best_indices returns — confirm.
# Looking left
left_indices = {0: 48, 15: 13, 30: 0, 45: 66, 60: 55, 75: 75, 90: 79, 105: 15, 120: 0, 135: 49, 150: 7, 165: 16, 180: 30, 195: 37, 210: 17, 225: 28, 240: 17, 255: 6, 270: 52, 285: 78}
# Looking right
right_indices = {0: 41, 15: 80, 30: 99, 45: 73, 60: 30, 75: 56, 90: 89, 105: 83, 120: 5, 135: 13, 150: 28, 165: 34, 180: 42, 195: 87, 210: 0, 225: 18, 240: 84, 255: 89, 270: 47, 285: 47}
# Looking up
up_indices = {0: 48, 15: 84, 30: 99, 45: 45, 60: 55, 75: 13, 90: 70, 105: 83, 120: 14, 135: 10, 150: 92, 165: 22, 180: 26, 195: 25, 210: 1, 225: 72, 240: 99, 255: 60, 270: 74, 285: 92}
# Looking down
down_indices = {0: 8, 15: 55, 30: 12, 45: 81, 60: 80, 75: 19, 90: 93, 105: 31, 120: 19, 135: 80, 150: 42, 165: 66, 180: 81, 195: 79, 210: 31, 225: 88, 240: 87, 255: 89, 270: 17, 285: 14}

# Hand-picked indices (batch number -> sample index), used by the SmoothGrad run
indices = {0: 8, 15: 80, 75: 13}

In [None]:
# 'noplot' matches none of deep_dream's image types, so no pictures are saved.
img_type = 'noplot'

# (label, target gaze, starting samples): each run pushes images that start in
# one direction toward the opposite one.
dream_runs = (
  ("left2right", RIGHT, left_indices),
  ("right2left", LEFT, right_indices),
  ("up2down", DOWN, up_indices),
  ("down2up", UP, down_indices),
)

for dream_label, dream_target, dream_indices in dream_runs:
  # Fresh accumulators per run
  diffs_list = {'left': [], 'right': [], 'face': []}
  print(dream_label)
  deep_dream(model, train_loader, dream_target, K, dream_indices, img_type, dream_label, diffs_list)

  print("diffs list:")
  print(diffs_list)

# SmoothGrad

## SmoothGrad classes

In [None]:
class VanillaGrad(object):
    """Plain input-gradient saliency for a model that takes a single input."""

    def __init__(self, pretrained_model, cuda=False):
        self.pretrained_model = pretrained_model
        self.cuda = cuda

    def _selected_output(self, output, index):
        """Pick the output entry to differentiate.

        Returns:
            (scalar, index): the sum of ``output`` masked to position
            ``index`` of its last axis, and the index actually used (the
            argmax of the flattened output when ``index`` is None).
        """
        if index is None:
            index = np.argmax(output.data.cpu().numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][index] = 1
        mask = torch.from_numpy(one_hot)
        if self.cuda:
            mask = mask.cuda()
        return torch.sum(mask * output), index

    def __call__(self, x, index=None):
        """Return d(output[index]) / dx for the first sample of ``x``.

        Args:
            x: 4-D input tensor with ``requires_grad`` enabled.
            index: output position to explain; defaults to the argmax.
        """
        output = self.pretrained_model(x)
        selected, _ = self._selected_output(output, index)
        selected.backward()

        grad = x.grad.data.cpu().numpy()
        return grad[0, :, :, :]


# SmoothGrad class - modified to work with iTracker
class SmoothGrad(VanillaGrad):
    """SmoothGrad over the three image inputs (face, left eye, right eye).

    Gradients of the selected output are averaged over ``n_samples`` copies
    of the inputs perturbed with Gaussian noise.
    """

    # Inputs whose gradients are accumulated for each model type; any other
    # type is treated as the full model (all three inputs).
    _ACTIVE_INPUTS = {
        'left_eye': ('eyeL',),
        'right_eye': ('eyeR',),
        'joint_eye': ('eyeL', 'eyeR'),
        'face': ('face',),
    }

    def __init__(self, pretrained_model, cuda=False, stdev_spread=0.15,
                 n_samples=25, magnitude=True, model_type=None):
        """
        Args:
            pretrained_model: called as model(imFace, imEyeL, imEyeR, faceGrid).
            cuda: move the noisy inputs and the output mask to the GPU.
            stdev_spread: noise standard deviation as a fraction of each
                image's value range.
            n_samples: number of noisy samples to average over.
            magnitude: accumulate squared gradients instead of signed ones.
            model_type: 'left_eye', 'right_eye', 'joint_eye', 'face' or
                'full'; when ``None`` the global ``args.model`` is read at
                call time.
        """
        super(SmoothGrad, self).__init__(pretrained_model, cuda)
        self.stdev_spread = stdev_spread
        self.n_samples = n_samples
        # Attribute name (sic) kept for code that already reads or sets it.
        self.magnitutde = magnitude
        self.model_type = model_type

    def __call__(self, x, index=None):
        """Compute the averaged gradients for one sample.

        Args:
            x: tuple (imFace, imEyeL, imEyeR, faceGrid); the images are
                4-D tensors, faceGrid is passed to the model unchanged.
            index: output position to explain; when ``None`` the argmax of
                the first noisy sample's output is used for all samples.

        Returns:
            (avg_gradients_face, avg_gradients_eyeL, avg_gradients_eyeR),
            each the first batch entry of the accumulated gradients divided
            by ``n_samples``; inputs the model type does not use stay zero.

        Raises:
            ValueError: ``n_samples`` is smaller than 1.
        """
        if self.n_samples < 1:
            raise ValueError("n_samples must be at least 1")
        model_type = self.model_type if self.model_type is not None else args.model
        active = self._ACTIVE_INPUTS.get(model_type, ('face', 'eyeL', 'eyeR'))

        imFace, imEyeL, imEyeR, faceGrid = x
        images = {
            'face': imFace.data.cpu().numpy(),
            'eyeL': imEyeL.data.cpu().numpy(),
            'eyeR': imEyeR.data.cpu().numpy(),
        }

        # Each branch will be treated separately
        stdevs = {key: self.stdev_spread * (np.max(img) - np.min(img))
                  for key, img in images.items()}
        totals = {key: np.zeros_like(img) for key, img in images.items()}

        for _ in range(self.n_samples):
            noisy = {}
            for key, img in images.items():
                noise = np.random.normal(0, stdevs[key], img.shape).astype(np.float32)
                tensor = torch.from_numpy(img + noise)
                if self.cuda:
                    tensor = tensor.cuda()
                noisy[key] = tensor.requires_grad_(True)

            output = self.pretrained_model(noisy['face'], noisy['eyeL'], noisy['eyeR'], faceGrid)
            selected, index = self._selected_output(output, index)
            selected.backward()

            # Each model/branch computes different gradients
            for key in active:
                grad = noisy[key].grad.data.cpu().numpy()
                totals[key] += (grad * grad) if self.magnitutde else grad

        if model_type == 'left_eye':
            # Kept from the original: the unused face / right-eye results are
            # zero arrays shaped like the left-eye input.
            totals['eyeR'] = np.zeros(totals['eyeL'].shape)
            totals['face'] = np.zeros(totals['eyeL'].shape)

        # Average once, after all samples have been accumulated.
        avg_gradients_face = totals['face'][0, :, :, :] / self.n_samples
        avg_gradients_eyeL = totals['eyeL'][0, :, :, :] / self.n_samples
        avg_gradients_eyeR = totals['eyeR'][0, :, :, :] / self.n_samples

        return avg_gradients_face, avg_gradients_eyeL, avg_gradients_eyeR

def preprocess_image(img, cuda=False):
    """Turn an HWC image array into a normalised 1xCxHxW tensor that tracks gradients.

    The channel axis is reversed, each channel is shifted and scaled by fixed
    per-channel constants, and a leading batch axis is added. ``img`` itself
    is left untouched.

    Args:
        img: array indexed as [row, column, channel] with three channels.
        cuda: move the tensor to the GPU before enabling gradients.

    Returns:
        Tensor of shape (1, 3, H, W) with ``requires_grad`` set.
    """
    channel_means = [0.485, 0.456, 0.406]
    channel_stds = [0.229, 0.224, 0.225]

    # Work on a copy viewed with the channel order reversed
    flipped = img.copy()[:, :, ::-1]
    for channel, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        flipped[:, :, channel] = flipped[:, :, channel] - mean
        flipped[:, :, channel] = flipped[:, :, channel] / std

    # HWC -> CHW, made contiguous so torch can wrap the buffer
    chw = np.ascontiguousarray(np.transpose(flipped, (2, 0, 1)))
    batch = torch.from_numpy(chw).unsqueeze_(0)
    if cuda:
        batch = batch.cuda()
    return batch.requires_grad_(True)

# Save images (if 2D, will be saved as gray image)
def save_image(file_path, img, orig, percentile=99):
    """Write ``img`` to ``file_path`` with a gray colormap and return what was written.

    Args:
        file_path: destination image file.
        img: picture to save when ``orig`` is true; otherwise an array that
            is first summed over its leading axis and rescaled.
        orig: true to save ``img`` unchanged.
        percentile: percentile of the summed array whose absolute value sets
            the symmetric range used for rescaling.

    Returns:
        The array handed to ``plt.imsave``.
    """
    if orig:
        plt.imsave(file_path, img, cmap='gray')
        return img

    collapsed = np.sum(img, axis=0)
    bound = abs(np.percentile(collapsed, percentile))
    lower, upper = -bound, bound
    # Map [lower, upper] onto [0, 1], then clip to [-1, 1]
    rescaled = np.clip((collapsed - lower) / (upper - lower), -1, 1)

    plt.imsave(file_path, rescaled, cmap='gray')
    return rescaled

## (Code we modified) Helper Functions

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import csv   

# Normalize numpy matrices
def normalize(image):
  """Linearly rescale ``image`` so its minimum maps to 0 and its maximum to 1.

  Args:
    image: array-like of numbers.

  Returns:
    Array of the same shape. A constant input yields all zeros instead of
    the NaNs a 0/0 division would produce.
  """
  im_min = np.min(image)
  im_max = np.max(image)
  span = im_max - im_min
  if span == 0:
    # Constant image: the numerator is already all zeros, so divide by one.
    span = 1
  return (image - im_min) / span

# Indices of the pictures with the most extreme gaze coordinates in each batch
def get_max_indices(loader=None, batch_size=100, max_batches=20):
  """Locate, per batch, the samples with the smallest / largest gaze coordinates.

  Args:
    loader: iterable of (row, imFace, imEyeL, imEyeR, faceGrid, gaze)
      batches; defaults to the module-level ``val_loader``.
    batch_size: only the first ``batch_size`` samples of a batch are examined
      (fewer when the batch is smaller).
    max_batches: number of leading batches to examine.

  Returns:
    Integer array with one row per examined batch, laid out as
    [min_y_i, max_y_i, min_x_i, max_x_i], where x is gaze[..][0] and y is
    gaze[..][1]. Ties resolve to the first sample.
  """
  if loader is None:
    loader = val_loader
  test_indices = []
  for k, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in enumerate(loader):
    count = min(batch_size, len(gaze))
    # Each extreme is searched on its own coordinate, from scratch per batch.
    xs = [float(gaze[i][0]) for i in range(count)]
    ys = [float(gaze[i][1]) for i in range(count)]
    positions = range(count)
    min_x_i = min(positions, key=xs.__getitem__)
    max_x_i = max(positions, key=xs.__getitem__)
    min_y_i = min(positions, key=ys.__getitem__)
    max_y_i = max(positions, key=ys.__getitem__)
    test_indices.append([min_y_i, max_y_i, min_x_i, max_x_i])
    if k + 1 >= max_batches:
      break
  test_indices = np.array(test_indices)
  return test_indices

# Save the SmoothGrad results
def save_matrix_sum(file_path, sum_matrix):
    """Write ``sum_matrix`` to ``file_path`` as comma-separated text.

    Every entry is formatted with ``%s`` and the file starts with a
    'Branch,Batch,Index,Sum' header line.
    """
    np.savetxt(
        file_path,
        sum_matrix,
        fmt='%s',
        delimiter=",",
        header='Branch,Batch,Index,Sum',
    )

## (Code we modified) SmoothGrad Run

In [None]:
# SmoothGrad settings: no fixed output index (the explainer falls back to the
# argmax) and inputs are not moved to the GPU by the explainer.
use_cuda = False
target_index = None
results_folder = "./drive/MyDrive/CSC413Project/results_smoothgrad/"

# Compute smooth gradient (squared gradients, since magnitude=True)
smooth_grad = SmoothGrad(pretrained_model=model, cuda=use_cuda, magnitude=True)

In [None]:
sum_matrices = []
sum_matrix_path = results_folder+f"{args.model}_sum_matrix.csv"
# Nothing past the last hand-picked batch is needed, so stop reading there.
last_wanted_batch = max(indices, default=-1)

for k, (row, imFace, imEyeL, imEyeR, faceGrid, gaze) in enumerate(train_loader):
    if k > last_wanted_batch:
        break
    # Compare batch numbers by value ('is' only happens to work for small ints)
    if k not in indices:
        continue
    img_i = indices[k]

    # Preprocess the images to be passed into SmoothGrad
    test_imFace = normalize(imFace[img_i].numpy().copy().transpose(1,2,0))
    preprocessed_imFace = preprocess_image(test_imFace, use_cuda)

    test_imEyeL = normalize(imEyeL[img_i].numpy().copy().transpose(1,2,0))
    preprocessed_imEyeL = preprocess_image(test_imEyeL, use_cuda)

    test_imEyeR = normalize(imEyeR[img_i].numpy().copy().transpose(1,2,0))
    preprocessed_imEyeR = preprocess_image(test_imEyeR, use_cuda)

    test_faceGrid = faceGrid[img_i].unsqueeze(0)

    avg_gradients_face, avg_gradients_eyeL, avg_gradients_eyeR = smooth_grad((preprocessed_imFace, preprocessed_imEyeL, preprocessed_imEyeR, test_faceGrid), index=target_index)

    # Save images and results depending on the branches and models:
    # (CSV row label, file-name infix, original image, averaged gradients)
    if args.model == 'left_eye':
        outputs = [('Left', '', test_imEyeL, avg_gradients_eyeL)]
    elif args.model == 'right_eye':
        outputs = [('Right', '', test_imEyeR, avg_gradients_eyeR)]
    elif args.model == 'face':
        outputs = [('Face', '', test_imFace, avg_gradients_face)]
    elif args.model == 'full':
        outputs = [
            ('Full Left', 'left_eye_', test_imEyeL, avg_gradients_eyeL),
            ('Full Right', 'right_eye_', test_imEyeR, avg_gradients_eyeR),
            ('Full Face', 'face_', test_imFace, avg_gradients_face),
        ]
    else:
        outputs = []

    for row_label, infix, image, gradients in outputs:
        save_image(results_folder+f"{args.model}_{infix}orig_{k},{img_i}.png", image, True)
        save_image(results_folder+f"{args.model}_{infix}smoothgrad_{k},{img_i}.png", gradients, False)
        sum_matrices.append([row_label, k, img_i, np.sum(gradients)])

    # Persist after every processed batch so partial results survive a crash
    save_matrix_sum(sum_matrix_path, np.array(sum_matrices))

# Always leave a (possibly header-only) CSV behind
save_matrix_sum(sum_matrix_path, np.array(sum_matrices))