## Installation

## Imports

In [1]:
# Import obf functionality
import sys
from pathlib import Path
sys.path.append(str(Path("../../OBF").resolve()))

from obf.model import ae
from obf.model import creator


In [2]:
from eyemind.dataloading.load_dataset import limit_sequence_len, get_label_mapper, get_filenames_for_dataset, create_filename_col, get_stratified_group_splits
from eyemind.dataloading.gaze_data import GazeDataModule
from eyemind.models.classifier import EncoderClassifierModel
# from eyemind.models import creator
# from eyemind.models import ae 
import pandas as pd
import torch
from torch.utils.data import SubsetRandomSampler, DataLoader
from pytorch_lightning import Trainer


## Data Loading

In [3]:
data_folder = Path("/Users/rickgentry/emotive_lab/eyemind/data/preprocessed/output")
label_filepath = Path("/Users/rickgentry/emotive_lab/eyemind/data/EML1_pageLevel.csv")

In [4]:
# Read the labels and create id
label_df = pd.read_csv(label_filepath)
label_df = create_filename_col(label_df)

In [5]:
label_df[~label_df["Rote_X"].isna()]

Unnamed: 0,ParticipantID,Text,PageNum,datetime,unix_start,unix_end,readtime,MW,SVT,Rote_X,Inference_X,Deep_X,Rote_Y,Inference_Y,Rote_Z,Inference_Z,Deep_Z,Rote_D,Inference_D,filename
2,EML1_001,Bias,3,,,,33.862,1.0,1.0,1.0,1.0,1.0,,,,,,,,EML1_001-Bias2
6,EML1_001,Bias,7,,,,23.788,0.0,1.0,1.0,1.0,1.0,,,,,,1.0,,EML1_001-Bias6
12,EML1_001,CausalClaims,3,,,,26.138,1.0,0.0,1.0,1.0,1.0,,,,,,,,EML1_001-CausalClaims2
18,EML1_001,CausalClaims,9,,,,17.016,1.0,0.0,0.0,1.0,1.0,,,,,,,,EML1_001-CausalClaims8
20,EML1_001,Hypotheses,2,,,,,1.0,1.0,1.0,1.0,1.0,,,,,,,,EML1_001-Hypotheses1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7859,EML1_167,Hypotheses,9,,,,,1.0,1.0,0.0,0.0,0.0,,,,,,,,EML1_167-Hypotheses8
7860,EML1_167,Validity,2,,,,,1.0,1.0,1.0,1.0,0.0,,,,,,,,EML1_167-Validity1
7864,EML1_167,Validity,8,,,,,1.0,0.0,0.0,1.0,0.0,,,,,,,,EML1_167-Validity7
7869,EML1_167,Variables,6,,,,,1.0,1.0,0.0,0.0,1.0,,,,,,,,EML1_167-Variables5


In [6]:
# Define label columns for creating datasets
label_cols = ["Rote_X", "Inference_X", "Deep_X", "Rote_Y", "Inference_Y", "Rote_Z", "Inference_Z", "Deep_Z", "Rote_D", "Inference_D"]

In [7]:
def get_datasets(label_cols, label_df, data_folder, x_transforms=None, y_transforms=None, id_col="filename"):
    l_ds = []
    for label_col in label_cols:
        filenames = get_filenames_for_dataset(label_df, data_folder, id_col, label_col)
        label_mapper = get_label_mapper(label_df, id_col, label_col)
        ds = GazeDataModule(data_folder, file_list=filenames, label_mapper=label_mapper, transform_x=x_transforms, transform_y=y_transforms)
        l_ds.append((label_col,ds))
    return l_ds
    

In [8]:
l_ds = get_datasets(["Rote_X"], label_df, data_folder, x_transforms=[limit_sequence_len,lambda data: torch.tensor(data).float()], y_transforms=[lambda data: torch.tensor(data).float()])

In [9]:
files = [f.split(".")[0] for f in l_ds[0][1].file_list]

In [10]:
# Get Splits
splits = get_stratified_group_splits(files, label_df, l_ds[0][0], "filename")

In [11]:
# Test with one split
train_split, val_split = next(splits)

In [12]:
train_split

array([   1,    2,    3,    5,    6,    7,    9,   10,   11,   13,   14,
         15,   17,   18,   19,   21,   22,   23,   25,   26,   27,   29,
         30,   31,   32,   34,   35,   37,   38,   39,   41,   42,   43,
         45,   46,   47,   49,   50,   51,   53,   54,   55,   56,   58,
         59,   61,   62,   63,   64,   65,   66,   69,   70,   71,   73,
         74,   75,   76,   78,   79,   81,   82,   83,   84,   86,   87,
         88,   90,   91,   93,   94,   95,   97,   98,   99,  100,  102,
        103,  104,  106,  107,  109,  110,  111,  113,  114,  115,  117,
        118,  119,  121,  122,  123,  125,  126,  127,  128,  130,  131,
        132,  133,  135,  136,  138,  139,  141,  142,  143,  145,  146,
        147,  148,  150,  151,  153,  154,  155,  157,  158,  159,  160,
        162,  163,  165,  166,  167,  168,  170,  171,  173,  174,  175,
        176,  178,  179,  181,  182,  183,  185,  186,  187,  189,  190,
        191,  193,  194,  195,  197,  198,  199,  2

In [13]:
# Setup datamodule
dm = l_ds[0][1]
dm.setup(stage="fit")

1251 1251


In [14]:
# Get dataloader
train_sampler = SubsetRandomSampler(train_split)
train_dl = dm.train_dataloader(sampler=train_sampler)
val_sampler = SubsetRandomSampler(val_split)
val_dl = dm.val_dataloader(sampler=val_sampler)

## Training

In [15]:
pre_trained_weights_dir = Path("../trained_models/obf_weights")

In [16]:
# Load pretrained encoder
encoder = creator.load_encoder(str(pre_trained_weights_dir.resolve()))

Loading:  /Users/rickgentry/emotive_lab/eyemind/trained_models/obf_weights/encoder_1633040995_gru.pt


In [17]:
model = EncoderClassifierModel(encoder, cuda=False)



In [25]:
# Trainer
trainer = Trainer()


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [26]:
# Find learning rate
trainer.fit(model, train_dl, val_dl)


  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 1:  44%|████▍     | 69/156 [00:50<01:03,  1.37it/s, loss=nan, v_num=0, train_loss_step=0.766, train_accuracy_step=0.375, val_loss_step=0.692, val_accuracy_step=0.750, val_loss_epoch=0.693, val_accuracy_epoch=0.529, val_auroc=0.354, train_loss_epoch=0.750, train_accuracy_epoch=0.469, train_auroc=0.465] 