## Installation

## Imports

In [26]:
# Import obf functionality
import sys
from pathlib import Path
sys.path.append(str(Path("../../OBF").resolve()))

from obf.model import ae
from obf.model import creator


In [27]:
import os

import pandas as pd
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import SubsetRandomSampler, DataLoader

from eyemind.dataloading.load_dataset import limit_sequence_len, get_label_mapper, get_filenames_for_dataset, create_filename_col, get_stratified_group_splits
from eyemind.dataloading.gaze_data import GazeDataModule
from eyemind.models.classifier import EncoderClassifierModel
# from eyemind.models import creator
# from eyemind.models import ae 


## Data Loading

In [3]:
# Root of the EyeMind data tree. Set the EYEMIND_DATA_DIR environment variable to run
# the notebook on another machine; the default keeps the original location.
data_root = Path(os.environ.get("EYEMIND_DATA_DIR", "/Users/rickgentry/emotive_lab/eyemind/data"))
# Folder with the preprocessed gaze files and the page-level label table.
data_folder = data_root / "preprocessed" / "output"
label_filepath = data_root / "EML1_pageLevel.csv"

In [4]:
# Read the page-level label table from `label_filepath` and pass it through
# `create_filename_col`.
# NOTE(review): presumably this adds the `filename` id column visible in the preview
# below (it is the default `id_col` of the helper functions) — confirm in eyemind.
label_df = pd.read_csv(label_filepath)
label_df = create_filename_col(label_df)

In [5]:
# Preview only the rows whose Inference_X value is not missing.
label_df[label_df["Inference_X"].notna()]

Unnamed: 0,ParticipantID,Text,PageNum,datetime,unix_start,unix_end,readtime,MW,SVT,Rote_X,Inference_X,Deep_X,Rote_Y,Inference_Y,Rote_Z,Inference_Z,Deep_Z,Rote_D,Inference_D,filename
2,EML1_001,Bias,3,,,,33.862,1.0,1.0,1.0,1.0,1.0,,,,,,,,EML1_001-Bias2
6,EML1_001,Bias,7,,,,23.788,0.0,1.0,1.0,1.0,1.0,,,,,,1.0,,EML1_001-Bias6
12,EML1_001,CausalClaims,3,,,,26.138,1.0,0.0,1.0,1.0,1.0,,,,,,,,EML1_001-CausalClaims2
18,EML1_001,CausalClaims,9,,,,17.016,1.0,0.0,0.0,1.0,1.0,,,,,,,,EML1_001-CausalClaims8
20,EML1_001,Hypotheses,2,,,,,1.0,1.0,1.0,1.0,1.0,,,,,,,,EML1_001-Hypotheses1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7859,EML1_167,Hypotheses,9,,,,,1.0,1.0,0.0,0.0,0.0,,,,,,,,EML1_167-Hypotheses8
7860,EML1_167,Validity,2,,,,,1.0,1.0,1.0,1.0,0.0,,,,,,,,EML1_167-Validity1
7864,EML1_167,Validity,8,,,,,1.0,0.0,0.0,1.0,0.0,,,,,,,,EML1_167-Validity7
7869,EML1_167,Variables,6,,,,,1.0,1.0,0.0,0.0,1.0,,,,,,,,EML1_167-Variables5


In [6]:
# Label columns of the label table; one dataset is created per column.
label_cols = [
    "Rote_X",
    "Inference_X",
    "Deep_X",
    "Rote_Y",
    "Inference_Y",
    "Rote_Z",
    "Inference_Z",
    "Deep_Z",
    "Rote_D",
    "Inference_D",
]

In [7]:
def get_datasets(label_cols, label_df, data_folder, x_transforms=None, y_transforms=None, id_col="filename"):
    """Create one ``GazeDataModule`` per label column.

    For each column the file list comes from ``get_filenames_for_dataset`` and the
    label mapper from ``get_label_mapper``; both are handed to the datamodule
    together with the optional transforms.

    Args:
        label_cols: Iterable of label column names.
        label_df: Label table forwarded to the eyemind helpers.
        data_folder: Data folder forwarded to the file lookup and the datamodule.
        x_transforms: Forwarded to the datamodule as ``transform_x``.
        y_transforms: Forwarded to the datamodule as ``transform_y``.
        id_col: Not used by this function; kept so existing calls keep working.

    Returns:
        A list of ``(label_col, datamodule)`` tuples in the order of ``label_cols``.
    """
    def _make_datamodule(col):
        # Same call order as before: file lookup, then label mapper, then datamodule.
        file_names = get_filenames_for_dataset(label_df, data_folder, col)
        mapper = get_label_mapper(label_df, col)
        return GazeDataModule(
            data_folder,
            file_list=file_names,
            label_mapper=mapper,
            transform_x=x_transforms,
            transform_y=y_transforms,
        )

    return [(col, _make_datamodule(col)) for col in label_cols]
    

In [8]:
# Build the datamodule for the single "Rote_X" label. Inputs go through
# `limit_sequence_len` and are then converted to float tensors; labels are converted
# to float tensors.
l_ds = get_datasets(
    ["Rote_X"],
    label_df,
    data_folder,
    x_transforms=[limit_sequence_len, lambda data: torch.tensor(data).float()],
    y_transforms=[lambda data: torch.tensor(data).float()],
)

In [9]:
def get_all_splits(dms, label_df, id_col="filename"):
    """Compute the splits object for every ``(label_name, datamodule)`` pair.

    Each file name in the datamodule's ``file_list`` is cut at its first "." and the
    resulting ids are passed to ``get_stratified_group_splits`` together with the
    label table, the label name and ``id_col``.

    Args:
        dms: Iterable of ``(label_name, datamodule)`` tuples.
        label_df: Label table forwarded to ``get_stratified_group_splits``.
        id_col: Id column name forwarded to ``get_stratified_group_splits``.

    Returns:
        A list with one splits object per input pair, in input order.
    """
    def _splits_for(label_name, dm):
        stems = [name.split(".", 1)[0] for name in dm.file_list]
        return get_stratified_group_splits(stems, label_df, label_name, id_col)

    return [_splits_for(label_name, dm) for label_name, dm in dms]

In [10]:
# Ids of the first datamodule's files: the part of each file name before the first ".".
files = [name.split(".", 1)[0] for name in l_ds[0][1].file_list]

In [11]:
files

['EML1_115-Hypotheses5',
 'EML1_125-Hypotheses8',
 'EML1_103-Hypotheses4',
 'EML1_021-Variables8',
 'EML1_041-Variables3',
 'EML1_049-CausalClaims6',
 'EML1_112-Validity6',
 'EML1_096-Validity8',
 'EML1_035-Variables8',
 'EML1_079-Bias2',
 'EML1_078-CausalClaims6',
 'EML1_090-Variables8',
 'EML1_122-Hypotheses4',
 'EML1_066-Hypotheses4',
 'EML1_008-Variables8',
 'EML1_132-CausalClaims6',
 'EML1_029-Bias2',
 'EML1_063-Hypotheses8',
 'EML1_070-Variables1',
 'EML1_048-Hypotheses1',
 'EML1_149-Hypotheses4',
 'EML1_124-Variables5',
 'EML1_121-Hypotheses5',
 'EML1_049-Bias6',
 'EML1_133-Bias6',
 'EML1_068-Bias2',
 'EML1_041-Hypotheses1',
 'EML1_035-Hypotheses5',
 'EML1_003-Bias2',
 'EML1_047-Variables5',
 'EML1_056-Hypotheses5',
 'EML1_014-Variables3',
 'EML1_116-Variables3',
 'EML1_063-CausalClaims3',
 'EML1_012-CausalClaims8',
 'EML1_041-Bias8',
 'EML1_049-Bias8',
 'EML1_134-Variables8',
 'EML1_025-Variables3',
 'EML1_102-Variables1',
 'EML1_026-Validity8',
 'EML1_101-CausalClaims6',
 'EML

In [12]:
# Get Splits for the first datamodule; l_ds[0][0] is its label column name.
# The result is consumed with next() in the following cell, i.e. it is used as a
# one-pass iterator rather than a list.
splits = get_stratified_group_splits(files, label_df, l_ds[0][0], "filename")

In [13]:
# Test with one split
# NOTE: next() advances `splits`, so re-running this cell yields the following split
# (or raises StopIteration once exhausted) instead of the same one.
train_split, val_split = next(splits)

In [14]:
train_split

array([   1,    2,    3,    5,    6,    7,    9,   10,   11,   13,   14,
         15,   17,   18,   19,   21,   22,   23,   25,   26,   27,   29,
         30,   31,   32,   34,   35,   37,   38,   39,   41,   42,   43,
         45,   46,   47,   49,   50,   51,   53,   54,   55,   56,   58,
         59,   61,   62,   63,   64,   65,   66,   69,   70,   71,   73,
         74,   75,   76,   78,   79,   81,   82,   83,   84,   86,   87,
         88,   90,   91,   93,   94,   95,   97,   98,   99,  100,  102,
        103,  104,  106,  107,  109,  110,  111,  113,  114,  115,  117,
        118,  119,  121,  122,  123,  125,  126,  127,  128,  130,  131,
        132,  133,  135,  136,  138,  139,  141,  142,  143,  145,  146,
        147,  148,  150,  151,  153,  154,  155,  157,  158,  159,  160,
        162,  163,  165,  166,  167,  168,  170,  171,  173,  174,  175,
        176,  178,  179,  181,  182,  183,  185,  186,  187,  189,  190,
        191,  193,  194,  195,  197,  198,  199,  2

In [15]:
# Setup datamodule: take the datamodule of the first (label, datamodule) pair and
# run its "fit" stage setup.
dm = l_ds[0][1]
dm.setup(stage="fit")

1251 1251


In [16]:
# Get dataloader
def get_dataloaders_from_split(dm, train_split, val_split):
    """Build the train and validation dataloaders for one split.

    Each index collection is wrapped in a ``SubsetRandomSampler`` and passed as
    ``sampler`` to the datamodule's ``train_dataloader`` / ``val_dataloader``.

    Args:
        dm: Datamodule whose dataloader methods accept a ``sampler`` keyword.
        train_split: Indices for the training dataloader.
        val_split: Indices for the validation dataloader.

    Returns:
        Tuple ``(train_dl, val_dl)``.
    """
    # The training loader is created before the validation loader, as before.
    train_loader = dm.train_dataloader(sampler=SubsetRandomSampler(train_split))
    val_loader = dm.val_dataloader(sampler=SubsetRandomSampler(val_split))
    return train_loader, val_loader

In [28]:
train_dl, val_dl = get_dataloaders_from_split(dm, train_split, val_split)

## Training

In [29]:
# Directory holding the pre-trained OBF encoder weights.
# NOTE(review): this is relative to the notebook's working directory and points at
# ../OBF, whereas the import cell adds ../../OBF to sys.path — confirm both refer to
# the intended checkout.
pre_trained_weights_dir = Path("../OBF/pre_weights/sample_weights")

In [30]:
# Load pretrained encoder
encoder = creator.load_encoder(str(pre_trained_weights_dir.resolve()))

Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt


In [31]:
# Classifier built around the pre-trained encoder, on CPU (cuda=False).
# freeze_encoder=False here, whereas the train_one_split runs in this notebook use
# freeze_encoder=True.
model = EncoderClassifierModel(encoder, cuda=False, freeze_encoder=False)

In [32]:
# TensorBoard logger for the manual Rote_X run with freeze_encoder=False: saved under
# "lightning_logs" with the run name "Rote_X_NotFreeze".
logger = TensorBoardLogger("lightning_logs", name="Rote_X_NotFreeze")


In [33]:
# Trainer
trainer = Trainer(max_epochs=10, logger=logger)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [34]:
# Fit the model on the first train/validation split. This runs the full training for
# the trainer's max_epochs; it is not a learning-rate search.
trainer.fit(model, train_dl, val_dl)


  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:   1%|▏         | 2/156 [00:04<05:24,  2.11s/it, loss=0.72, v_num=0, train_loss_step=0.741, train_accuracy_step=0.625]



Epoch 9: 100%|██████████| 156/156 [03:50<00:00,  1.48s/it, loss=0.563, v_num=0, train_loss_step=0.626, train_accuracy_step=0.750, val_loss_step=0.474, val_accuracy_step=0.875, val_loss_epoch=0.579, val_accuracy_epoch=0.734, val_auroc=0.535, train_loss_epoch=0.606, train_accuracy_epoch=0.710, train_auroc=0.504]


In [18]:
# Full list of label columns (same values as the earlier definition), used to build
# one datamodule per label.
label_cols = [
    "Rote_X",
    "Inference_X",
    "Deep_X",
    "Rote_Y",
    "Inference_Y",
    "Rote_Z",
    "Inference_Z",
    "Deep_Z",
    "Rote_D",
    "Inference_D",
]

In [19]:
# Rebuild `l_ds` with one datamodule per entry of `label_cols`, using the same
# transforms as the single-label run (this replaces the earlier Rote_X-only list).
l_ds = get_datasets(
    label_cols,
    label_df,
    data_folder,
    x_transforms=[limit_sequence_len, lambda data: torch.tensor(data).float()],
    y_transforms=[lambda data: torch.tensor(data).float()],
)

In [38]:
# Datamodule for the fifth label column (index 4 of label_cols, i.e. "Inference_Y").
dm_inf_y = l_ds[4][1]

In [39]:
dm_inf_y.setup("fit")

1235 1235


In [41]:
len(dm_inf_y.dataset_train.files)

1235

In [20]:
all_splits = get_all_splits(l_ds, label_df)

In [42]:
train_s, val_s = next(all_splits[4])

In [43]:
# Train the Inference_Y datamodule on one split for 5 epochs. The run gets its own
# logger named after the label (l_ds[4][0]), matching train_multiple_dms, instead of
# reusing the "Rote_X_NotFreeze" logger created for the earlier manual run.
# NOTE: train_one_split is defined in a later cell; that cell must be run first.
train_one_split(l_ds[4][1], train_s, val_s, 5, TensorBoardLogger("lightning_logs", name=l_ds[4][0]))

  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:   6%|▌         | 9/154 [00:20<05:33,  2.30s/it, loss=0.737, v_num=0, train_loss_step=0.733, train_accuracy_step=0.625]



Epoch 0:  18%|█▊        | 27/154 [01:03<04:59,  2.36s/it, loss=0.682, v_num=0, train_loss_step=0.660, train_accuracy_step=0.625]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [22]:
def train_one_split(dm, train_split, val_split, max_epochs, logger, freeze_encoder=True, cuda=False, weights_dir=None):
    """Train a freshly loaded encoder-classifier on one train/validation split.

    A new pre-trained encoder is loaded on every call, so successive calls do not
    share weights.

    Args:
        dm: Datamodule; ``setup(stage="fit")`` is called on it before the
            dataloaders are built.
        train_split: Indices used for the training dataloader.
        val_split: Indices used for the validation dataloader.
        max_epochs: Forwarded to the ``Trainer``.
        logger: Logger forwarded to the ``Trainer``.
        freeze_encoder: Forwarded to ``EncoderClassifierModel``. The default
            (True) is the value that used to be hard-coded.
        cuda: Forwarded to ``EncoderClassifierModel``. The default (False) is the
            value that used to be hard-coded.
        weights_dir: Directory with the pre-trained encoder weights. Defaults to
            the notebook-level ``pre_trained_weights_dir``.

    Returns:
        None. Results are only available through the logger.
    """
    if weights_dir is None:
        # Fall back to the notebook-level setting, as the original code did.
        weights_dir = pre_trained_weights_dir
    dm.setup(stage="fit")
    train_dl, val_dl = get_dataloaders_from_split(dm, train_split, val_split)
    encoder = creator.load_encoder(str(Path(weights_dir).resolve()))
    # NOTE(review): the saved outputs of runs made through this function report
    # 0 non-trainable params even with freeze_encoder=True — confirm that freezing
    # actually takes effect in EncoderClassifierModel.
    model = EncoderClassifierModel(encoder, cuda=cuda, freeze_encoder=freeze_encoder)
    trainer = Trainer(max_epochs=max_epochs, logger=logger)
    trainer.fit(model, train_dl, val_dl)
    

In [1]:
from pytorch_lightning.loggers import TensorBoardLogger

def train_multiple_dms(dms, splits, label_cols, epochs=5):
    """Train one model per datamodule, each on the next split of its iterator.

    ``splits``, ``dms`` and ``label_cols`` are walked in parallel, stopping at the
    shortest. Every label gets its own TensorBoard logger under "lightning_logs",
    named after the label column.

    Args:
        dms: Datamodules to train on.
        splits: One split iterator per datamodule; only one ``next()`` item is used.
        label_cols: Label names, used as logger names.
        epochs: Maximum number of epochs per run.
    """
    for split_iter, datamodule, label_name in zip(splits, dms, label_cols):
        # The logger is created before the split is drawn, as in the original order.
        tb_logger = TensorBoardLogger("lightning_logs", name=label_name)
        train_idx, val_idx = next(split_iter)
        train_one_split(datamodule, train_idx, val_idx, epochs, tb_logger)




In [44]:
# Train every label from index 4 ("Inference_Y") onwards, one split each.
# NOTE: all_splits[4] was already advanced once by the earlier next(all_splits[4])
# cell, so Inference_Y uses its following split here while the other labels use
# their first one.
dms = [t[1] for t in l_ds]
train_multiple_dms(dms[4:], all_splits[4:], label_cols[4:])

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Epoch 0:  18%|█▊        | 27/154 [05:32<26:03, 12.31s/it, loss=0.682, v_num=0, train_loss_step=0.660, train_accuracy_step=0.625] train_loss_epoch=0.725, train_accuracy_epoch=0.542, train_auroc=0.461]
Epoch 4:  33%|███▎      | 51/154 [01:45<03:33,  2.07s/it, loss=0.585, v_num=0, train_loss_step=0.893, train_accuracy_step=0.375, train_loss_epoch=0.593, train_accuracy_epoch=0.717, train_auroc=0.466]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


1268 1268
Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:   4%|▍         | 6/157 [00:11<04:47,  1.90s/it, loss=0.756, v_num=0, train_loss_step=0.772, train_accuracy_step=0.625]



Epoch 0:  15%|█▌        | 24/157 [00:46<04:18,  1.94s/it, loss=0.713, v_num=0, train_loss_step=0.742, train_accuracy_step=0.375]



Epoch 4: 100%|██████████| 157/157 [04:27<00:00,  1.71s/it, loss=0.67, v_num=0, train_loss_step=0.756, train_accuracy_step=0.375, val_loss_step=0.696, val_accuracy_step=0.625, val_loss_epoch=0.634, val_accuracy_epoch=0.667, val_auroc=0.539, train_loss_epoch=0.666, train_accuracy_epoch=0.606, train_auroc=0.517] 


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


1268 1268
Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:   3%|▎         | 4/157 [00:09<06:20,  2.49s/it, loss=0.659, v_num=0, train_loss_step=0.721, train_accuracy_step=0.500]



Epoch 4: 100%|██████████| 157/157 [04:21<00:00,  1.67s/it, loss=0.667, v_num=0, train_loss_step=0.903, train_accuracy_step=0.375, val_loss_step=0.604, val_accuracy_step=0.750, val_loss_epoch=0.577, val_accuracy_epoch=0.728, val_auroc=0.551, train_loss_epoch=0.624, train_accuracy_epoch=0.700, train_auroc=0.429]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


1268 1268
Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:  65%|██████▍   | 102/157 [03:34<01:55,  2.10s/it, loss=0.714, v_num=0, train_loss_step=0.843, train_accuracy_step=0.375]



Epoch 4: 100%|██████████| 157/157 [03:52<00:00,  1.48s/it, loss=0.671, v_num=0, train_loss_step=0.725, train_accuracy_step=0.750, val_loss_step=0.674, val_accuracy_step=0.625, val_loss_epoch=0.673, val_accuracy_epoch=0.590, val_auroc=0.586, train_loss_epoch=0.705, train_accuracy_epoch=0.549, train_auroc=0.530]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


1041 1041
Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:  13%|█▎        | 17/129 [00:49<05:24,  2.90s/it, loss=0.69, v_num=0, train_loss_step=0.798, train_accuracy_step=0.250] 



Epoch 4:  57%|█████▋    | 74/129 [03:12<02:23,  2.61s/it, loss=0.686, v_num=0, train_loss_step=0.561, train_accuracy_step=0.625, train_loss_epoch=0.689, train_accuracy_epoch=0.635, train_auroc=0.485]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name            | Type              | Params
------------------------------------------------------
0 | model           | Sequential        | 362 K 
1 | criterion       | BCEWithLogitsLoss | 0     
2 | auroc_metric    | AUROC             | 0     
3 | accuracy_metric | Accuracy          | 0     
------------------------------------------------------
362 K     Trainable params
0         Non-trainable params
362 K     Total params
1.451     Total estimated model params size (MB)


1045 1045
Loading:  /Users/rickgentry/emotive_lab/eyemind/OBF/pre_weights/sample_weights/encoder_1633040995_gru.pt
Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0:  30%|███       | 39/130 [01:39<03:51,  2.54s/it, loss=0.691, v_num=0, train_loss_step=0.837, train_accuracy_step=0.250]



Epoch 4: 100%|██████████| 130/130 [04:27<00:00,  2.06s/it, loss=0.727, v_num=0, train_loss_step=0.709, train_accuracy_step=0.625, val_loss_step=0.456, val_accuracy_step=1.000, val_loss_epoch=0.664, val_accuracy_epoch=0.629, val_auroc=0.396, train_loss_epoch=0.701, train_accuracy_epoch=0.570, train_auroc=0.445]
