In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import logging
import pickle
import traceback

In [3]:
import torch

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, GroupKFold
from IPython.display import clear_output
import pytorch_lightning as pl
# Raise the PyTorch Lightning logger threshold to WARNING so its INFO/DEBUG
# messages are not emitted.
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
# Reproducibility settings for cuDNN: turn off the benchmark-based algorithm
# auto-tuner and request deterministic algorithms.
torch.backends.cudnn.benchmark=False
torch.backends.cudnn.deterministic=True

In [4]:
from lared_dataset.data_loading.dataset import FatherDataset, FatherDatasetSubset
from lared_dataset.data_loading.extractors import VideoExtractor
from lared_dataset.baselines.video.models import make_slow_pretrained_body as get_video_feature_extractor
from lared_dataset.baselines.video.transforms import get_kinetics_val_transform
from lared_dataset.constants import annot_exp_path, datasets_path, processed_data_path
from lared_dataset.baselines.train import System, train, test

In [5]:
def do_cross_validation(do_train, ds, input_modalities, seed, task='classification',
    prefix=None, first_fold_only=False, deterministic=False):
    ''' Cross-validate the baseline model on `ds` using grouped 10-fold splits.

    Splits come from `GroupKFold(n_splits=10)` with `ds.get_groups()` as the groups.
    With `first_fold_only=True` only the first split is evaluated; otherwise the
    first split is skipped and the remaining ones are evaluated.

    For each selected split the model is either trained with `train`
    (`do_train=True`) or restored from the checkpoint
    `weights/<task>/I<modalities>_fold<f>.ckpt`, and is then evaluated with `test`
    on the held-out subset.

    NOTE(review): `f` is the position within the *selected* splits, not the
    GroupKFold index. When the first split is skipped, `f=0` refers to the second
    split, so its checkpoint file name is the same one a `first_fold_only=True`
    run uses. Confirm this sharing is intended.

    Args:
        do_train: train a model per fold when true, otherwise load its checkpoint.
        ds: dataset to split; must support `len()` and `get_groups()`.
        input_modalities: modality names; joined with '-' to build the checkpoint
            file name and forwarded to `train`.
        seed: base integer seed; fold-specific offsets are added to it before
            training/loading and again before testing.
        task: task name; used as the checkpoint sub-directory and forwarded to
            `train`.
        prefix: optional run name; `_fold<f>` is appended and the result is passed
            to `train` and `test`. A falsy prefix is passed on as None.
        first_fold_only: selects which splits are run (see above); also forwarded
            to `train` as `eval_every_epoch`.
        deterministic: forwarded to `train`.

    Returns:
        Tuple `(metrics, outputs, indices)` of per-fold lists: the 'metric' entry
        of each fold result, and its 'proba' and 'index' entries converted with
        `.numpy()`.
    '''
    train_seed_offset = 734890573
    test_seed_offset = 2980374334

    splitter = GroupKFold(n_splits=10)
    folds = list(splitter.split(range(len(ds)), groups=ds.get_groups()))

    # Keep only the first split, or every split except the first.
    folds = folds[:1] if first_fold_only else folds[1:]

    fold_results = []
    for f, (train_idx, test_idx) in enumerate(folds):
        # Wrap this fold's train/test indices as subsets of the full dataset.
        train_ds = FatherDatasetSubset(ds, train_idx, eval=False)
        test_ds = FatherDatasetSubset(ds, test_idx, eval=True)

        checkpoint_name = f'I{"-".join(input_modalities)}_fold{f}.ckpt'
        weights_path = os.path.join('weights', task, checkpoint_name)

        # Same run name for the training and the testing call of this fold.
        if prefix:
            fold_prefix = prefix + f'_fold{f}'
        else:
            fold_prefix = None

        pl.utilities.seed.seed_everything(seed + f + train_seed_offset)
        if not do_train:
            model = System.load_from_checkpoint(checkpoint_path=weights_path)
        else:
            trainer = train(
                f, train_ds, test_ds, input_modalities,
                prefix=fold_prefix,
                task=task,
                eval_every_epoch=first_fold_only,
                deterministic=deterministic,
                weights_path=weights_path)
            model = trainer.model

        # Re-seed so that testing is reproducible whether or not training ran.
        pl.utilities.seed.seed_everything(seed + f + test_seed_offset)
        fold_results.append(test(f, model, test_ds, prefix=fold_prefix))
        clear_output(wait=False)

    outputs = []
    for result in fold_results:
        outputs.append(result['proba'].numpy())
    indices = []
    for result in fold_results:
        indices.append(result['index'].numpy())
    metrics = []
    for result in fold_results:
        metrics.append(result['metric'])
    return metrics, outputs, indices

In [12]:
def do_run(task, examples, input_modalities, model_label_modality, test_label_modality,
    do_train=True, first_fold_only=False, deterministic=True, prefix='',
    videos_path='/home/jose/data/lared_video/video'):
    ''' Build the dataset for one input-modality configuration and cross-validate it.

    do_train = True will train the models, and requires
        model_label_modality = test_label_modality
    do_train = False will load weights to test the models and does not require
        model_label_modality = test_label_modality

    Args:
        task: 'classification', 'regression' or 'segmentation'. Selects the label
            column ('pressed_key', 'intensity' or None respectively).
        examples: collection of examples handed to `FatherDataset`; only its
            length is inspected here.
        input_modalities: modality names. A video extractor is created only when
            'video' is among them; no other modality gets an extractor here.
        model_label_modality: label modality of the model; only used in the
            equality check above and in the run prefix.
        test_label_modality: label modality of the test labels; only used in the
            equality check above and in the run prefix.
        do_train: train (True) or load checkpoints (False); forwarded to
            `do_cross_validation`.
        first_fold_only: forwarded to `do_cross_validation`.
        deterministic: forwarded to `do_cross_validation`.
        prefix: string prepended to the generated run name.
        videos_path: directory handed to `VideoExtractor`. Defaults to the
            location that used to be hard-coded in this function.

    Returns:
        dict with keys 'metrics', 'probas' and 'indices' (the three values
        returned by `do_cross_validation`) and 'seed' (the base seed used).

    Raises:
        AssertionError: if `do_train` is true and the two label modalities differ.
        ValueError: if `task` is not one of the supported task names.
    '''
    if do_train:
        assert model_label_modality == test_label_modality, (
            'training requires model_label_modality == test_label_modality, got '
            f'{model_label_modality!r} and {test_label_modality!r}')

    if task == 'classification':
        label_column = 'pressed_key'
    elif task == 'regression':
        label_column = 'intensity'
    elif task == 'segmentation':
        label_column = None
    else:
        raise ValueError(
            f"Unknown task {task!r}; expected 'classification', 'regression' "
            "or 'segmentation'")

    print(f'Using {len(examples)} examples')

    # create the feature datasets
    extractors = {}
    if 'video' in input_modalities:
        # NOTE(review): the meaning of the transform arguments (8, 256, False) is
        # defined by get_kinetics_val_transform and is not visible from here.
        extractors['video'] = VideoExtractor(videos_path, transform=get_kinetics_val_transform(8, 256, False), n_jobs=1)

    ds = FatherDataset(examples, extractors,
        label_column=label_column,
        id_column='hash', )

    # Fixed base seed; do_cross_validation derives the per-fold seeds from it.
    seed = 22
    metrics, probas, indices = do_cross_validation(
        do_train,
        ds,
        first_fold_only=first_fold_only,
        input_modalities=input_modalities,
        task=task,
        deterministic=deterministic,
        seed=seed,
        prefix=f'{prefix}I{"-".join(input_modalities)}_L{model_label_modality}_T{test_label_modality}')

    # Release cached GPU memory held by PyTorch's allocator between runs.
    torch.cuda.empty_cache()

    return {
        'metrics': metrics,
        'probas': probas,
        'indices': indices,
        'seed': seed
    }

In [13]:
def get_table(task, first_fold_only=False, do_train=True, deterministic=True,
    model_label_modality=None, test_label_modality=None):
    ''' Run the baseline once per input modality and collect the results.

    Reads `examples.csv` from `processed_data_path` and calls `do_run` for each
    single-modality configuration: 'video', 'pose' and 'accel'.

    Args:
        task: task name forwarded to `do_run`; its first three characters
            followed by '_' are used as the run prefix.
        first_fold_only: forwarded to `do_run`.
        do_train: forwarded to `do_run`.
        deterministic: forwarded to `do_run`.
        model_label_modality: forwarded to `do_run`, which requires it. The valid
            values are not visible from this notebook; the default None ends up
            as the text 'None' in the run name built by `do_run`.
        test_label_modality: forwarded to `do_run`, which requires it. When left
            as None it takes the value of `model_label_modality`, which satisfies
            the equality `do_run` demands when training.

    Returns:
        dict mapping each '-'-joined modality name to the dict returned by
        `do_run` for that configuration.
    '''
    if test_label_modality is None:
        test_label_modality = model_label_modality

    examples = pd.read_csv(os.path.join(processed_data_path, 'examples.csv'))

    # NOTE(review): do_run only creates a feature extractor for 'video'; confirm
    # that the 'pose' and 'accel' configurations are usable as they stand.
    all_input_modalities = [
        ('video',),
        ('pose',),
        ('accel',),
    ]

    res = {}
    for input_modalities in all_input_modalities:

        # The two label modalities are required positional arguments of do_run;
        # omitting them makes every call fail with a TypeError.
        run_results = do_run(
            task,
            examples,
            input_modalities,
            model_label_modality,
            test_label_modality,
            do_train=do_train,
            first_fold_only=first_fold_only,
            prefix=task[:3]+'_',
            deterministic=deterministic)

        res['-'.join(input_modalities)] = run_results
    return res

In [14]:
# Evaluate the segmentation baselines on all folds but the first, loading
# previously saved weights instead of training.
try:
    res = get_table(task='segmentation', first_fold_only=False, do_train=False, deterministic=False)
except Exception:
    # Print the traceback as plain text, then re-raise: if the error were
    # swallowed, `res` would stay unbound (or keep a value from an earlier run)
    # and later cells would fail or save stale results.
    print(traceback.format_exc())
    raise

In [21]:
# Save the segmentation results held in `res`. The output directory is created
# if missing, and the file is closed as soon as the dump finishes.
os.makedirs('results', exist_ok=True)
with open('results/segmentation.pkl', 'wb') as results_file:
    pickle.dump(res, results_file)