In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
import logging

In [3]:
import torch

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, GroupKFold
from IPython.display import clear_output
import pytorch_lightning as pl
# Raise the "pytorch_lightning" logger threshold so only warnings and errors are shown.
logging.getLogger("pytorch_lightning").setLevel(logging.WARNING)
# Turn off cuDNN benchmark mode and set cuDNN's deterministic flag;
# intended to make repeated runs comparable (seeding itself happens in do_run).
torch.backends.cudnn.benchmark=False
torch.backends.cudnn.deterministic=True

In [4]:
from lared_laughter.fusion.dataset import FatherDataset, FatherDatasetSubset, CacheExtractor
from lared_laughter.accel.dataset import AccelExtractor
from lared_laughter.audio.dataset import AudioLaughterExtractor
from lared_laughter.video.dataset import VideoExtractor
from lared_laughter.audio.models.resnet import get_pretrained_body as get_audio_feature_extractor
from lared_laughter.video.models.models import make_slow_pretrained_body as get_video_feature_extractor
from lared_laughter.video.dataset.transforms import get_kinetics_val_transform
from lared_laughter.constants import annot_exp_path, datasets_path
from lared_laughter.utils import load_examples
from lared_laughter.fusion.train import System, train, test
from lared_laughter.fusion.utils import make_table

In [5]:
def do_cross_validation(do_train, ds, input_modalities, model_label_modality, test_label_modality,
    random_state, task='classification', prefix=None, first_fold_only=False, deterministic=False):
    ''' Train (or load) and evaluate one model per grouped cross-validation fold.

        The dataset is partitioned with a 10-way GroupKFold over ds.get_groups().
        Either only the first split is used (first_fold_only=True) or every
        split except the first one.

        do_train: when true, fit a model on each training subset via train();
            otherwise restore it from the fold's checkpoint file.
        ds: dataset exposing __len__, get_groups, load_caches and store_caches.
        input_modalities: modality names; joined with '-' to build the
            checkpoint file name and forwarded to train().
        model_label_modality: label modality tag used in the checkpoint name.
        test_label_modality: accepted for interface compatibility; unused here.
        random_state: accepted for interface compatibility; unused here.
        task: forwarded to train().
        prefix: optional run prefix; '_fold<k>' is appended for each fold.
            A falsy prefix is passed on as None.
        first_fold_only: use only the first split, and ask train() to
            evaluate every epoch.
        deterministic: forwarded to train().

        Returns (metrics, outputs, indices): three lists with one entry per
        evaluated fold, read from the 'metric', 'proba' and 'index' entries of
        each test() result (the latter two converted with .numpy()).
    '''
    splitter = GroupKFold(n_splits=10)
    every_split = list(splitter.split(range(len(ds)), groups=ds.get_groups()))

    # NOTE(review): folds are renumbered from 0 after this selection, so
    # "fold0" names a different split depending on first_fold_only, and the
    # checkpoint file names do not encode that flag -- confirm this is intended.
    selected_splits = [every_split[0]] if first_fold_only else every_split[1:]

    per_fold = []
    for fold, (train_idx, test_idx) in enumerate(selected_splits):
        # Bring in the feature caches belonging to this fold number.
        ds.load_caches(fold)
        train_subset = FatherDatasetSubset(ds, train_idx, eval=False)
        test_subset = FatherDatasetSubset(ds, test_idx, eval=True)

        checkpoint_name = f'I{"-".join(input_modalities)}_L{model_label_modality}_fold{fold}.ckpt'
        weights_path = os.path.join('weights', checkpoint_name)
        fold_prefix = prefix + f'_fold{fold}' if prefix else None

        if not do_train:
            model = System.load_from_checkpoint(checkpoint_path=weights_path)
        else:
            train_options = dict(
                prefix=fold_prefix,
                task=task,
                eval_every_epoch=first_fold_only,
                deterministic=deterministic,
                weights_path=weights_path,
            )
            model = train(fold, train_subset, test_subset, input_modalities, **train_options).model

        per_fold.append(test(fold, model, test_subset, prefix=fold_prefix))

        # Persist the caches and drop them from memory before the next fold.
        ds.store_caches(clear=True)

        clear_output(wait=False)

    outputs = [fold_result['proba'].numpy() for fold_result in per_fold]
    indices = [fold_result['index'].numpy() for fold_result in per_fold]
    metrics = [fold_result['metric'] for fold_result in per_fold]
    return metrics, outputs, indices

In [6]:
def do_run(examples, input_modalities, model_label_modality, test_label_modality,
    do_train=True, regression=False, first_fold_only=False, prefix='',
    videos_path='/home/jose/data/lared_video/video'):
    ''' Build the fusion dataset for the requested modalities and cross-validate on it.

        do_train = True will train the models, and requires
            model_label_modality = test_label_modality
        do_train = False will load weights to test the models and does not require
            model_label_modality = test_label_modality

        examples: DataFrame of examples; must contain 'hash', 'pressed_key' and,
            when regression=True, 'intensity'. It is never modified.
        input_modalities: any of 'audio', 'video', 'accel'; one extractor is
            created per listed modality.
        model_label_modality / test_label_modality: label modality tags used in
            the run prefix and the cache paths, and forwarded to do_cross_validation.
        regression: use 'intensity' as the label (missing values and rows where
            'pressed_key' is false are set to 0) instead of 'pressed_key'.
        first_fold_only: forwarded to do_cross_validation; also selects the
            '_first' cache files.
        prefix: string prepended to the generated run prefix.
        videos_path: directory handed to VideoExtractor; defaults to the
            location that used to be hard-coded here.

        Returns a dict with keys 'metrics', 'probas', 'indices' (the three lists
        returned by do_cross_validation) and 'seed' (the seed that was used).
    '''
    if do_train:
        assert model_label_modality == test_label_modality

    if regression:
        # Work on a copy so the caller's DataFrame is left untouched.
        examples = examples.copy()
        examples.loc[examples['intensity'].isna(), 'intensity'] = 0
        examples.loc[~examples['pressed_key'], 'intensity'] = 0
        label_column = 'intensity'
    else:
        label_column = 'pressed_key'

    print(f'Using {len(examples)} examples')

    # create the feature datasets
    cache_suffix = "_first" if first_fold_only else ""
    extractors = {}
    if 'audio' in input_modalities:
        audios_path = os.path.join(datasets_path, "loose", "lared_audios.pkl")
        # NOTE(review): the audio cache is created with enable=False while the
        # video cache uses the default -- confirm this asymmetry is intended.
        extractors['audio'] = CacheExtractor(
            enable=False,
            model = get_audio_feature_extractor().cuda(),
            extractor = AudioLaughterExtractor(audios_path, min_len=1.5, max_len=1.5, n_jobs=10),
            cache_path=f'./cache/Iaudio_L{test_label_modality}_cache{cache_suffix}'
        )
    if 'video' in input_modalities:
        extractors['video'] = CacheExtractor(
            model = get_video_feature_extractor().cuda(),
            extractor = VideoExtractor(videos_path, transform=get_kinetics_val_transform(8, 256, False), n_jobs=1),
            cache_path=f'./cache/Ivideo_L{test_label_modality}_cache{cache_suffix}'
        )
    if 'accel' in input_modalities:
        accel_ds_path = os.path.join(datasets_path, 'loose', 'accel_long.pkl')
        extractors['accel'] = AccelExtractor(accel_ds_path, min_len=1.5, max_len=1.5)

    ds = FatherDataset(examples, extractors, label_column=label_column, id_column='hash', )
    # Sanity check on the dataset size.
    # NOTE(review): 1318 is hard-coded for the current data -- confirm before reuse.
    assert len(ds) == 1318

    seed = 22
    # Public entry point; the pl.utilities.seed path is gone in Lightning 2.x.
    pl.seed_everything(seed, workers=True)

    metrics, probas, indices = do_cross_validation(
        do_train,
        ds,
        first_fold_only=first_fold_only,
        input_modalities=input_modalities,
        model_label_modality = model_label_modality, 
        test_label_modality = test_label_modality,
        task='regression' if regression else 'classification',
        deterministic=True,
        random_state=seed,
        prefix=f'{prefix}I{"-".join(input_modalities)}_L{model_label_modality}_T{test_label_modality}')

    torch.cuda.empty_cache()

    return {
        'metrics': metrics,
        'probas': probas,
        'indices': indices,
        'seed': seed
    }

In [12]:
def get_table(regression=False, first_fold_only=False, do_train=True,
    input_modality_sets=(('video',),), label_modalities=('audio', 'video', 'av')):
    ''' Run do_run for every (input modalities, label modality) combination.

        regression, first_fold_only, do_train: forwarded unchanged to do_run.
        input_modality_sets: iterable of modality tuples to evaluate, e.g.
            [('accel',), ('audio',), ('video',)]. Defaults to video only.
        label_modalities: values of the 'condition' column to evaluate; the
            examples are filtered to each one in turn and the same value is
            used as both the model and the test label modality.

        Returns a nested dict
            {'-'.join(input_modalities): {label_modality: [do_run result]}}.
    '''
    examples = load_examples(os.path.join(annot_exp_path, 'processed', 'examples_without_calibration.csv'))

    res = {}
    for input_modalities in input_modality_sets:
        input_modality_res = {}

        for label_modality in label_modalities:
            # Keep only the examples annotated under this condition.
            filtered_examples = examples[examples['condition'] == label_modality].reset_index()
            run_results = do_run(
                filtered_examples, 
                input_modalities, 
                model_label_modality=label_modality,
                test_label_modality=label_modality,
                do_train=do_train,
                first_fold_only=first_fold_only,
                regression=regression,
                prefix='')
            # Wrapped in a list, as in the original results layout.
            input_modality_res[label_modality] = [run_results]

        res['-'.join(input_modalities)] = input_modality_res
    return res

In [13]:
# Train and evaluate with get_table's defaults (classification labels,
# every cross-validation split except the first).
res = get_table(do_train=True)

In [14]:
# Print the LaTeX rendering of the table that make_table builds from the results.
print(make_table(res).to_latex(
    index=False,
))

\begin{tabular}{llll}
\toprule
             & \multicolumn{3}{l}{Label Modality} \\
       Input &          Audio &         Video &   Audiovisual \\
\midrule
Acceleration &  0.704 (0.076) & 0.640 (0.036) & 0.667 (0.057) \\
\bottomrule
\end{tabular}



  row = pd.Series(index=index)
  print(make_table(res).to_latex(
