In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import sys
sys.path.append('..')

In [3]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Subset
from tqdm import tqdm, trange

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegressionCV

In [5]:
from models.cnn import MyAlexNet
from modules.my_torch.helpers import train_one_epoch, eval, test
from modules.eval.eval import dataset_train_test
from dataset import AccelLaughterDataset

In [23]:
# Read the example table, keep only the rows whose `condition` column equals
# 'video', and build the dataset from those rows plus the `accel_ds.pkl` file.
examples = (
    pd.read_csv('../dataset/computational_examples.csv')
    .loc[lambda table: table['condition'] == 'video']
)
ds = AccelLaughterDataset(examples, '../dataset/accel_ds.pkl')

loaded 672 examples
442 have accel


In [24]:
# Sanity check: number of items in `ds`, i.e. the range the CV split is built over.
len(ds)

442

In [26]:
seed = 22

# KFold.split() returns a one-shot generator. The folds are looked up by
# position (cv_splits[i]) and read more than once later in the notebook, so
# materialise them into a list of (train_idx, test_idx) pairs up front.
cv_splits = list(
    KFold(n_splits=4,
          random_state=seed,
          shuffle=True).split(range(len(ds)))
)

In [None]:
def do_fold(i, n_epochs=10, batch_size=100, lr=.001):
    """Train a fresh ``MyAlexNet`` on CV fold ``i`` and score the held-out part.

    Parameters
    ----------
    i : int
        Position of the (train, test) index pair inside the notebook-level
        ``cv_splits``.
    n_epochs : int, optional
        Maximum number of training epochs (default 10).
    batch_size : int, optional
        Batch size used by both the training and the evaluation loader
        (default 100).
    lr : float, optional
        Learning rate handed to Adam (default 0.001).

    Returns
    -------
    val_proba
        The second value returned by ``test`` for the held-out loader, i.e.
        the scores that are passed to ``roc_auc_score``. The evaluation
        loader is not shuffled, so these presumably follow the order of the
        filtered test indices -- TODO confirm against ``test``.

    Notes
    -----
    Prints the fold's ROC AUC. Reads the notebook-level names ``cv_splits``,
    ``ds`` and ``seed``.
    """
    train_idx, test_idx = cv_splits[i]

    # leave only examples with accel; build the lookup once instead of
    # calling ds.get_ids_with_data() again for every single index
    ids_with_data = set(ds.get_ids_with_data())
    train_idx = [idx for idx in train_idx if idx in ids_with_data]
    test_idx = [idx for idx in test_idx if idx in ids_with_data]

    # Weight initialisation and batch shuffling are random; seeding per fold
    # makes a re-run of the same fold reproduce the same result.
    torch.manual_seed(seed + i)

    # create datasets
    train_ds = Subset(ds, train_idx)
    test_ds = Subset(ds, test_idx)

    # data loaders
    data_loader = torch.utils.data.DataLoader(
        train_ds, batch_size=batch_size, shuffle=True, num_workers=0,
        collate_fn=None)
    data_loader_val = torch.utils.data.DataLoader(
        test_ds, batch_size=batch_size, shuffle=False, num_workers=0,
        collate_fn=None)

    model = MyAlexNet()
    loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    device = torch.device('cpu')
    model = model.to(device)

    # The try wraps the whole loop so that a single Ctrl-C ends training and
    # falls through to evaluation with the weights reached so far. With the
    # handler inside the loop, an interrupt only skipped the current epoch.
    # Per-epoch validation via eval(...) is currently not run.
    try:
        for epoch in range(n_epochs):
            train_one_epoch(model, loss_fn, device, data_loader, optimizer, epoch)
    except KeyboardInterrupt:
        pass

    # testing
    val_y_true, val_proba, _ = test(model, loss_fn, device, data_loader_val)
    test_auc = roc_auc_score(val_y_true, val_proba)
    print(test_auc)
    return val_proba

In [None]:
# Out-of-fold scores, one slot per position in `ds`. Positions that never
# receive a prediction stay NaN, so gaps are visible in the saved file.
proba = np.full(len(ds), np.nan)

# Same availability filter that do_fold applies, evaluated once for all folds.
ids_with_data = set(ds.get_ids_with_data())

for f in range(0, 4):
    # Flatten so that a column-shaped (n, 1) result can still be assigned.
    fold_proba = np.asarray(do_fold(f), dtype=float).ravel()
    train_idx, test_idx = cv_splits[f]
    test_idx = [idx for idx in test_idx if idx in ids_with_data]
    # The scores are matched to dataset positions purely by order, so a
    # length mismatch means they would be written to the wrong examples.
    if fold_proba.shape[0] != len(test_idx):
        raise ValueError(
            'fold %d: got %d scores for %d test examples'
            % (f, fold_proba.shape[0], len(test_idx)))
    proba[test_idx] = fold_proba

In [None]:
# Write the per-fold scores gathered in `proba` to a text file with np.savetxt
# (default number format and delimiter).
np.savetxt('../analysis/cnn_proba.csv', proba)