In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
sys.path.append('..')

In [3]:
import torch
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning
import pytorchvideo.data
import pytorchvideo.models.resnet
import pytorchvideo.models.slowfast
import torch.utils.data
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from sklearn.model_selection import KFold
from torch.utils.data import Subset
from sklearn.metrics import roc_auc_score
from fvcore.common.config import CfgNode
from IPython.display import clear_output

In [5]:
from lared_laughter.constants import dataset_path
import optimizer
from dataset import my_video_dataset_from_dataframe
from defaults import get_cfg
from transforms import get_kinetics_train_transform, get_kinetics_val_transform
from utils import get_metrics
cfg = get_cfg()

In [5]:
def make_kinetics_resnet():
#   return pytorchvideo.models.slowfast.create_slowfast(
#       input_channels=(3,3), # RGB input from Kinetics
#       model_depth=50, # For the tutorial let's just use a 50 layer network
#       model_num_class=2, # Kinetics has 400 classes so we need out final head to align
#   )
  return pytorchvideo.models.resnet.create_resnet(
      input_channel=3, # RGB input from Kinetics
      model_depth=50, # For the tutorial let's just use a 50 layer network
      model_num_class=2, # Kinetics has 400 classes so we need out final head to align
      norm=nn.BatchNorm3d,
      activation=nn.ReLU,
      
  )

In [8]:
class VideoClassificationLightningModule(pytorch_lightning.LightningModule):
  def __init__(self, optim_cfg={}):
      super().__init__()
      self.model = make_slowfast_feature_extractor()
      self.optim_cfg = optim_cfg

  def forward(self, x):
      return self.model(x)

  def training_step(self, batch, batch_idx):

      # learning rate scheduling
      epoch_exact = self.current_epoch + float(batch_idx) / self.trainer.num_training_batches
      self.log("epoch_exact", epoch_exact)
      lr = optimizer.get_epoch_lr(epoch_exact, self.optim_cfg)
      
      optimizer.set_lr(self.optimizers().optimizer, lr)
      self.log("learning_rate", lr)

      # The model expects a video tensor of shape (B, C, T, H, W), which is the
      # format provided by the dataset
      y_hat = self.model(batch["video"])

      # Compute cross entropy loss, loss.backwards will be called behind the scenes
      # by PyTorchLightning after being returned from this method.
      loss = F.cross_entropy(y_hat, batch["label"])

      # Log the train loss to Tensorboard
      self.log("train_loss", loss.item())

      return loss

  def validation_step(self, batch, batch_idx):
      y_hat = self.model(batch["video"])
      loss = F.cross_entropy(y_hat, batch["label"])
      self.log("val_loss", loss)
      return (y_hat[:,1], batch["label"])

  def validation_epoch_end(self, validation_step_outputs):
        all_outputs = torch.cat([o[0] for o in validation_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in validation_step_outputs]).cpu()

        try:
            val_auc = roc_auc_score(all_labels, all_outputs)
            self.log('val_auc', val_auc)
        except ValueError:
            pass

  def test_step(self, batch, batch_idx):
      y_hat = self.model(batch["video"])
      loss = F.cross_entropy(y_hat, batch["label"])
      return (y_hat[:,1], batch["label"])

  def test_epoch_end(self, validation_step_outputs):
        all_outputs = torch.cat([o[0] for o in validation_step_outputs]).cpu()
        all_labels = torch.cat([o[1] for o in validation_step_outputs]).cpu()

        self.test_results = {'proba': all_outputs, 'labels': all_labels}
        try:
            test_auc = roc_auc_score(all_labels, all_outputs)
            self.test_results['auc'] = test_auc
            self.log('test_auc', test_auc)
        except ValueError:
            pass

  def configure_optimizers(self):
      """
      Setup the Adam optimizer. Note, that this function also can return a lr scheduler, which is
      usually useful for training video models.
      """
      return optimizer.get_optimizer(self.model, self.optim_cfg)
      

In [9]:
def train_from_scratch():
    cfg = get_cfg()
    cfg.merge_from_other_cfg(CfgNode({
        'SOLVER': {
            'OPTIMIZING_METHOD': 'sgd',
            'BASE_LR': 0.1,
            'LR_POLICY': 'cosine',
            'MOMENTUM': 0.9,
            'WEIGHT_DECAY': 1e-4,
            'WARMUP_EPOCHS': 0.0,
            'WARMUP_START_LR': 0.01
        }
    }))
    classification_module = VideoClassificationLightningModule(optim_cfg=cfg)
    data_module = KineticsDataModule()
    trainer = pytorch_lightning.Trainer(
        callbacks=[EarlyStopping(monitor="val_loss", mode="min")],
        accelerator='gpu',
        log_every_n_steps=1,
        max_epochs=50
    )
    trainer.fit(classification_module, data_module)

In [10]:
def do_fold(train_ds, test_ds):
    # data loaders
    data_loader_train = torch.utils.data.DataLoader(
        train_ds, batch_size=8, shuffle=True, num_workers=10,
        collate_fn=None)
    data_loader_val = torch.utils.data.DataLoader(
        test_ds, batch_size=8, shuffle=False, num_workers=10,
        collate_fn=None)

    cfg = get_cfg()
    cfg.merge_from_other_cfg(CfgNode({
        'SOLVER': {
            'OPTIMIZING_METHOD': 'sgd',
            'BASE_LR': 0.1,
            'LR_POLICY': 'none',
            'MOMENTUM': 0.9,
            'WEIGHT_DECAY': 1e-4,
        }
    }))
    
    system = VideoClassificationLightningModule(optim_cfg=cfg)
    trainer = pytorch_lightning.Trainer(
        # callbacks=[EarlyStopping(monitor="val_loss", mode="min")],
        accelerator='gpu',
        log_every_n_steps=1,
        max_epochs=5)
    trainer.fit(system, data_loader_train, data_loader_val)

    trainer.test(system, data_loader_val)
    return system.test_results

In [11]:
def do_cross_validation(dataset, metrics_name='binary'):
    seed = 22
    cv_splits = KFold(n_splits=10, random_state=seed, shuffle=True).split(range(len(dataset)))

    outputs = torch.empty((len(dataset),))
    labels = torch.empty((len(dataset),), dtype=torch.int)
    for f, (train_idx, test_idx) in enumerate(cv_splits):
        # create datasets
        train_ds = Subset(dataset, train_idx)
        test_ds = Subset(dataset, test_idx)

        fold_outputs = do_fold(train_ds, test_ds)
        outputs[test_idx] = fold_outputs['proba'].float()
        labels[test_idx] = fold_outputs['labels'].int()
        clear_output(wait=True)

    run_metrics = get_metrics(outputs, labels, metrics_name)

    return outputs, run_metrics

In [8]:
examples = pd.read_csv('../dataset/computational_examples.csv')


In [9]:
examples.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,person,cam,hit_id,condition,calibration,hash,ini_time,end_time,...,gt_offset,gt_laughter,is_laughter,confidence,intensity,attempt,pressed_key,onset,offset,rating_hash
0,0,0,25,1,9c45e4f0c5442e796eb93e73e94dc6c2dfca7b9c4c54ff...,video,False,1170917790b51bc5a8dacacc4d8ed8c410b7ea6bb7ea4b...,7360.29,7361.54,...,4.420238,True,True,7,4,0,True,3.33667,6.639973,7af591213b827db95c12c56e76e0b1fe518f2088d11aad...
1,1947,1947,25,1,bff9b86d833a595e6fe5a54f45093fa168cda45db1143e...,video,False,1170917790b51bc5a8dacacc4d8ed8c410b7ea6bb7ea4b...,7360.29,7361.54,...,4.420238,True,True,1,6,0,True,2.569236,6.639973,25df21dc0f25e11a7c4aba77e502269d42a7bb548044f2...
2,546,546,25,1,4198c11729cea33268040a725998f16478a6564d4af091...,audio,False,1170917790b51bc5a8dacacc4d8ed8c410b7ea6bb7ea4b...,7360.29,7361.54,...,4.420238,True,True,7,5,0,True,2.10322,4.26322,2cb0148d83e939600a9e1d71872ba748334e4d8d0cafa0...
3,2440,2440,25,1,a9760ede24043c59a0151b09a46e866fa43f74bd60b682...,audio,False,1170917790b51bc5a8dacacc4d8ed8c410b7ea6bb7ea4b...,7360.29,7361.54,...,4.420238,True,True,7,6,0,True,2.78322,4.14322,b3cc8b0750211f5b5100f20641793ad043dc7cc823ad9a...
4,1058,1058,25,1,f4c9842cec7be99eeaaea36d0c7d077c4d5d94596dc731...,av,False,1170917790b51bc5a8dacacc4d8ed8c410b7ea6bb7ea4b...,7360.29,7361.54,...,4.420238,True,True,7,7,0,True,2.792656,3.893757,bf6cd2aeaf7c77c2c2ff873e6f603b7d46cd64c74e9ebd...


In [21]:
res = {}
for label_modality in ['video']:

    filtered_examples = examples[examples['condition'] == label_modality]
    filtered_examples['filename'] = filtered_examples['hash']+'.mp4'
    video_path = os.path.join(cloud_data_path, 'laughter_data', 'ml_datasets', 'tight', 'video')

    dataset = my_video_dataset_from_dataframe(
        examples_df=filtered_examples,
        video_path_prefix=video_path,
        clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
        transform=get_kinetics_train_transform(32, 256, True),
        decode_audio=False,
        file_path_key='filename',
        label_key='pressed_key'
    )

    res[label_modality] = []
    for i in range(5):
        _, metrics = do_cross_validation(dataset, metrics_name='binary')
        res[label_modality].append(metrics)
        torch.cuda.empty_cache()

Using cache found in /home/jose/.cache/torch/hub/facebookresearch_pytorchvideo_main
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | Net  | 33.6 M
-------------------------------
4.6 K     Trainable params
33.6 M    Non-trainable params
33.6 M    Total params
134.596   Total estimated model params size (MB)


bn 0, non bn 2, zero 0 no grad 330


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_auc            0.5378378378378379
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Video feature extraction

In [6]:
from jose.torch.hooks import FeatureRecorder, Signal
from tqdm.notebook import tqdm

In [7]:
recorder = FeatureRecorder()
model = make_slowfast_feature_extractor()
model.blocks[-1].proj.register_forward_hook(recorder.get_hook('proj_input', Signal.INPUT))

NameError: name 'make_slowfast_feature_extractor' is not defined

In [8]:
examples = pd.read_csv('../dataset/computational_examples.csv')
examples = examples[examples['condition'] == 'av']
examples['filename'] = examples['hash']+'.mp4'
video_path = os.path.join(dataset_path, 'video')

In [9]:
ds = my_video_dataset_from_dataframe(
    examples_df=examples,
    video_path_prefix=video_path,
    clip_sampler=pytorchvideo.data.make_clip_sampler("random", 2),
    transform=get_kinetics_val_transform(32, 256, True),
    decode_audio=False,
    file_path_key='filename',
    label_key='pressed_key'
)
dl = torch.utils.data.DataLoader(
        ds, batch_size=8, shuffle=False, num_workers=10,
        collate_fn=None)

In [15]:
b = next(iter(dl))['video']
print(b[0].shape, b[1].shape)

torch.Size([8, 3, 8, 256, 256]) torch.Size([8, 3, 32, 256, 256])


2

In [12]:
model.eval()
recorder.clear()
for batch in tqdm(dl):
    y_hat = model(batch['video'])
recorder.store_as_dict('./features/slowfast_test.pkl', dict_keys=examples.hash.to_list())

  0%|          | 0/84 [00:00<?, ?it/s]

torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Size([8, 1, 2, 2, 2304])
torch.Si