In [5]:
import numpy as np
import argparse
import logging

from multiprocessing import cpu_count
from pathlib import Path
from typing import Dict, List, Any

import torch
from torch.optim import SGD
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler, Subset

from systems import EpicActionRecogintionShapleyClassifier

from models.esvs import Net
from datasets.pickle_dataset import PickleDataset
from frame_sampling import RandomSampler

from ipdb import launch_ipdb_on_exception

import plotly.express as px
import plotly.graph_objects as go

from livelossplot import PlotLosses
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Default tensor dtype and compute device used by every cell below:
# the first CUDA device when one is available, the CPU otherwise.
dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Shared configuration for the experiments below.
batch_size, n_frames, log_interval = 512, 4, 500
pickle_dir = '../datasets/epic/features/p01_features.pkl'

# The sampler is handed to the dataset, which is built from the pickled features file.
frame_sampler = RandomSampler(test=False, snippet_length=1, frame_count=n_frames)
dataset = PickleDataset(pickle_dir, frame_sampler)

# TensorBoard writer shared by the training cells.
writer = SummaryWriter(log_dir='runs/epic_all_models_test')

In [None]:


# Learning-rate / batch-size sweep.
# Each (learning rate, batch size) pair repeatedly fits ONE fixed batch for
# 5000 optimiser steps and records the loss of every step. Afterwards `lr`
# maps each learning rate to a list holding one array of [step, loss] rows per
# batch size.
lr = {
    1e-3: [],
    1e-4: [],
    1e-5: []
}

# `batch_size` is a plain int in the configuration cell; accept an int or a
# sequence so the inner loop does not fail with "'int' object is not iterable".
batch_sizes = list(batch_size) if isinstance(batch_size, (list, tuple)) else [batch_size]

for l in lr.keys():
    loss_b = []
    for b in batch_sizes:
        # Start every configuration from freshly initialised weights; reusing
        # one model would let later runs continue from earlier training.
        model = Net(frame_count=n_frames).to(device)

        dataloader = DataLoader(dataset, batch_size=b, shuffle=True)
        data = iter(dataloader)
        # Use the built-in next(); iterator objects do not reliably expose .next().
        inputs = next(data)

        optimiser = SGD(model.parameters(), lr=l, momentum=0.9)
        # NOTE(review): other cells in this notebook call the classifier as
        # (model, device, optimiser, train_loader, test_loader); confirm which
        # argument order the current `systems` module expects.
        classifier = EpicActionRecogintionShapleyClassifier(model, dataloader, optimiser, device, log_interval=log_interval)
        running_loss = 0.0
        training_loss = []
        for i in range(5000):

            optimiser.zero_grad()

            step = classifier._step(inputs)

            loss = step['loss']
            loss.backward()
            optimiser.step()

            loss_value = loss.item()
            running_loss += loss_value
            training_loss.append([i, loss_value])

            # Report the mean loss of the last `log_interval` steps.
            if i % log_interval == log_interval-1:
                print('%5d) loss: %.3f' % (i + 1, running_loss / log_interval))
                running_loss = 0.0

        loss_b.append(np.array(training_loss))
    lr[l] = loss_b

In [None]:

# One figure per learning rate, one trace per batch size from the sweep.
# `batch_size` may be a plain int (see the configuration cell), so normalise
# it to a list before it is used to label the traces.
batch_sizes = list(batch_size) if isinstance(batch_size, (list, tuple)) else [batch_size]

for l, lss in lr.items():

    fig = go.Figure()
    for i, ls in enumerate(lss):
        # Fall back to the run index if there are more runs than known batch sizes.
        run_label = batch_sizes[i] if i < len(batch_sizes) else f'run {i}'

        fig.add_trace(go.Scatter(
            x=ls[:,0],  # step index
            y=ls[:,1],  # loss at that step
            name=f'batch_size: {run_label}'
        ))

    fig.update_layout(
        xaxis_title='batched steps',
        yaxis_title='loss',
        title=f'Training loss for lr: {l}',
        yaxis_range=[-4, 2]
    )
    fig.update_yaxes(type="log")
    fig.show()


## Running training loop over whole dataset with

`torch.optim.Adam(model.parameters(), lr=3e-4)`

In [None]:
batch_size = 512

############## split ##################
# Hold out a random 20% of the sample indices for validation.
validation_split = 0.2
idxs = list(range(len(dataset)))
split = int(np.floor(validation_split * len(dataset)))
np.random.shuffle(idxs)

train_idx, test_idx = idxs[split:], idxs[:split]

train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
################################

liveloss = PlotLosses()

# log_interval = round(len(dataloader) / batch_size)
# log_interval = 10

model = Net(frame_count=n_frames).to(device)
optimiser = Adam(model.parameters(), lr=3e-4)
classifier = EpicActionRecogintionShapleyClassifier(model, device, optimiser, train_loader, test_loader, log_interval=log_interval)

# Per-batch histories (one entry per optimiser / evaluation step).
training_loss = {
    'loss': [],
    'acc1': [],
    'acc5': [],
}
testing_loss = {
    'loss': [],
    'acc1': [],
    'acc5': [],
}
for epoch in range(2000):
    logs = {}

    # ---- training pass ----
    model.train()
    running_loss = 0.0
    running_acc1 = 0.0
    running_acc5 = 0.0
    for batch_idx, data in enumerate(train_loader):
        optimiser.zero_grad()
        step = classifier._step(data)

        loss = step['loss']
        acc1 = step['verb_accuracy@1']
        acc5 = step['verb_accuracy@5']

        loss.backward()
        optimiser.step()

        running_loss += loss.item()
        running_acc1 += acc1.item()
        running_acc5 += acc5.item()
        training_loss['loss'].append(loss.item())
        training_loss['acc1'].append(acc1.item())
        training_loss['acc5'].append(acc5.item())

    # Epoch metrics are the mean over training batches.
    epoch_loss = running_loss / len(train_loader)
    epoch_acc1 = running_acc1 / len(train_loader)
    epoch_acc5 = running_acc5 / len(train_loader)

    logs['loss'] = epoch_loss
    logs['accuracy'] = epoch_acc1
    logs['accuracy_5'] = epoch_acc5

    writer.add_scalar('loss', epoch_loss, epoch)
    writer.add_scalar('acc1', epoch_acc1, epoch)
    writer.add_scalar('acc5', epoch_acc5, epoch)

    # ---- validation pass ----
    t_running_loss = 0.0
    t_running_acc1 = 0.0
    t_running_acc5 = 0.0

    # Evaluate in eval mode without building autograd graphs.
    model.eval()
    with torch.no_grad():
        for batch_idx, test_data in enumerate(test_loader):
            # Score the held-out batch itself (previously the last training
            # batch `data` was evaluated here by mistake).
            step = classifier._step(test_data)

            loss = step['loss']
            acc1 = step['verb_accuracy@1']
            acc5 = step['verb_accuracy@5']

            t_running_loss += loss.item()
            t_running_acc1 += acc1.item()
            t_running_acc5 += acc5.item()
            testing_loss['loss'].append(loss.item())
            testing_loss['acc1'].append(acc1.item())
            testing_loss['acc5'].append(acc5.item())

    t_epoch_loss = t_running_loss / len(test_loader)
    t_epoch_acc1 = t_running_acc1 / len(test_loader)
    t_epoch_acc5 = t_running_acc5 / len(test_loader)

    logs['val_loss'] = t_epoch_loss
    logs['val_accuracy'] = t_epoch_acc1
    logs['val_accuracy_5'] = t_epoch_acc5

    writer.add_scalar('t_loss', t_epoch_loss, epoch)
    writer.add_scalar('t_acc1', t_epoch_acc1, epoch)
    writer.add_scalar('t_acc5', t_epoch_acc5, epoch)

    liveloss.update(logs)
    liveloss.send()

In [None]:
# fig = go.Figure()

# for i, cl in enumerate(computed_loss):
#     x = np.linspace(1, len(cl), len(cl), dtype=int)
    
#     fig.add_trace(go.Scatter(
#         x=x,
#         y=cl,
#         name=f'lr: {lr[i]}'
#     ))

# fig.update_layout(
#     xaxis_title='batched steps',
#     yaxis_title='loss',
#     title=f'Training loss for lr: {1e-4}'
# )
# fig.update_yaxes(type="log")
# fig.show()
import pickle
# Pickle the per-batch training and validation histories under ../datasets/epic/models/.
for history, target in (
    (training_loss, '../datasets/epic/models/4-frame_training_loss.pkl'),
    (testing_loss, '../datasets/epic/models/4-frame_testing_loss.pkl'),
):
    with open(target, 'wb') as f:
        pickle.dump(history, f)

In [None]:
# Final loss reached for every learning rate of the sweep.
# The previous version read an undefined `computed_loss` and passed the `lr`
# dict itself as x. The values are now taken from the sweep results stored in
# `lr`: {learning rate: [array of [step, loss] rows per batch size]}.
# NOTE(review): when several batch sizes were swept, the last run is used.
fig = go.Figure()

# Skip learning rates for which the sweep has not produced any run yet.
learning_rates = [rate for rate, runs in lr.items() if len(runs) > 0]
y = [lr[rate][-1][-1, 1] for rate in learning_rates]

fig.add_trace(go.Scatter(
    x=learning_rates,
    y=y
))

fig.update_layout(
    xaxis_title='learning rate',
    yaxis_title='min_loss',
    title='learning rates'
)
fig.update_xaxes(type="log")
fig.show()

In [None]:
# Smoke test: run a single classifier step on one batch with a fresh model.
model = Net(frame_count=n_frames).to(device)
dataloader = DataLoader(dataset, batch_size=512, shuffle=True)

optimiser = Adam(model.parameters(), lr=1e-4)

# NOTE(review): other cells in this notebook call the classifier as
# (model, device, optimiser, train_loader, test_loader); confirm which
# argument order the current `systems` module expects.
classifier = EpicActionRecogintionShapleyClassifier(model, dataloader, optimiser, device, log_interval=log_interval)
data = iter(dataloader)
# Use the built-in next(); iterator objects do not reliably expose .next().
inputs = next(data)


out = classifier._step(inputs)

out

In [None]:
# Rebuild the sampled dataset and split its indices at random into
# 80% training / 20% validation, each served by its own DataLoader.
frame_sampler = RandomSampler(test=False, snippet_length=1, frame_count=n_frames)
dataset = PickleDataset(pickle_dir, frame_sampler)

validation_split = 0.2

idxs = [*range(len(dataset))]
np.random.shuffle(idxs)
split = int(np.floor(validation_split * len(dataset)))

# The first `split` shuffled indices are held out, the remainder is trained on.
test_idx = idxs[:split]
train_idx = idxs[split:]

train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)

train_loader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)
test_loader = DataLoader(dataset, sampler=test_sampler, batch_size=batch_size)

In [None]:
# Number of batches in the training and validation loaders.
tuple(len(loader) for loader in (train_loader, test_loader))

In [None]:
# Pull one batch from the training loader and display it.
data = iter(train_loader)

# Use the built-in next(); iterator objects do not reliably expose .next().
inputs = next(data)

inputs
# classifier.save_parameters('../datasets/epic/models/4-frame-trn-0_695.pt')
# out = classifier._step(inputs)
    


In [None]:
from gulpio2 import GulpDirectory

class GulpDataset(Dataset):
    """Map-style dataset over a gulp directory, indexed by integer position.

    The ordering is the key order of ``GulpDirectory.merged_meta_dict``;
    item ``i`` is whatever the underlying ``GulpDirectory`` returns for the
    ``i``-th key.
    """

    def __init__(self, gulp_dir: Path):
        """Open ``gulp_dir`` and index its entries.

        Args:
            gulp_dir: location passed straight to ``GulpDirectory``.
        """
        # Both attributes are filled in by _load(). The earlier
        # `self.narration_ids = [],` accidentally created a one-element tuple,
        # and `self.gulp_data` was only annotated, never assigned.
        self.narration_ids = []
        self.gulp_data = None
        self._load(gulp_dir)

    def _load(self, gulp_dir: Path):
        """Open the gulp directory and cache the list of its entry keys."""
        self.gulp_data = GulpDirectory(gulp_dir)
        self.narration_ids = list(self.gulp_data.merged_meta_dict.keys())

    def __len__(self):
        """Number of entries found in the gulp directory."""
        return len(self.narration_ids)

    def __getitem__(self, key: int):
        """Return the gulp entry stored under the ``key``-th narration id."""
        return self.gulp_data[self.narration_ids[key]]

In [None]:
# Serve the RGB test gulp directory one entry at a time, using four worker processes.
dataset = GulpDataset('../datasets/epic/gulp/rgb/rgb_test/')

dataloader = DataLoader(dataset, num_workers=4, batch_size=1)

In [None]:
# inputs, labels = dataset.gulp_data['P01_01_96']

# Fetch the first batch from the gulp loader.
loader = iter(dataloader)
# Use the built-in next(); iterator objects do not reliably expose .next().
inputs, labels = next(loader)

In [None]:
# Display the labels of the batch fetched in the previous cell.
labels
# inputs = torch.cat(inputs).to(dtype=torch.float)

# inputs = inputs.index_select(0, torch.tensor([0,3,1,2]))
# inputs.shape

In [None]:
def no_collate(args):
    """Identity collate function: return the list of samples untouched.

    Passed as ``collate_fn`` so the DataLoader hands over the raw samples
    instead of stacking them into tensors.
    """
    batch = args
    return batch

In [None]:
import pickle
# Prototype: load the features pickle and build cumulative frame offsets so
# that clip k occupies rows frame_cumsum[k]:frame_cumsum[k + 1] of the features.
frame_cumsum = np.array([0.])
with open('../datasets/epic/features/tem_pickle.pkl', mode='rb') as f:
    pkl = pickle.load(f)

frame_counts = [label['num_frames'] for label in pkl['labels']]
offsets = np.concatenate([frame_cumsum, frame_counts])
frame_cumsum = np.cumsum(offsets, dtype=int)

# Index of the clip inspected below.
key = 10

def _video_from_narration_id(key: int):
    l = frame_cumsum[key]
    r = frame_cumsum[key+1]
    return pkl['features'][l:r]

# Look the features up with the same `key` that selects the label below,
# rather than a second hard-coded index that has to be kept in sync.
features = _video_from_narration_id(key)

# Shape of the sliced features next to the label metadata of the same clip.
features.shape, { k: pkl['labels'][key][k] for k in ['narration_id','verb_class','noun_class', 'num_frames'] }

In [None]:
import pickle
class MultiPickleDataset(Dataset):
    """Dataset of variable-length clips stored in one pickled feature file.

    The pickle is read as a dict with a ``'features'`` array holding the
    frames of all clips back to back, and a ``'labels'`` list with one dict
    per clip (``num_frames``, ``narration_id``, ``verb_class``,
    ``noun_class``). Clip ``k`` occupies rows
    ``frame_cumsum[k]:frame_cumsum[k + 1]`` of ``'features'``.
    """

    def __init__(self, pkl_path: Path, features_dim: int = 256):
        """Remember the settings and load the pickle.

        Args:
            pkl_path: path of the pickle file to read.
            features_dim: stored on the instance; not used by this class.
        """
        self.pkl_path = pkl_path
        self.features_dim = features_dim
        # Filled in by _load(). (Previously this was assigned the typing
        # object ``Dict[str, Any]`` itself instead of being annotated.)
        self.pkl_dict: Dict[str, Any] = {}
        self.frame_cumsum = np.array([0])
        self._load()

    def _load(self):
        """Read the pickle and rebuild the cumulative frame offsets."""
        # NOTE: pickle.load can execute arbitrary code; only open trusted files.
        with open(self.pkl_path, 'rb') as f:
            self.pkl_dict = pickle.load(f)
        frame_counts = [label['num_frames'] for label in self.pkl_dict['labels']]
        # Always start from a single leading 0 so calling _load() again does
        # not pile new counts on top of the previous offsets.
        self.frame_cumsum = np.cumsum(np.concatenate([[0], frame_counts]), dtype=int)

    def _video_from_narration_id(self, key: int):
        """Return the feature rows belonging to clip number ``key``."""
        l = self.frame_cumsum[key]
        r = self.frame_cumsum[key+1]
        return self.pkl_dict['features'][l:r]

    def __len__(self):
        """Number of clips, i.e. the number of label entries __getitem__ can index."""
        return len(self.pkl_dict['labels'])

    def __getitem__(self, key: int):
        """Return ``(features, labels)`` for clip ``key``.

        ``labels`` holds the clip's narration_id, verb_class and noun_class.

        Raises:
            AssertionError: if the slice length disagrees with ``num_frames``.
        """
        features = self._video_from_narration_id(key)
        video_length = features.shape[0]
        expected_length = self.pkl_dict['labels'][key]['num_frames']
        assert video_length == expected_length, (
            f'clip {key}: sliced {video_length} frames, labels say {expected_length}'
        )

        return (features, { k: self.pkl_dict['labels'][key][k] for k in ['narration_id','verb_class','noun_class'] })

In [None]:
# Instantiate the variable-length clip dataset from the features pickle.
mdataset = MultiPickleDataset('../datasets/epic/features/tem_pickle.pkl')

In [None]:
dl = DataLoader(mdataset, batch_size=64, collate_fn=no_collate, shuffle=True)
frame_sampler = RandomSampler(frame_count=5, snippet_length=1, test=False)

# One model per frame count (1..8), each with its own optimiser and sampler.
# Models are moved to `device` before their optimisers are created so that
# parameters and inputs end up on the same device.
models = [Net(frame_count=i).to(device) for i in range(1,9)]
optims = [Adam(m.parameters(), lr=1e-4) for m in models]
f_samp = [RandomSampler(frame_count=m.frame_count, snippet_length=1, test=False) for m in models]

from torchvideo.samplers import frame_idx_to_list


def _sample_batch(batch, sampler, min_length=5):
    """Turn a list of (features, label) samples into (inputs, labels).

    For every clip, frame indices are drawn with ``sampler`` and the selected
    rows are stacked into one tensor on ``device``. Labels are gathered per
    key; keys whose values can be converted become tensors, the rest stay
    Python lists.

    Raises:
        ValueError: if a clip has fewer than ``min_length`` frames.
    """
    features = []
    labels = {}
    for feature, label in batch:
        length = feature.shape[0]
        # NOTE(review): the threshold is a fixed 5 and independent of the
        # model's frame count; confirm this is intended for 6-8 frame models.
        if length < min_length:
            raise ValueError('video too short')
        idxs = np.array(frame_idx_to_list(sampler.sample(length)))
        features.append(feature[idxs])
        for k, v in label.items():
            labels.setdefault(k, []).append(v)

    for k in labels.keys():
        try:
            labels[k] = torch.tensor(labels[k], device=device)
        except ValueError:
            # Deliberate best effort: values that cannot become a tensor
            # (presumably the string narration ids) are kept as lists.
            pass

    # Stack into one ndarray first; building a tensor from a list of ndarrays
    # converts element by element and is very slow.
    inputs = torch.tensor(np.stack(features), dtype=dtype, device=device)
    return inputs, labels


liveloss = PlotLosses()
assert len(models) == len(optims)
# Per-batch histories keyed '<frame_count>_loss' / '_acc1' / '_acc5'.
training_loss = {}
testing_loss = {}
for m, o, f in zip(models, optims, f_samp):
    classifier = EpicActionRecogintionShapleyClassifier(m, device, o, None, None, log_interval=10)

    for epoch in range(100):
        logs = {}
        running_loss = 0.0
        running_acc1 = 0.0
        running_acc5 = 0.0
        for batch_idx, data in enumerate(dl):
            inputs, labels = _sample_batch(data, f)

            o.zero_grad()
            step = classifier._step((inputs, labels))

            loss = step['loss']
            acc1 = step['verb_accuracy@1']
            acc5 = step['verb_accuracy@5']

            loss.backward()
            o.step()

            running_loss += loss.item()
            running_acc1 += acc1.item()
            running_acc5 += acc5.item()
            training_loss.setdefault(f'{m.frame_count}_loss', []).append(loss.item())
            training_loss.setdefault(f'{m.frame_count}_acc1', []).append(acc1.item())
            training_loss.setdefault(f'{m.frame_count}_acc5', []).append(acc5.item())

        # Epoch metrics are the mean over batches.
        epoch_loss = running_loss / len(dl)
        epoch_acc1 = running_acc1 / len(dl)
        epoch_acc5 = running_acc5 / len(dl)

        logs[f'{m.frame_count}_loss'] = epoch_loss
        logs[f'{m.frame_count}_accuracy'] = epoch_acc1
        logs[f'{m.frame_count}_accuracy_5'] = epoch_acc5

        # One TensorBoard tag per model; a shared 'loss' tag would make all
        # eight models overwrite each other at the same epoch steps.
        writer.add_scalar(f'{m.frame_count}_loss', epoch_loss, epoch)

        liveloss.update(logs)
        liveloss.send()

In [None]:
# Debug pass: assemble every batch the same way the training loop does and
# print the resulting inputs and labels.
for d in dl:
    features = []
    labels = {}
    for feature, label in d:
        length = feature.shape[0]
        if length < 5:
            raise ValueError('video too short')
        idxs = np.array(frame_idx_to_list(frame_sampler.sample(length)))
        features.append(feature[idxs])
        for k, v in label.items():
            labels.setdefault(k, []).append(v)

    for k in labels.keys():
        try:
            labels[k] = torch.tensor(labels[k], device=device)
        except ValueError:
            # Deliberate best effort: values that cannot become a tensor
            # (presumably the string narration ids) are kept as lists.
            pass

    # Stack into one ndarray first; building a tensor from a list of ndarrays
    # converts element by element and is very slow.
    inputs = torch.tensor(np.stack(features), device=device)

    print(inputs, labels)

In [None]:
# Per-batch training loss of each of the eight models, on a log y axis.
fig = go.Figure()
for frame_count in range(1, 9):
    losses = training_loss[f'{frame_count}_loss']
    steps = np.linspace(1, len(losses), len(losses))
    trace = go.Scatter(x=steps, y=losses, name=f'{frame_count} frames')
    fig.add_trace(trace)

fig.update_layout(
    title='training performance for mtrn',
    xaxis_title='batched steps',
    yaxis_title='loss',
)
fig.update_yaxes(type="log")
fig.show()

In [None]:
# Per-batch top-1 accuracy of each of the eight models.
fig = go.Figure()
for frame_count in range(1, 9):
    accuracies = training_loss[f'{frame_count}_acc1']
    steps = np.linspace(1, len(accuracies), len(accuracies))
    trace = go.Scatter(x=steps, y=accuracies, name=f'{frame_count} frames')
    fig.add_trace(trace)

fig.update_layout(
    title='accuracy performance for mtrn',
    xaxis_title='batched steps',
    yaxis_title='accuracy',
)
# fig.update_yaxes(type="log")
fig.show()

In [None]:
# Per-batch top-5 accuracy of each of the eight models.
fig = go.Figure()
for frame_count in range(1, 9):
    accuracies = training_loss[f'{frame_count}_acc5']
    steps = np.linspace(1, len(accuracies), len(accuracies))
    trace = go.Scatter(x=steps, y=accuracies, name=f'{frame_count} frames')
    fig.add_trace(trace)

fig.update_layout(
    title='accuracy k=5 performance for mtrn',
    xaxis_title='batched steps',
    yaxis_title='accuracy',
)
# fig.update_yaxes(type="log")
fig.show()

In [None]:
import itertools
# Walk over every batch except the first one and print its position and size.
batches_after_first = itertools.islice(dl, 1, len(dl))
for i, d in enumerate(batches_after_first):
    print(i, len(d))

In [10]:
from features.pkl import PickleFeatureWriter
from datasets.gulp_dataset import GulpDataset

# Feature writer target and the gulp dataset it will be fed from.
wr = PickleFeatureWriter('../datasets/epic/features/temp.pkl', features_dim=256)
ds = GulpDataset('../datasets/epic/gulp/rgb/rgb_test/')
# Dataset positions 20 .. len(ds) - 1.
idxs = torch.arange(start=20, end=len(ds))

In [11]:
# View of `ds` restricted to the positions listed in `idxs`.
n_ds = Subset(ds, idxs)

In [14]:
# Display the narration ids exposed by the dataset.
ds.narration_ids

['P01_01_0',
 'P01_01_1',
 'P01_01_10',
 'P01_01_100',
 'P01_01_101',
 'P01_01_102',
 'P01_01_103',
 'P01_01_104',
 'P01_01_105',
 'P01_01_106',
 'P01_01_107',
 'P01_01_108',
 'P01_01_109',
 'P01_01_11',
 'P01_01_110',
 'P01_01_111',
 'P01_01_112',
 'P01_01_113',
 'P01_01_114',
 'P01_01_115',
 'P01_01_116',
 'P01_01_117',
 'P01_01_118',
 'P01_01_12',
 'P01_01_120',
 'P01_01_121',
 'P01_01_122',
 'P01_01_123',
 'P01_01_124',
 'P01_01_125',
 'P01_01_126',
 'P01_01_127',
 'P01_01_128',
 'P01_01_129',
 'P01_01_13',
 'P01_01_130',
 'P01_01_131',
 'P01_01_132',
 'P01_01_133',
 'P01_01_134',
 'P01_01_135',
 'P01_01_136',
 'P01_01_137',
 'P01_01_138',
 'P01_01_139',
 'P01_01_14',
 'P01_01_140',
 'P01_01_141',
 'P01_01_142',
 'P01_01_143',
 'P01_01_144',
 'P01_01_145',
 'P01_01_146',
 'P01_01_147',
 'P01_01_148',
 'P01_01_149',
 'P01_01_15',
 'P01_01_150',
 'P01_01_151',
 'P01_01_152',
 'P01_01_153',
 'P01_01_154',
 'P01_01_155',
 'P01_01_156',
 'P01_01_157',
 'P01_01_158',
 'P01_01_159',
 'P01

In [27]:
import pickle
# Read the features pickle back in for inspection.
tem_pickle_path = '../datasets/epic/features/tem_pickle.pkl'
with open(tem_pickle_path, mode='rb') as f:
    xallow = pickle.load(f)
    


{'length': 329,
 'narration_id': ['P01_01_0',
  'P01_01_1',
  'P01_01_10',
  'P01_01_100',
  'P01_01_101',
  'P01_01_102',
  'P01_01_103',
  'P01_01_104',
  'P01_01_105',
  'P01_01_106',
  'P01_01_107',
  'P01_01_108',
  'P01_01_109',
  'P01_01_11',
  'P01_01_110',
  'P01_01_111',
  'P01_01_112',
  'P01_01_113',
  'P01_01_114',
  'P01_01_115',
  'P01_01_116',
  'P01_01_117',
  'P01_01_118',
  'P01_01_12',
  'P01_01_120',
  'P01_01_121',
  'P01_01_122',
  'P01_01_123',
  'P01_01_124',
  'P01_01_125',
  'P01_01_126',
  'P01_01_127',
  'P01_01_128',
  'P01_01_129',
  'P01_01_13',
  'P01_01_130',
  'P01_01_131',
  'P01_01_132',
  'P01_01_133',
  'P01_01_134',
  'P01_01_135',
  'P01_01_136',
  'P01_01_137',
  'P01_01_138',
  'P01_01_139',
  'P01_01_14',
  'P01_01_140',
  'P01_01_141',
  'P01_01_142',
  'P01_01_143',
  'P01_01_144',
  'P01_01_145',
  'P01_01_146',
  'P01_01_147',
  'P01_01_148',
  'P01_01_149',
  'P01_01_15',
  'P01_01_150',
  'P01_01_151',
  'P01_01_152',
  'P01_01_153',
  