> This notebook loads a trained model and produces predictions for each frame in the dataset; the predictions are stored as `frame_predictions.csv` inside the model folder.

> You need to specify the experiment parameters, including the model id, in the `experiment` cell below.

In [4]:
# Experiment configuration for the model whose frame predictions are produced.
# `model_id` selects the model folder, `pretrained_model_name` and `pooling`
# select the cached feature folder, and `sequence_length` is the clip length.
experiment = dict(
    architecture='video_lrcnn_frozen',
    dropout=0.2,
    layer_1_size=256,
    layer_2_size=128,
    layer_3_size=0,
    model_id=5,
    pooling='max',
    pretrained_model_name='resnet50',
    sequence_length=5,
    sequence_model="LSTM",
    sequence_model_layers=2,
)

# Setup

In [5]:
# Verbosity switch: when True, log each feature and sequence status.
verbose = True

In [6]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
from shutil import rmtree
sys.path.append('..')

In [7]:
from keras.models import load_model

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [8]:
# setup paths
# The notebook is expected to run from <project>/notebooks; the project root is
# then the parent directory. Only the final path component is inspected, so a
# "notebooks" substring elsewhere in the path is left alone, and `pwd` always
# ends with a path separator so the string concatenations below stay valid.
_cwd = os.getcwd()
_project_root = os.path.dirname(_cwd) if os.path.basename(_cwd) == "notebooks" else _cwd
pwd = os.path.join(_project_root, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [9]:
# Logging setup: records at INFO level and above are sent both to a
# `logs.log` file under `pwd` and to a stream handler.
import logging

log_file = "{0}/{1}.log".format(pwd, "logs")
log_handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=log_handlers,
)

# root logger, shared by the rest of the notebook
logger = logging.getLogger()


def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception through the root logger.

    KeyboardInterrupt is not logged; it is forwarded to the interpreter's
    default hook instead.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
        return
    sys.__excepthook__(exc_type, exc_value, exc_traceback)


# NOTE(review): IPython handles exceptions raised in cells itself, so this hook
# may not fire for errors inside notebook cells — confirm.
sys.excepthook = handle_exception

In [10]:
from deepvideoclassification.architectures import Architecture

In [11]:
from deepvideoclassification.data import Data

# Load model weights from file 

> e.g. /models/123/model_best.h5

In [12]:
# banner identifying the experiment being processed
print(str(experiment["model_id"]) + "   " + "X"*60)
print(experiment)

# Build the architecture object from the experiment parameters.
# Logging verbosity follows the notebook-level `verbose` switch instead of a
# hard-coded True, so the switch defined in the setup section takes effect.
architecture = Architecture(model_id = experiment['model_id'],
                            architecture = experiment['architecture'],
                            sequence_length = experiment['sequence_length'],
                            pretrained_model_name = experiment['pretrained_model_name'],
                            pooling = experiment['pooling'],
                            sequence_model = experiment['sequence_model'],
                            sequence_model_layers = experiment['sequence_model_layers'],
                            layer_1_size = experiment['layer_1_size'],
                            layer_2_size = experiment['layer_2_size'],
                            layer_3_size = experiment['layer_3_size'],
                            dropout = experiment['dropout'],
                            verbose=verbose)

2020-05-14 18:42:57,731 [MainThread  ] [INFO ]  Loading data
2020-05-14 18:42:57,786 [MainThread  ] [INFO ]  resizing vid 1/3 to 224x224


5   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'architecture': 'video_lrcnn_frozen', 'dropout': 0.2, 'layer_1_size': 256, 'layer_2_size': 128, 'layer_3_size': 0, 'model_id': 5, 'pooling': 'max', 'pretrained_model_name': 'resnet50', 'sequence_length': 5, 'sequence_model': 'LSTM', 'sequence_model_layers': 2}


2020-05-14 18:43:00,662 [MainThread  ] [INFO ]  resizing vid 2/3 to 224x224
2020-05-14 18:43:02,348 [MainThread  ] [INFO ]  resizing vid 3/3 to 224x224
2020-05-14 18:43:12,148 [MainThread  ] [INFO ]  Computing pretrained model features for video 1/3 using pretrained model: resnet50, pooling: max
2020-05-14 18:44:45,179 [MainThread  ] [INFO ]  Computing pretrained model features for video 2/3 using pretrained model: resnet50, pooling: max
2020-05-14 18:45:41,070 [MainThread  ] [INFO ]  Computing pretrained model features for video 3/3 using pretrained model: resnet50, pooling: max
2020-05-14 18:47:06,040 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=262, valid=170, test=281


In [13]:
# show the full path of the checkpoint file (model_best.h5 under the model folder)
print(architecture.path_model + "model_best.h5")

/Users/alex/Documents/Work/thesis/_code/Deep-Neural-Networks-for-Video-Classification/models/5/model_best.h5


In [14]:
# Load the saved checkpoint `model_best.h5` from the model folder and attach it
# to the architecture object, replacing whatever `architecture.model` held.
# NOTE(review): Keras `load_model` restores a complete saved model rather than
# only weights — confirm the file was written with `model.save()`.
architecture.model = load_model(architecture.path_model + "model_best.h5")

# Load data and predict on all frames (train, valid and test)

In [15]:
# pull the parameters used by the data loader and the prediction loops
sequence_length, pretrained_model_name, pooling = (
    experiment[key] for key in ('sequence_length', 'pretrained_model_name', 'pooling')
)

In [16]:
# Create the data object in CNN-feature mode for the chosen pretrained model and
# pooling; per the original note this builds the feature cache if it is missing.
data = Data(
    pooling=pooling,
    pretrained_model_name=pretrained_model_name,
    return_CNN_features=True,
    sequence_length=sequence_length,
)

2020-05-14 18:58:56,774 [MainThread  ] [INFO ]  Features already cached: /Users/alex/Documents/Work/thesis/_code/Deep-Neural-Networks-for-Video-Classification/cache/features/resnet50/max/
2020-05-14 18:58:56,777 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=262, valid=170, test=281


In [17]:
# label table from the data object; later cells read its 'video' column and
# merge it onto the per-frame predictions
labels = data.labels

In [18]:
# Class names come from the data object's label map: take the values in
# sorted-key order and strip the "label_" marker from each one.
class_names = [
    data.label_map[key].replace("label_", "")
    for key in sorted(data.label_map.keys())
]
class_names

['noseal', 'seal']

In [19]:
# distinct values of the labels table's 'video' column, as a plain list
videos = [video_id for video_id in labels['video'].unique()]

In [21]:
def get_features_path(video):
    """Return the path of the cached `.npy` feature file for `video`.

    The file lives under `cache/features/<pretrained_model_name>/<pooling>/`
    relative to `pwd`, with both folder names taken from `experiment`.
    """
    parts = [
        pwd,
        'cache/features/',
        experiment['pretrained_model_name'],
        '/',
        experiment['pooling'],
        '/',
        video,
        '.npy',
    ]
    return "".join(parts)

## LRCN or video concat frame predictions

In [22]:

### LRCN and video concat
# Both architectures take clips of `sequence_length` consecutive frame feature
# vectors; the prediction stored at index i uses frames i-sequence_length+1 .. i,
# so the first sequence_length-1 frames of each video get no prediction.
if experiment['architecture'] in ('video_lrcnn_frozen', 'video_mlp_concat'):
    # collect predictions for each video
    y_preds = []

    for c, video in enumerate(videos):
        print("Computing frame predictions for video {}/{}: {}".format(c+1,len(videos),video))

        # load features from disk
        features = np.load(get_features_path(video))

        # index of the last frame of every full-length clip
        clip_ends = list(range(sequence_length-1, len(features)))
        if not clip_ends:
            # fewer frames than one clip: nothing to predict for this video
            print("Skipping video {}: fewer than {} frames".format(video, sequence_length))
            continue

        # stack every clip into one (n_clips, sequence_length, ...) batch so the
        # model is called once per video instead of once per frame
        clips = np.stack([features[i-sequence_length+1:i+1] for i in clip_ends])
        probs = architecture.model.predict(clips)

        # one row per clip, indexed by the clip's last frame position
        y_pred = pd.DataFrame(probs, columns=class_names, index=clip_ends)
        y_pred['pred'] = y_pred.idxmax(axis=1)

        # Re-index this video's labels from 0 so they line up with frame positions.
        # reset_index returns a new frame, so the `labels` slice is not mutated.
        # NOTE(review): assumes label rows are in the same order as the cached
        # features for the video — confirm.
        y_labs = labels[labels['video']==video].reset_index(drop=True)

        # join predictions on labels
        y_pred = pd.merge(y_pred, y_labs, left_index=True, right_index=True, how='left')
        y_pred['error'] = (y_pred['label'] != y_pred['pred']).astype(int)

        y_preds.append(y_pred)

    preds_all = pd.concat(y_preds)

Computing frame predictions for video 1/3: s26-8164
Computing frame predictions for video 2/3: s27-8212
Computing frame predictions for video 3/3: s28-20


## image_mlp_frozen frame predictions

In [23]:

### image mlp frozen
# This architecture predicts each frame independently from that frame's
# feature vector, so every cached frame gets a prediction.
if experiment['architecture'] == 'image_mlp_frozen':
    # collect predictions for each video
    y_preds = []

    for c, video in enumerate(videos):
        print("Computing frame predictions for video {}/{}: {}".format(c+1,len(videos),video))

        # load features from disk
        features = np.load(get_features_path(video))

        if len(features) == 0:
            # no cached frames: nothing to predict for this video
            print("Skipping video {}: no cached frame features".format(video))
            continue

        # the feature array already has one row per frame, so it is passed to
        # the model as a single batch instead of one call per frame
        probs = architecture.model.predict(features)

        # one row per frame, indexed by frame position
        y_pred = pd.DataFrame(probs, columns=class_names, index=list(range(len(features))))
        y_pred['pred'] = y_pred.idxmax(axis=1)

        # Re-index this video's labels from 0 so they line up with frame positions.
        # reset_index returns a new frame, so the `labels` slice is not mutated.
        # NOTE(review): assumes label rows are in the same order as the cached
        # features for the video — confirm.
        y_labs = labels[labels['video']==video].reset_index(drop=True)

        # join predictions on labels
        y_pred = pd.merge(y_pred, y_labs, left_index=True, right_index=True, how='left')
        y_pred['error'] = (y_pred['label'] != y_pred['pred']).astype(int)

        y_preds.append(y_pred)

    preds_all = pd.concat(y_preds)

# Print some frame predictions and write to file

In [24]:
# preview the first rows: class probabilities, predicted class, label columns and error flag
preds_all.head()

Unnamed: 0,noseal,seal,pred,video,frame,label,split,error
4,0.976137,0.023863,noseal,s26-8164,s26-8164-00005.jpeg,noseal,train,0
5,0.979143,0.020857,noseal,s26-8164,s26-8164-00006.jpeg,noseal,train,0
6,0.981652,0.018348,noseal,s26-8164,s26-8164-00007.jpeg,noseal,train,0
7,0.982266,0.017734,noseal,s26-8164,s26-8164-00008.jpeg,noseal,train,0
8,0.982857,0.017143,noseal,s26-8164,s26-8164-00009.jpeg,noseal,train,0


In [25]:
# overall error rate: mean of the 0/1 `error` flag (1 where label != pred) over all rows
preds_all['error'].mean()

0.043478260869565216

In [26]:
# error rate restricted to rows whose split is 'train'
preds_all.loc[preds_all['split'] == 'train', 'error'].mean()

0.022900763358778626

In [27]:
# error rate restricted to rows whose split is 'valid'
preds_all.loc[preds_all['split'] == 'valid', 'error'].mean()

0.041176470588235294

In [28]:
# error rate restricted to rows whose split is 'test'
preds_all.loc[preds_all['split'] == 'test', 'error'].mean()

0.06405693950177936

In [29]:
# write all frame predictions to frame_predictions.csv inside models/<model_id>/
predictions_csv = "".join([pwd, "models/", str(experiment['model_id']), '/frame_predictions.csv'])
preds_all.to_csv(predictions_csv)