In [4]:
import glob
import os
import sys
import warnings

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
import torch

# Seed NumPy's global RNG so NumPy-based randomness is repeatable across runs.
# NOTE(review): only NumPy is seeded in this cell; TensorFlow/PyTorch are not —
# confirm whether model training is expected to be reproducible as well.
np.random.seed(42)

# Where to get the data
# Resolved against the current working directory, i.e. a "data_dir" folder is
# expected wherever the kernel was started.
PATH = os.path.join(os.getcwd(), "data_dir")

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default font sizes for axis labels and tick labels
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

## 1. Get the data

In [16]:
def fetch_movies(path=PATH):
    """Load every pickled movie file (``tt*.pkl``) found in ``path``.

    Parameters
    ----------
    path : str
        Directory to search. Defaults to the module-level ``PATH``.

    Returns
    -------
    list
        One object per matching file, exactly as returned by ``joblib.load``,
        ordered by file name. Empty if no file matches.
    """
    # Search the directory the caller asked for (the argument used to be
    # ignored in favour of the global PATH).
    # glob returns matches in arbitrary order, so sort them: the seeded
    # train/validation shuffle is only reproducible if the input order is.
    filenames = sorted(glob.glob(os.path.join(path, "tt*.pkl")))
    # NOTE(review): joblib.load unpickles its input and can execute arbitrary
    # code, so only point this at trusted files.
    return [joblib.load(fn) for fn in filenames]

In [24]:
def split_train_test(data, train_size=60, seed=42):
    """Shuffle ``data`` reproducibly and split it into train and test lists.

    Parameters
    ----------
    data : sequence
        Indexable collection of samples (e.g. a list of movies).
    train_size : int
        Number of shuffled samples that go into the training set; all
        remaining samples form the test set.
    seed : int
        Seed for the shuffle. The default reproduces the split obtained with
        ``np.random.seed(42)`` followed by ``np.random.permutation``.

    Returns
    -------
    (list, list)
        ``(train_set, test_set)``.
    """
    # Use a private generator instead of reseeding NumPy's global RNG, so that
    # calling this function does not silently reset the random state used by
    # the rest of the notebook.
    rng = np.random.RandomState(seed)
    shuffled_indices = rng.permutation(len(data))
    train_set = [data[i] for i in shuffled_indices[:train_size]]
    test_set = [data[i] for i in shuffled_indices[train_size:]]
    return train_set, test_set

In [31]:
# def transform_movies(movies, features=['place', 'cast', 'action', 'audio']):
#     """Unroll the given features by column and separate features from labels"""
#     X, Y = [], []
#     for movie in movies: 
#         row = torch.cat([movie[feat] for feat in features], dim=1)
#         X.append(row.numpy())
#         # Pre-pad the label since its length is N-1
#         labels = movie['scene_transition_boundary_ground_truth']
#         labels = torch.cat([torch.tensor([False]), labels])
#         Y.append(labels.numpy())
#     return X, Y

In [None]:
# from tensorflow.keras.preprocessing.sequence import pad_sequences

# # Pad sequences
# X_train_pad = pad_sequences(X_train, value=99, maxlen=max_movie_length, padding='post', dtype='float32')
# X_val_pad = pad_sequences(X_val, value=99, maxlen=max_movie_length, padding='post', dtype='float32')
# y_train_pad = pad_sequences(y_train, value=False, maxlen=max_movie_length, padding='post')
# y_val_pad = pad_sequences(y_val, value=False, maxlen=max_movie_length, padding='post')

# print('X_train_pad shape: {}'.format(X_train_pad.shape))
# print('X_val_pad shape: {}'.format(X_val_pad.shape))
# print('y_train_pad shape: {}'.format(y_train_pad.shape))
# print('y_val_pad shape: {}'.format(y_val_pad.shape))

In [33]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def transform_movies(movies, features=('place', 'cast', 'action', 'audio'), pad_len=4000):
    """
    Unroll the given features by column and separate features from labels.
    Then pad every movie to exactly ``pad_len`` rows.

    Parameters
    ----------
    movies : iterable of mapping
        Each movie must provide one 2-D torch tensor per name in ``features``
        and a ``'scene_transition_boundary_ground_truth'`` tensor.
    features : sequence of str
        Feature tensors to concatenate along ``dim=1``, in this order.
    pad_len : int
        Number of rows of every returned sequence. This is a fixed length,
        not the length of the longest movie; shorter movies are padded at the
        end, longer ones are truncated by ``pad_sequences``.

    Returns
    -------
    (X_padded, Y_padded)
        ``X_padded`` is float32, padded with the ``pad_sequences`` default
        value; ``Y_padded`` holds the labels padded with ``False`` (returned
        as integers by ``pad_sequences``).
    """
    X, Y = [], []
    # Unroll the features
    for movie in movies:
        row = torch.cat([movie[feat] for feat in features], dim=1)
        X.append(row.numpy())
        # Pre-pad the label since its length is N-1
        labels = movie['scene_transition_boundary_ground_truth']
        labels = torch.cat([torch.tensor([False]), labels])
        Y.append(labels.numpy())
    # Truncation would otherwise happen silently inside pad_sequences.
    longest = max((len(x) for x in X), default=0)
    if longest > pad_len:
        warnings.warn(
            "Longest movie has {} rows but pad_len={}; longer movies will be "
            "truncated.".format(longest, pad_len)
        )
    # Pad the sequences
    X_padded = pad_sequences(X, maxlen=pad_len, padding='post', dtype='float32')
    Y_padded = pad_sequences(Y, value=False, maxlen=pad_len, padding='post')
    return X_padded, Y_padded

In [None]:
# Load every movie from the data directory (the train/validation split
# happens in a later cell)
movies = fetch_movies()

In [None]:
# Row count (first dimension of the 'place' tensor) of the longest movie
max_movie_length = max(movie['place'].shape[0] for movie in movies)
max_movie_length

In [27]:
# Split movies into training and validation sets
# (split_train_test puts its default of 60 movies in the training set and the
# rest in the validation set)
movies_train, movies_val = split_train_test(movies)

In [34]:
# Transform training and validation sets
# transform_movies returns arrays that are already padded to its pad_len, so
# no separate padding step is needed afterwards.
X_train, y_train = transform_movies(movies_train)
X_val, y_val = transform_movies(movies_val)

In [38]:
# Sanity check: shape of one padded training movie, (rows, concatenated feature width)
X_train[35].shape

(4000, 3584)

In [51]:
# Padded label matrix, one row per training movie
y_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]], dtype=int32)

## 2. Build and evaluate models

In [None]:
# Training accuracy above which training is stopped early
ACCURACY_THRESHOLD = 0.95

class myCallback(tf.keras.callbacks.Callback):
    """Stop training once the epoch's 'accuracy' exceeds ACCURACY_THRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the reported accuracy at the end of each epoch.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished (unused).
        logs : dict or None
            Metrics reported for the epoch; only the 'accuracy' entry is read.
        """
        # No shared mutable default, and tolerate logs=None.
        accuracy = (logs or {}).get('accuracy')
        # If the metric is missing there is nothing to compare; comparing None
        # with a float would raise TypeError.
        if accuracy is not None and accuracy > ACCURACY_THRESHOLD:
            # Message follows the threshold (prints "95%" for 0.95).
            print("\nReached {:.0%} accuracy, so cancelling training!".format(ACCURACY_THRESHOLD))
            self.model.stop_training = True

In [None]:
# Width of one concatenated feature row (= 3584, matching X_train[35].shape above).
# NOTE(review): presumably the four terms are the widths of the 'place', 'cast',
# 'action' and 'audio' features in that order — confirm against the data.
INPUT_DIM = 2048 + 512 + 512 + 512
# Maximum number of training epochs passed to fit()
NUM_EPOCHS = 20

### 2.1 LSTM model

In [None]:
# Bidirectional LSTM that emits one sigmoid output per row of the input sequence.
lstm_model = tf.keras.Sequential([
    # Declare the input on the model itself, the same way the WaveNet model
    # does: any number of rows, INPUT_DIM features per row. The previous
    # input_shape=[INPUT_DIM] sat on the wrapped LSTM and lacked the sequence axis.
    tf.keras.layers.InputLayer(input_shape=[None, INPUT_DIM]),
    # return_sequences=True keeps one output per row instead of only the last one.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=256,
                                                       return_sequences=True)),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='sigmoid'))
])

In [None]:
# Binary cross-entropy on the per-row sigmoid outputs.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
lstm_model.compile(loss='binary_crossentropy',
                   optimizer=tf.keras.optimizers.RMSprop(learning_rate=2e-5),
                   metrics=['accuracy'])

In [None]:
%%time
# transform_movies already returns padded arrays, so train on X_train/y_train
# directly; the *_pad names were only ever defined in a commented-out cell and
# raise NameError on a fresh kernel.
lstm_model.fit(X_train,
               y_train,
               epochs=NUM_EPOCHS,
               callbacks=[myCallback()],
               validation_data=(X_val, y_val),
               verbose=2)

### 2.2 WaveNet model

In [None]:
# WaveNet-style stack of dilated 1-D convolutions over the sequence.
wave_model = tf.keras.models.Sequential()
wave_model.add(tf.keras.layers.InputLayer(input_shape=[None, INPUT_DIM]))
# Two passes over dilation rates 1, 2, 4, 8.
for rate in (1, 2, 4, 8) * 2:
    wave_model.add(tf.keras.layers.Conv1D(filters=20, kernel_size=2, padding="same",
                                          activation="relu", dilation_rate=rate))
wave_model.add(tf.keras.layers.Conv1D(filters=10, kernel_size=1))
# One sigmoid output per row.
wave_model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
wave_model.compile(loss='binary_crossentropy',
                   optimizer=tf.keras.optimizers.RMSprop(learning_rate=2e-5),
                   metrics=['accuracy'])

In [None]:
%%time
# transform_movies already returns padded arrays, so train on X_train/y_train
# directly; the *_pad names were only ever defined in a commented-out cell and
# raise NameError on a fresh kernel.
wave_model.fit(X_train,
               y_train,
               epochs=NUM_EPOCHS,
               callbacks=[myCallback()],
               validation_data=(X_val, y_val),
               verbose=2)

In [None]:
# Predict probabilities for the validation set.
# X_val is already padded by transform_movies; X_val_pad is never defined.
yhat_probs = wave_model.predict(X_val)

In [None]:
# Hard 0/1 predictions: threshold the sigmoid outputs at 0.5. This is what
# predict_classes did for a single sigmoid output, but it also works on
# TensorFlow versions where predict_classes has been removed.
# X_val is already padded by transform_movies; X_val_pad is never defined.
yhat_classes = (wave_model.predict(X_val) > 0.5).astype("int32")

In [None]:
# Total number of positive (class 1) predictions over the whole validation set.
# ndarray.sum() adds all elements in one vectorised call instead of two nested
# Python-level loops.
yhat_classes.sum()

In [None]:
# Shape of the predicted-probability array returned by predict()
yhat_probs.shape

In [None]:
# Display the predicted class labels
yhat_classes