In [1]:
import tensorflow as tf
from tensorflow import keras
import torch
import numpy as np
import pandas as pd
import glob
import sys
import os
import joblib
import pickle

# Seed NumPy's global RNG so NumPy-based shuffling is repeatable.
# (TensorFlow and torch are not seeded in this section.)
np.random.seed(42)

# Where to get the data: the "data_dir" folder under the current working directory
PATH = os.path.join(os.getcwd(), "data_dir")

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Font sizes for axis labels and for x/y tick labels
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

## 1. Get the data

In [2]:
def fetch_movies(path=PATH):
    """Load every ``tt*.pkl`` movie file found in ``path``.

    Parameters
    ----------
    path : str
        Directory to search. Defaults to the notebook-level ``PATH``.

    Returns
    -------
    list
        One unpickled object per readable file, ordered by file name.
        Empty or truncated files are reported on stderr and skipped; they
        no longer stop the remaining files from being loaded.
    """
    # glob returns names in arbitrary order; sort so the movie order (and any
    # seeded split based on it) is the same on every run.
    filenames = sorted(glob.glob(os.path.join(path, "tt*.pkl")))
    movies = []
    for fn in filenames:
        try:
            with open(fn, 'rb') as fin:
                # NOTE: pickle.load can execute arbitrary code; only point
                # this at trusted files.
                movies.append(pickle.load(fin))
        except EOFError:
            # One bad file should not discard every file after it.
            print(f"Skipping empty or truncated movie file: {fn}", file=sys.stderr)
            continue
    return movies

In [3]:
# Load every movie pickle found under PATH
movies = fetch_movies()

In [4]:
def split_train_test(data, train_size=60, seed=42):
    """Shuffle ``data`` reproducibly and split it into train and test lists.

    Parameters
    ----------
    data : sequence
        Indexable collection of samples.
    train_size : int
        Number of shuffled samples placed in the training set; the rest go
        to the test set (which is empty when ``len(data) <= train_size``).
    seed : int
        Seed for the shuffle. The default reproduces the permutation obtained
        from ``np.random.seed(42)`` followed by ``np.random.permutation``.

    Returns
    -------
    (list, list)
        ``(train_set, test_set)``.
    """
    # Use a private generator so the split stays stable across runs without
    # resetting NumPy's global random state for the rest of the notebook.
    rng = np.random.RandomState(seed)
    shuffled_indices = rng.permutation(len(data))
    train_indices = shuffled_indices[:train_size]
    test_indices = shuffled_indices[train_size:]
    train_set = [data[i] for i in train_indices]
    test_set = [data[i] for i in test_indices]
    return train_set, test_set

In [5]:
# Load the full movie list again (same call as the load cell above); the
# train/validation split that would follow is currently commented out
movies = fetch_movies()

In [None]:
# Split movies into training and validation sets
# movies_train, movies_val = split_train_test(movies)

In [None]:
# Longest movie length
# max_movie_length = max([movie['place'].shape[0] for movie in movies])
# max_movie_length

## 2. Data processing

In [6]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def transform_movies(movies, features=['place', 'cast', 'action', 'audio'], pad_len=4000):
    """
    Unroll the given features by column and separate features from labels.
    Then pad (or truncate) every movie to exactly ``pad_len`` steps.

    Parameters
    ----------
    movies : iterable of dict
        Each dict must hold a torch tensor under every name in ``features``
        and under 'scene_transition_boundary_ground_truth'.
        Assumes the feature tensors are 2-D with one row per shot -- TODO confirm.
    features : list of str
        Keys whose tensors are concatenated along dim 1, in this order.
    pad_len : int
        Fixed sequence length of the returned arrays.

    Returns
    -------
    (X_padded, Y_padded)
        ``X_padded`` is float32 and zero-padded; ``Y_padded`` uses
        ``pad_sequences``' default dtype and is padded with ``False``.
    """
    X, Y = [], []
    # Unroll the features
    for movie in movies:
        row = torch.cat([movie[feat] for feat in features], dim=1)
        X.append(row.numpy())
        # Pre-pad the label since its length is N-1
        labels = movie['scene_transition_boundary_ground_truth']
        labels = torch.cat([torch.tensor([False]), labels])
        Y.append(labels.numpy())
    # Pad the sequences. Padding goes at the end, so truncation must also cut
    # the end: pad_sequences truncates 'pre' by default, which would drop the
    # start of an over-long movie and misalign it with code that assumes index
    # 0 is the movie's first step.
    X_padded = pad_sequences(X, maxlen=pad_len, padding='post',
                             truncating='post', dtype='float32')
    Y_padded = pad_sequences(Y, value=False, maxlen=pad_len, padding='post',
                             truncating='post')
    return X_padded, Y_padded

In [None]:
# # Transform training and validation sets
# X_train, y_train = transform_movies(movies_train)
# X_val, y_val = transform_movies(movies_val)

## 3. Build and train models

In [7]:
# First, transform the entire dataset (no train/validation split is applied)
X, y = transform_movies(movies)

In [8]:
# Training accuracy above which training is stopped early
ACCURACY_THRESHOLD = 0.95

class myCallback(tf.keras.callbacks.Callback):
    """Stop training once an epoch's 'accuracy' metric exceeds ACCURACY_THRESHOLD."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's logged accuracy and request a stop if it is high enough.

        ``logs`` defaults to None (no shared mutable default); a missing
        'accuracy' entry is ignored instead of raising a TypeError on the
        comparison.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > ACCURACY_THRESHOLD:
            # Derive the percentage from the constant so the message cannot
            # drift from the actual threshold (0.95 -> "95%").
            print(f"\nReached {ACCURACY_THRESHOLD:.0%} accuracy, so cancelling training!")
            self.model.stop_training = True

In [9]:
# Width of one input row; presumably the summed feature sizes of
# place (2048) + cast (512) + action (512) + audio (512) -- TODO confirm
INPUT_DIM = 2048 + 512 + 512 + 512
# Upper bound on training epochs passed to fit(); myCallback may stop earlier
NUM_EPOCHS = 20

In [10]:
def unpad_predictions(movies, yhat_probs):
    """Map each movie's IMDb id to its predictions cut back to the movie's own length.

    For a movie whose 'place' tensor has N rows, entries 1..N-1 of its
    prediction row are kept: index 0 is dropped and everything from index N
    onwards is discarded.
    """
    # (imdb id, row count) for every movie, in input order
    spans = [(entry['imdb_id'], entry['place'].shape[0]) for entry in movies]
    return {
        imdb: probs[1:n_rows]
        for (imdb, n_rows), probs in zip(spans, yhat_probs)
    }

In [11]:
def write_predictions(yhat_unpadded_dict, path=PATH):
    """Store each movie's predictions back into its ``<imdb id>.pkl`` file.

    Parameters
    ----------
    yhat_unpadded_dict : dict
        Maps IMDb id to an array of predictions; each array is flattened and
        saved under the key 'scene_transition_boundary_prediction'.
    path : str
        Directory holding the movie pickles. Defaults to the notebook-level
        ``PATH``.

    A file that cannot be read or written is reported on stderr and skipped;
    the remaining movies are still processed.
    """
    for imdb, yhat in yhat_unpadded_dict.items():
        # Load existing pkl movie file
        filename = os.path.join(path, imdb + ".pkl")
        try:
            # NOTE: pickle.load can execute arbitrary code; only use trusted files.
            with open(filename, "rb") as fin:
                movie = pickle.load(fin)
            movie['scene_transition_boundary_prediction'] = yhat.flatten()
            # Reopen for writing only after a successful load, and close the
            # handle deterministically so the data is flushed to disk.
            with open(filename, "wb") as fout:
                pickle.dump(movie, fout)
        except (OSError, EOFError, pickle.PickleError) as err:
            # Best effort per file: report and move on instead of silently
            # abandoning every remaining movie.
            print(f"Could not update {filename}: {err}", file=sys.stderr)
            continue

### 3.1 LSTM model

In [12]:
def build_lstm(n_neurons=32, input_shape=[4000]):
    """Build and compile a bidirectional LSTM that emits one sigmoid score per time step.

    Parameters
    ----------
    n_neurons : int
        Number of units in the LSTM (per direction).
    input_shape : list
        Forwarded to the inner LSTM layer as ``input_shape``.
        NOTE(review): an LSTM consumes (timesteps, features) input, so the
        rank-1 default ``[4000]`` looks like the padding length rather than a
        usable shape -- confirm whether this argument has any effect here.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with binary cross-entropy, RMSprop and the 'accuracy' metric.
    """
    model = tf.keras.Sequential([
            tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=n_neurons,
                                                               input_shape=input_shape,
                                                               return_sequences=True)),
            tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='sigmoid'))])
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=2e-5),
              metrics=['accuracy'])
    return model

In [13]:
# Build the bidirectional LSTM classifier with its default settings
lstm_clf = build_lstm()

In [14]:
# Train on the full dataset; myCallback stops early once accuracy passes
# ACCURACY_THRESHOLD. NOTE(review): the recorded output below shows "Epoch 1/5",
# which does not match NUM_EPOCHS = 20 -- the output appears to come from an
# earlier run with a different epoch setting.
lstm_clf.fit(X, y, epochs=NUM_EPOCHS, callbacks=[myCallback()], verbose=2)

Epoch 1/5
2/2 - 125s - loss: 0.6985 - accuracy: 0.7822
Epoch 2/5
2/2 - 124s - loss: 0.6401 - accuracy: 0.9351
Epoch 3/5
2/2 - 108s - loss: 0.6095 - accuracy: 0.9623

Reached 95% accuracy, so cancelling training!


<tensorflow.python.keras.callbacks.History at 0x7fcc0e2583a0>

In [15]:
# Predict on the same data the model was fitted on
lstm_yhat = lstm_clf.predict(X)

In [16]:
# Cut each movie's padded predictions back to that movie's own length
lstm_yhat_unpadded = unpad_predictions(movies, lstm_yhat)

In [17]:
# Store the LSTM predictions in each movie's .pkl file (files are rewritten in place)
write_predictions(lstm_yhat_unpadded)

In [None]:
# lstm_clf = tf.keras.Sequential([
#     tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=256,
#                                                        input_shape=[INPUT_DIM],
#                                                        return_sequences=True)),
#     tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='sigmoid'))
# ])

# lstm_clf.compile(loss='binary_crossentropy',
#               optimizer=tf.keras.optimizers.RMSprop(lr=2e-5),
#               metrics=['accuracy'])

In [None]:
# %%time
# lstm_clf.fit(X, 
#           y, 
#           epochs=1, 
#           callbacks=[myCallback()],
#           verbose=2)

### 3.2 WaveNet model

In [30]:
def build_wave_net(input_shape=[None, INPUT_DIM], filters1=20, filters2=10, kern1=2, kern2=1, padding='same'):
    """Build and compile a WaveNet-style stack of dilated 1-D convolutions.

    Parameters
    ----------
    input_shape : list
        Shape given to the input layer (without the batch axis).
    filters1, kern1 : int
        Filters and kernel size of each dilated Conv1D layer.
    filters2, kern2 : int
        Filters and kernel size of the Conv1D layer that follows the stack.
    padding : str
        Padding mode of the dilated Conv1D layers. The default 'same' matches
        the value that was previously hard-coded.

    Returns
    -------
    tf.keras.models.Sequential
        Model compiled with binary cross-entropy, RMSprop and the 'accuracy' metric.
    """
    wave_model = tf.keras.models.Sequential()
    wave_model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
    # Two passes over dilation rates 1, 2, 4, 8 -> eight dilated layers
    for rate in (1, 2, 4, 8) * 2:
        wave_model.add(tf.keras.layers.Conv1D(filters=filters1,
                                              kernel_size=kern1,
                                              # honour the caller's choice instead of a fixed 'same'
                                              padding=padding,
                                              activation='relu', dilation_rate=rate))
    wave_model.add(tf.keras.layers.Conv1D(filters=filters2, kernel_size=kern2))
    wave_model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    wave_model.compile(loss='binary_crossentropy',
                  optimizer=tf.keras.optimizers.RMSprop(learning_rate=2e-5),
                  metrics=['accuracy'])
    return wave_model

In [31]:
# Build the dilated-convolution classifier with its default settings
wave_clf = build_wave_net()

In [32]:
# Train on the full dataset with the same early-stopping callback as the LSTM.
# NOTE(review): the recorded output below shows "Epoch 1/5", which does not
# match NUM_EPOCHS = 20 -- the output appears to come from an earlier run.
wave_clf.fit(X, y, epochs=NUM_EPOCHS, callbacks=[myCallback()], verbose=2)

Epoch 1/5
2/2 - 43s - loss: 0.7007 - accuracy: 0.6144
Epoch 2/5
2/2 - 28s - loss: 0.6988 - accuracy: 0.6151
Epoch 3/5
2/2 - 29s - loss: 0.6978 - accuracy: 0.6157
Epoch 4/5
2/2 - 14s - loss: 0.6971 - accuracy: 0.6164
Epoch 5/5
2/2 - 11s - loss: 0.6965 - accuracy: 0.6172


<tensorflow.python.keras.callbacks.History at 0x7fc5f4987e50>

In [33]:
# Predict on the same data the model was fitted on
wave_hat = wave_clf.predict(X)

In [34]:
# Cut each movie's padded predictions back to that movie's own length
# (these are not written back to the .pkl files within this section)
wave_hat_unpadded2 = unpad_predictions(movies, wave_hat)