In [2]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Word2Vec

from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, GlobalMaxPooling1D, LSTM, SimpleRNN, Dense, Dropout, Flatten, Bidirectional
from keras.layers import Input, concatenate, Activation
from keras.layers.embeddings import Embedding
from keras.models import Sequential, Model
from keras.regularizers import l2

stop_words = set(stopwords.words('english'))

import time

Using TensorFlow backend.


read in the data

In [3]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [4]:
#parse each row to get label vectors from json
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

Get dictionary for genre to its index in label vector

In [5]:
#incldue ALL genres of data set in genre dict as is
genre_dict = {'War': 0,
 'Family': 1,
 'Science Fiction': 2,
 'Thriller': 3,
 'Horror': 4,
 'Romance': 5,
 'Drama': 6,
 'Foreign': 7,
 'Documentary': 8,
 'Fantasy': 9,
 'Western': 10,
 'History': 11,
 'Comedy': 12,
 'Action': 13,
 'Adventure': 14,
 'Animation': 15,
 'Crime': 16,
 'Music': 17,
 'TV Movie': 18,
 'Mystery': 19}

In [6]:
#for mapping to coarse grained labels (in this situation we don't do that so labels map to self)
#maps to self here as we don't do anything special with labels in this file
genre_map = {'War': 'War',
 'Family': 'Family',
 'Science Fiction': 'Science Fiction',
 'Thriller': 'Thriller',
 'Horror': 'Horror',
 'Romance': 'Romance',
 'Drama': 'Drama',
 'Foreign': 'Foreign',
 'Documentary': 'Documentary',
 'Fantasy': 'Fantasy',
 'Western': 'Western',
 'History': 'History',
 'Comedy': 'Comedy',
 'Action': 'Action',
 'Adventure': 'Adventure',
 'Animation': 'Animation',
 'Crime': 'Crime',
 'Music': 'Music',
 'TV Movie': 'TV Movie',
 'Mystery': 'Mystery'}

In [7]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret

In [8]:
getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [9]:
#put to lower case, remove punctation, remove stopwords
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text

all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [10]:
all_data = all_data[all_data.genres_vect.map(sum) > 0] #drop rows that now have no labels 

In [11]:
#neural net data only needs a few cols
nn_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [12]:
train, test = train_test_split(nn_data, test_size=0.2, random_state=42)

Extract actual features and labels from train and test set

In [13]:
#gettrian and test features for classification. Just need text and lables for this
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [14]:
#convert labels from array of lists to numpy array

y_train = y.tolist()
y_train = np.array(y_train)

y_test = y_test.tolist()
y_test = np.array(y_test)

Get initial word embedding vectors

In [15]:
tok = [word_tokenize(ov) for ov in x]

In [16]:
word_vec_len = 32
w2v = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [17]:
num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150 #larger than averaage but not too large

#get actual train features to feed into neural nets for training
x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

In [18]:
test_sequences = tokenizer.texts_to_sequences(x_test)
#get actual test features to feed into neural nets for testing
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

Get word embeddings matrix for start input to neural net

In [19]:
#Citation: This technique to get word embeddings comes, with some minor changes, mostly from: 
#https://towardsdatascience.com/another-twitter-sentiment-analysis-with-python-part-11-cnn-word2vec-41f5e28eda74

embeddings_index = {}
for w in w2v.wv.vocab.keys():
    embeddings_index[w] = w2v.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Below we define evlaution metric functions

In [20]:
def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [21]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better. Percent incorrectly chosen labels counting assignment and non-assignment equally
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])


#K is what we imported keras backend as

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [22]:
def get_all_metrics(actual_labels, predictions):
    print('Getting evaluation metrics for each label:')
    get_per_label_metrics(actual_labels, predictions)
    print('Getting evaluations for multilabel problem')
    print('Multilabel accuracy: ' + str(multi_label_accuracy(actual_labels, predictions)))
    print('Multilabel precision: ' + str(multi_label_precision(actual_labels, predictions)))
    print('Multilabel recall: ' + str(multi_label_recall(actual_labels, predictions)))
    print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(actual_labels, predictions))))

In [23]:
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
#not same as patience. Want to not even start looking until delay is reached
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__(monitor=monitor, min_delta=min_delta, patience=patience,verbose=verbose, mode=mode)
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [24]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

Convolutional Neural Networks

In [25]:
model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 3s - loss: 0.5312 - raw_multi_label_accuracy: 0.1011 - val_loss: 0.3963 - val_raw_multi_label_accuracy: 0.1695
Epoch 2/1000
 - 2s - loss: 0.4094 - raw_multi_label_accuracy: 0.1190 - val_loss: 0.3732 - val_raw_multi_label_accuracy: 0.1695
Epoch 3/1000
 - 2s - loss: 0.3829 - raw_multi_label_accuracy: 0.1121 - val_loss: 0.3615 - val_raw_multi_label_accuracy: 0.1695
Epoch 4/1000
 - 2s - loss: 0.3673 - raw_multi_label_accuracy: 0.1186 - val_loss: 0.3523 - val_raw_multi_label_accuracy: 0.1668
Epoch 5/1000
 - 2s - loss: 0.3569 - raw_multi_label_accuracy: 0.1109 - val_loss: 0.3464 - val_raw_multi_label_accuracy: 0.1695
Epoch

In [26]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [27]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9078726968174204
Precision for Family: 0.09090909090909091
Recall for Family: 0.021739130434782608

Accuruacy for Science Fiction: 0.8726968174204355
Precision for Science Fiction: 0.25
Recall for Science Fiction: 0.11290322580645161

Accuruacy for Thriller: 0.678391959798995
Precision for Thriller: 0.4267515923566879
Recall for Thriller: 0.39644970414201186

Accuruacy for Horror: 0.8458961474036851
Precision for Horror: 0.16666666666666666
Recall for Horror: 0.16071428571428573

Accuruacy for Romance: 0.7738693467336684
Precision for Romance: 0.3695652173913043
Recall for Romance: 0.3063063063063063

Accuruacy for Drama: 0.541038525963149
Precision for Drama: 0.5812274368231047
Recall for Drama: 0.5047021943573667

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.973

  'precision', 'predicted', average, warn_for)


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [28]:
model_input = Input(shape=(max_seq_len,), dtype='int32')
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 5s - loss: 2.6038 - raw_multi_label_accuracy: 0.1105 - val_loss: 1.8725 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 4s - loss: 1.4920 - raw_multi_label_accuracy: 0.1132 - val_loss: 1.0911 - val_raw_multi_label_accuracy: 0.1616
Epoch 3/1000
 - 4s - loss: 0.8891 - raw_multi_label_accuracy: 0.1127 - val_loss: 0.6829 - val_raw_multi_label_accuracy: 0.1702
Epoch 4/1000
 - 4s - loss: 0.5882 - raw_multi_label_accuracy: 0.1020 - val_loss: 0.4855 - val_raw_multi_label_accuracy: 0.0732
Epoch 5/1000
 - 4s - loss: 0.4422 - raw_multi_label_accuracy: 0.1052 - val_loss: 0.3937 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 4s - loss: 0.3760 - raw_multi_label_accuracy: 0.0995 - val_loss: 0.3521 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 4s - loss: 0.3460 - raw_multi_label_accuracy: 0.1003 - val_loss: 0.3356 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 8/1000
 - 4s - loss: 0.3332 - raw_multi_la

Epoch 65/1000
 - 4s - loss: 0.1352 - raw_multi_label_accuracy: 0.6318 - val_loss: 0.5176 - val_raw_multi_label_accuracy: 0.2083
Epoch 66/1000
 - 4s - loss: 0.1336 - raw_multi_label_accuracy: 0.6293 - val_loss: 0.5357 - val_raw_multi_label_accuracy: 0.2113
Epoch 67/1000
 - 3s - loss: 0.1342 - raw_multi_label_accuracy: 0.6359 - val_loss: 0.5279 - val_raw_multi_label_accuracy: 0.2091
Epoch 68/1000
 - 3s - loss: 0.1313 - raw_multi_label_accuracy: 0.6427 - val_loss: 0.5382 - val_raw_multi_label_accuracy: 0.1995
Epoch 69/1000
 - 3s - loss: 0.1320 - raw_multi_label_accuracy: 0.6402 - val_loss: 0.5387 - val_raw_multi_label_accuracy: 0.2107
Time to train with cross validation for early stopping: 249.66069889068604 seconds


In [29]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [30]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9212730318257957
Precision for Family: 0.4
Recall for Family: 0.043478260869565216

Accuruacy for Science Fiction: 0.8944723618090452
Precision for Science Fiction: 0.4
Recall for Science Fiction: 0.03225806451612903

Accuruacy for Thriller: 0.6850921273031826
Precision for Thriller: 0.4306569343065693
Recall for Thriller: 0.34911242603550297

Accuruacy for Horror: 0.9011725293132329
Precision for Horror: 0.2
Recall for Horror: 0.017857142857142856

Accuruacy for Romance: 0.7839195979899497
Precision for Romance: 0.3269230769230769
Recall for Romance: 0.15315315315315314

Accuruacy for Drama: 0.5963149078726968
Precision for Drama: 0.5994897959183674
Recall for Drama: 0.7366771159874608

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for D

Regular Neural Network

In [31]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 3s - loss: 0.3970 - raw_multi_label_accuracy: 0.1207 - val_loss: 0.3385 - val_raw_multi_label_accuracy: 0.0394
Epoch 2/1000
 - 2s - loss: 0.3245 - raw_multi_label_accuracy: 0.0915 - val_loss: 0.3283 - val_raw_multi_label_accuracy: 0.1172
Epoch 3/1000
 - 2s - loss: 0.3068 - raw_multi_label_accuracy: 0.1308 - val_loss: 0.3234 - val_raw_multi_label_accuracy: 0.0682
Epoch 4/1000
 - 2s - loss: 0.2976 - raw_multi_label_accuracy: 0.1342 - val_loss: 0.3261 - val_raw_multi_label_accuracy: 0.1342
Epoch 5/1000
 - 2s - loss: 0.2866 - raw_multi_label_accuracy: 0.1556 - val_loss: 0.3218 - val_raw_multi_label_accuracy: 0.1085
Epoch 6/1000
 - 2s - loss: 0.2737 - raw_multi_label_accuracy: 0.1899 - val_loss: 0.3211 - val_raw_multi_label_accuracy: 0.0717
Epoch 7/1000
 - 2s - loss: 0.2538 - raw_multi_label_accuracy: 0.2426 - val_loss: 0.3193 - val_raw_multi_label_accuracy: 0.0944
Epoch 8/1000
 - 2s - loss: 0.2288 - raw_multi_label_accuracy: 0.

In [32]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [33]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9229480737018425
Precision for Family: 0.0
Recall for Family: 0.0

Accuruacy for Science Fiction: 0.9112227805695142
Precision for Science Fiction: 1.0
Recall for Science Fiction: 0.14516129032258066

Accuruacy for Thriller: 0.7102177554438861
Precision for Thriller: 0.45652173913043476
Recall for Thriller: 0.1242603550295858

Accuruacy for Horror: 0.9061976549413735
Precision for Horror: 0.5
Recall for Horror: 0.017857142857142856

Accuruacy for Romance: 0.8157453936348409
Precision for Romance: 0.5172413793103449
Recall for Romance: 0.13513513513513514

Accuruacy for Drama: 0.6080402010050251
Precision for Drama: 0.6276276276276276
Recall for Drama: 0.6551724137931034

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
R

LSTM

In [34]:
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
lstm_model.add(Dense(256, activation='relu'))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 11s - loss: 0.4767 - raw_multi_label_accuracy: 0.1228 - val_loss: 0.3237 - val_raw_multi_label_accuracy: 0.1695
Epoch 2/1000
 - 9s - loss: 0.3177 - raw_multi_label_accuracy: 0.1011 - val_loss: 0.3173 - val_raw_multi_label_accuracy: 0.1695
Epoch 3/1000
 - 9s - loss: 0.3129 - raw_multi_label_accuracy: 0.0714 - val_loss: 0.3152 - val_raw_multi_label_accuracy: 0.1695
Epoch 4/1000
 - 9s - loss: 0.3124 - raw_multi_label_accuracy: 0.1299 - val_loss: 0.3147 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 9s - loss: 0.3120 - raw_multi_label_accuracy: 0.1015 - val_loss: 0.3147 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 10s - loss: 0.3117 - raw_multi_label_accuracy: 0.0892 - val_loss: 0.3140 - val_raw_multi_label_accuracy: 0.1002
Epoch 7/1000
 - 10s - loss: 0.3116 - raw_multi_label_accuracy: 0.0966 - val_loss: 0.3151 - val_raw_multi_label_accuracy: 0.1695
Epoch 8/1000
 - 9s - loss: 0.3111 - raw_multi_label_a

In [35]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [36]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9061976549413735
Precision for Family: 0.2727272727272727
Recall for Family: 0.13043478260869565

Accuruacy for Science Fiction: 0.8726968174204355
Precision for Science Fiction: 0.3157894736842105
Recall for Science Fiction: 0.1935483870967742

Accuruacy for Thriller: 0.7169179229480737
Precision for Thriller: 0.5
Recall for Thriller: 0.41420118343195267

Accuruacy for Horror: 0.9078726968174204
Precision for Horror: 0.5454545454545454
Recall for Horror: 0.10714285714285714

Accuruacy for Romance: 0.7788944723618091
Precision for Romance: 0.3939393939393939
Recall for Romance: 0.35135135135135137

Accuruacy for Drama: 0.5728643216080402
Precision for Drama: 0.5958083832335329
Recall for Drama: 0.6238244514106583

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.97319

simple rnn

In [37]:
rnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
rnn.add(e)
rnn.add(SimpleRNN(32, activation = 'relu'))
rnn.add(Dense(256, activation='relu'))
rnn.add(Dense(len(genre_dict), activation='sigmoid'))
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
rnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 4s - loss: 0.5942 - raw_multi_label_accuracy: 0.0341 - val_loss: 0.4257 - val_raw_multi_label_accuracy: 0.0493
Epoch 2/1000
 - 2s - loss: 0.3674 - raw_multi_label_accuracy: 0.0896 - val_loss: 0.3447 - val_raw_multi_label_accuracy: 0.0749
Epoch 3/1000
 - 2s - loss: 0.3316 - raw_multi_label_accuracy: 0.1328 - val_loss: 0.3278 - val_raw_multi_label_accuracy: 0.0323
Epoch 4/1000
 - 2s - loss: 0.3150 - raw_multi_label_accuracy: 0.0965 - val_loss: 0.3179 - val_raw_multi_label_accuracy: 0.0536
Epoch 5/1000
 - 2s - loss: 0.3055 - raw_multi_label_accuracy: 0.1362 - val_loss: 0.3173 - val_raw_multi_label_accuracy: 0.0362
Epoch 6/1000
 - 2s - loss: 0.2999 - raw_multi_label_accuracy: 0.0863 - val_loss: 0.3141 - val_raw_multi_label_accuracy: 0.1616
Epoch 7/1000
 - 2s - loss: 0.2925 - raw_multi_label_accuracy: 0.1250 - val_loss: 0.3146 - val_raw_multi_label_accuracy: 0.1633
Epoch 8/1000
 - 2s - loss: 0.2853 - raw_multi_label_accuracy: 0.

In [38]:
predictions = nn_output_to_predictions(rnn.predict(x_test_seq))

In [39]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.9631490787269682
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9112227805695142
Precision for Family: 0.18181818181818182
Recall for Family: 0.043478260869565216

Accuruacy for Science Fiction: 0.8860971524288107
Precision for Science Fiction: 0.36363636363636365
Recall for Science Fiction: 0.12903225806451613

Accuruacy for Thriller: 0.6298157453936348
Precision for Thriller: 0.29365079365079366
Recall for Thriller: 0.21893491124260356

Accuruacy for Horror: 0.8844221105527639
Precision for Horror: 0.15789473684210525
Recall for Horror: 0.05357142857142857

Accuruacy for Romance: 0.7453936348408711
Precision for Romance: 0.2696629213483146
Recall for Romance: 0.21621621621621623

Accuruacy for Drama: 0.5745393634840871
Precision for Drama: 0.5920679886685553
Recall for Drama: 0.6551724137931034

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy fo

bidirectional-LSTM

In [40]:
bi_lstm = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
bi_lstm.add(e)
bi_lstm.add(Bidirectional(LSTM(100, dropout=0.25, recurrent_dropout=0.25)))
bi_lstm.add(Dense(256, activation='relu'))
bi_lstm.add(Dense(len(genre_dict), activation='sigmoid'))
bi_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
bi_lstm.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 22s - loss: 0.4839 - raw_multi_label_accuracy: 0.1335 - val_loss: 0.3218 - val_raw_multi_label_accuracy: 0.1179
Epoch 2/1000
 - 16s - loss: 0.3163 - raw_multi_label_accuracy: 0.0942 - val_loss: 0.3157 - val_raw_multi_label_accuracy: 0.0112
Epoch 3/1000
 - 16s - loss: 0.3130 - raw_multi_label_accuracy: 0.0994 - val_loss: 0.3149 - val_raw_multi_label_accuracy: 0.0353
Epoch 4/1000
 - 17s - loss: 0.3122 - raw_multi_label_accuracy: 0.0907 - val_loss: 0.3153 - val_raw_multi_label_accuracy: 0.1695
Epoch 5/1000
 - 18s - loss: 0.3125 - raw_multi_label_accuracy: 0.1151 - val_loss: 0.3148 - val_raw_multi_label_accuracy: 0.1000
Epoch 6/1000
 - 18s - loss: 0.3120 - raw_multi_label_accuracy: 0.0896 - val_loss: 0.3149 - val_raw_multi_label_accuracy: 0.1695
Epoch 7/1000
 - 17s - loss: 0.3115 - raw_multi_label_accuracy: 0.0853 - val_loss: 0.3157 - val_raw_multi_label_accuracy: 0.0476
Epoch 8/1000
 - 16s - loss: 0.3119 - raw_multi_label_accu

In [41]:
predictions = nn_output_to_predictions(bi_lstm.predict(x_test_seq))

In [42]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.916247906197655
Precision for Family: 0.25
Recall for Family: 0.043478260869565216

Accuruacy for Science Fiction: 0.8911222780569514
Precision for Science Fiction: 0.4
Recall for Science Fiction: 0.0967741935483871

Accuruacy for Thriller: 0.6633165829145728
Precision for Thriller: 0.3904109589041096
Recall for Thriller: 0.33727810650887574

Accuruacy for Horror: 0.897822445561139
Precision for Horror: 0.41379310344827586
Recall for Horror: 0.21428571428571427

Accuruacy for Romance: 0.8241206030150754
Precision for Romance: 0.5416666666666666
Recall for Romance: 0.35135135135135137

Accuruacy for Drama: 0.5611390284757118
Precision for Drama: 0.5959595959595959
Recall for Drama: 0.554858934169279

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Pre