In [1]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Word2Vec

from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, GlobalMaxPooling1D, LSTM, SimpleRNN, Dense, Dropout, Flatten, Bidirectional
from keras.layers import Input, concatenate, Activation
from keras.layers.embeddings import Embedding
from keras.models import Sequential, Model
from keras.regularizers import l2

stop_words = set(stopwords.words('english'))

import time

Using TensorFlow backend.


read in the data

In [2]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [3]:
#parse each row to get label vectors from json
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

Get dictionary for genre to its index in label vector

In [4]:
genre_dict = {}
genre_dict['Action-Adventure'] = 0
genre_dict['Romance'] = 1
genre_dict['Horror-Thriller'] = 2
genre_dict['Comedy'] = 3
genre_dict['Science Fiction'] = 4
#genre_dict['Drama'] = 5
genre_dict

{'Action-Adventure': 0,
 'Romance': 1,
 'Horror-Thriller': 2,
 'Comedy': 3,
 'Science Fiction': 4}

In [5]:
#map original labels to more coarse grained labels
genre_map = {}
genre_map['Adventure'] = 'Action-Adventure'
genre_map['Romance'] = 'Romance'
genre_map['Horror'] = 'Horror-Thriller'
genre_map['Thriller'] = 'Horror-Thriller'
genre_map['Comedy'] = 'Comedy'
#genre_map['War'] = 'Action-Adventure'#not sure about this
genre_map['Action'] = 'Action-Adventure'
genre_map['Science Fiction'] = 'Science Fiction'
#genre_map['Drama'] = 'Drama'

In [6]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret

In [7]:
getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [8]:
#put to lower case, remove punctation, remove stopwords
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text

all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [9]:
all_data = all_data[all_data.genres_vect.map(sum) > 0] #drop rows that now have no labels 

In [10]:
#neural net data only needs a few cols
nn_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [11]:
train, test = train_test_split(nn_data, test_size=0.2, random_state=42)

Extract actual features and labels from train and test set

In [12]:
#gettrian and test features for classification. Just need text and lables for this
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [13]:
#convert labels from array of lists to numpy array

y_train = y.tolist()
y_train = np.array(y_train)

y_test = y_test.tolist()
y_test = np.array(y_test)

Get initial word embedding vectors

In [14]:
tok = [word_tokenize(ov) for ov in x]

In [15]:
word_vec_len = 32
w2v = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [16]:
num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150 #larger than averaage but not too large

#get actual train features to feed into neural nets for training
x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

In [17]:
test_sequences = tokenizer.texts_to_sequences(x_test)
#get actual test features to feed into neural nets for testing
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

Get word embeddings matrix for start input to neural net

In [18]:
#Citation: This technique to get word embeddings comes, with some minor changes, mostly from: 
#https://towardsdatascience.com/another-twitter-sentiment-analysis-with-python-part-11-cnn-word2vec-41f5e28eda74

embeddings_index = {}
for w in w2v.wv.vocab.keys():
    embeddings_index[w] = w2v.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Below we define evlaution metric functions

In [19]:
def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [20]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better. Percent incorrectly chosen labels counting assignment and non-assignment equally
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])


#K is what we imported keras backend as

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [21]:
def get_all_metrics(actual_labels, predictions):
    print('Getting evaluation metrics for each label:')
    get_per_label_metrics(actual_labels, predictions)
    print('Getting evaluations for multilabel problem')
    print('Multilabel accuracy: ' + str(multi_label_accuracy(actual_labels, predictions)))
    print('Multilabel precision: ' + str(multi_label_precision(actual_labels, predictions)))
    print('Multilabel recall: ' + str(multi_label_recall(actual_labels, predictions)))
    print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(actual_labels, predictions))))

In [22]:
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
#not same as patience. Want to not even start looking until delay is reached
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__(monitor=monitor, min_delta=min_delta, patience=patience,verbose=verbose, mode=mode)
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [23]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

Convolutional Neural Networks

In [24]:
model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 2s - loss: 0.6974 - raw_multi_label_accuracy: 0.0946 - val_loss: 0.6588 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 1s - loss: 0.6591 - raw_multi_label_accuracy: 0.0431 - val_loss: 0.6391 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 1s - loss: 0.6418 - raw_multi_label_accuracy: 0.0128 - val_loss: 0.6272 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 1s - loss: 0.6307 - raw_multi_label_accuracy: 0.0343 - val_loss: 0.6188 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 1s - loss: 0.6182 - raw_multi_label_accuracy: 0.0177 - val_loss: 0.6120 - val_raw_multi_label_accura

In [25]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [26]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6687370600414079
Precision for Action-Adventure: 0.6097560975609756
Recall for Action-Adventure: 0.40106951871657753

Accuruacy for Romance: 0.7660455486542443
Precision for Romance: 0.4533333333333333
Recall for Romance: 0.32075471698113206

Accuruacy for Horror-Thriller: 0.6625258799171843
Precision for Horror-Thriller: 0.6106870229007634
Recall for Horror-Thriller: 0.4166666666666667

Accuruacy for Comedy: 0.6169772256728778
Precision for Comedy: 0.5693069306930693
Recall for Comedy: 0.539906103286385

Accuruacy for Science Fiction: 0.8861283643892339
Precision for Science Fiction: 0.5217391304347826
Recall for Science Fiction: 0.21428571428571427

Getting evaluations for multilabel problem
Multilabel accuracy: 0.36404416839199455
Multilabel precision: 0.5747549019607844
Multilabel recall: 0.43840579710144933
Percent of correctly decided label decisions: 72.00828157349896


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [27]:
model_input = Input(shape=(max_seq_len,), dtype='int32')
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 4s - loss: 2.8438 - raw_multi_label_accuracy: 0.0657 - val_loss: 2.2403 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 3s - loss: 1.8801 - raw_multi_label_accuracy: 0.0341 - val_loss: 1.5013 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 3s - loss: 1.2861 - raw_multi_label_accuracy: 0.0279 - val_loss: 1.0670 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 3s - loss: 0.9513 - raw_multi_label_accuracy: 0.0223 - val_loss: 0.8285 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 3s - loss: 0.7723 - raw_multi_label_accuracy: 0.0202 - val_loss: 0.7064 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 3s - loss: 0.6776 - raw_multi_label_accuracy: 0.0223 - val_loss: 0.6444 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 3s - loss: 0.6292 - raw_multi_label_accuracy: 0.0148 - val_loss: 0.6151 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 8/1000
 - 3s - loss: 0.6040 - 

In [28]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [29]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.639751552795031
Precision for Action-Adventure: 0.5403726708074534
Recall for Action-Adventure: 0.46524064171123

Accuruacy for Romance: 0.7805383022774327
Precision for Romance: 0.5
Recall for Romance: 0.1509433962264151

Accuruacy for Horror-Thriller: 0.6128364389233955
Precision for Horror-Thriller: 0.5139664804469274
Recall for Horror-Thriller: 0.4791666666666667

Accuruacy for Comedy: 0.6252587991718427
Precision for Comedy: 0.5851063829787234
Recall for Comedy: 0.5164319248826291

Accuruacy for Science Fiction: 0.8716356107660456
Precision for Science Fiction: 0.125
Recall for Science Fiction: 0.017857142857142856

Getting evaluations for multilabel problem
Multilabel accuracy: 0.3609385783298828
Multilabel precision: 0.5493119266055045
Multilabel recall: 0.4259834368530021
Percent of correctly decided label decisions: 70.60041407867494


Regular Neural Network

In [30]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 2s - loss: 0.6163 - raw_multi_label_accuracy: 0.0509 - val_loss: 0.5952 - val_raw_multi_label_accuracy: 0.0264
Epoch 2/1000
 - 1s - loss: 0.5807 - raw_multi_label_accuracy: 0.0537 - val_loss: 0.5917 - val_raw_multi_label_accuracy: 0.0917
Epoch 3/1000
 - 1s - loss: 0.5571 - raw_multi_label_accuracy: 0.1211 - val_loss: 0.5984 - val_raw_multi_label_accuracy: 0.1064
Epoch 4/1000
 - 1s - loss: 0.5299 - raw_multi_label_accuracy: 0.2308 - val_loss: 0.6063 - val_raw_multi_label_accuracy: 0.1964
Epoch 5/1000
 - 1s - loss: 0.4834 - raw_multi_label_accuracy: 0.3588 - val_loss: 0.6043 - val_raw_multi_label_accuracy: 0.2078
Epoch 6/1000
 - 1s - loss: 0.3984 - raw_multi_label_accuracy: 0.5356 - val_loss: 0.6027 - val_raw_multi_label_accuracy: 0.2023
Epoch 7/1000
 - 1s - loss: 0.2982 - raw_multi_label_accuracy: 0.6794 - val_loss: 0.6184 - val_raw_multi_label_accuracy: 0.1789
Epoch 8/1000
 - 1s - loss: 0.2165 - raw_multi_label_accuracy: 0.

In [31]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [32]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.639751552795031
Precision for Action-Adventure: 0.5619047619047619
Recall for Action-Adventure: 0.3155080213903743

Accuruacy for Romance: 0.772256728778468
Precision for Romance: 0.4411764705882353
Recall for Romance: 0.14150943396226415

Accuruacy for Horror-Thriller: 0.6128364389233955
Precision for Horror-Thriller: 0.5316455696202531
Recall for Horror-Thriller: 0.21875

Accuruacy for Comedy: 0.5734989648033126
Precision for Comedy: 0.5251798561151079
Recall for Comedy: 0.3427230046948357

Accuruacy for Science Fiction: 0.8944099378881988
Precision for Science Fiction: 0.7272727272727273
Recall for Science Fiction: 0.14285714285714285

Getting evaluations for multilabel problem
Multilabel accuracy: 0.2454106280193236
Multilabel precision: 0.5300546448087431
Multilabel recall: 0.2708764665286404
Percent of correctly decided label decisions: 69.85507246376812


LSTM

In [33]:
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
lstm_model.add(Dense(256, activation='relu'))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 9s - loss: 0.6397 - raw_multi_label_accuracy: 0.0339 - val_loss: 0.5911 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 7s - loss: 0.5939 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5826 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 7s - loss: 0.5895 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5819 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 7s - loss: 0.5873 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5827 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 7s - loss: 0.5857 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5822 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 7s - loss: 0.5785 - raw_multi_label_accuracy: 3.9043e-04 - val_loss: 0.5748 - val_raw_multi_label_accuracy: 0.0103
Epoch 7/1000
 - 7s - loss: 0.5538 - raw_multi_label_accuracy: 0.1307 - val_loss: 0.5478 - val_raw_multi_label_accuracy: 0.1150
Epoch 8/1000
 - 7s - los

In [34]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [35]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6915113871635611
Precision for Action-Adventure: 0.5818965517241379
Recall for Action-Adventure: 0.7219251336898396

Accuruacy for Romance: 0.7432712215320911
Precision for Romance: 0.40625
Recall for Romance: 0.36792452830188677

Accuruacy for Horror-Thriller: 0.6273291925465838
Precision for Horror-Thriller: 0.527027027027027
Recall for Horror-Thriller: 0.609375

Accuruacy for Comedy: 0.6749482401656315
Precision for Comedy: 0.609375
Recall for Comedy: 0.7323943661971831

Accuruacy for Science Fiction: 0.8737060041407867
Precision for Science Fiction: 0.391304347826087
Recall for Science Fiction: 0.16071428571428573

Getting evaluations for multilabel problem
Multilabel accuracy: 0.48126293995859243
Multilabel precision: 0.5838509316770186
Multilabel recall: 0.6399240855762595
Percent of correctly decided label decisions: 72.21532091097309


simple rnn

In [36]:
rnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
rnn.add(e)
rnn.add(SimpleRNN(32, activation = 'relu'))
rnn.add(Dense(256, activation='relu'))
rnn.add(Dense(len(genre_dict), activation='sigmoid'))
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
rnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 3s - loss: 0.6609 - raw_multi_label_accuracy: 0.0233 - val_loss: 0.6063 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 2s - loss: 0.5980 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5868 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 2s - loss: 0.5815 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5808 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 2s - loss: 0.5636 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5806 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 2s - loss: 0.5392 - raw_multi_label_accuracy: 0.0068 - val_loss: 0.5777 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 2s - loss: 0.4954 - raw_multi_label_accuracy: 0.1017 - val_loss: 0.5780 - val_raw_multi_label_accuracy: 0.0929
Epoch 7/1000
 - 2s - loss: 0.4200 - raw_multi_label_accuracy: 0.4414 - val_loss: 0.5949 - val_raw_multi_label_accuracy: 0.1559
Epoch 8/1000
 - 2s - loss: 0.288

In [37]:
predictions = nn_output_to_predictions(rnn.predict(x_test_seq))

In [38]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.5900621118012422
Precision for Action-Adventure: 0.46258503401360546
Recall for Action-Adventure: 0.36363636363636365

Accuruacy for Romance: 0.6252587991718427
Precision for Romance: 0.1951219512195122
Recall for Romance: 0.22641509433962265

Accuruacy for Horror-Thriller: 0.5445134575569358
Precision for Horror-Thriller: 0.375
Recall for Horror-Thriller: 0.21875

Accuruacy for Comedy: 0.5196687370600414
Precision for Comedy: 0.42748091603053434
Recall for Comedy: 0.26291079812206575

Accuruacy for Science Fiction: 0.9047619047619048
Precision for Science Fiction: 1.0
Recall for Science Fiction: 0.17857142857142858

Getting evaluations for multilabel problem
Multilabel accuracy: 0.22291235334713594
Multilabel precision: 0.3794604003481288
Multilabel recall: 0.2757073844030366
Percent of correctly decided label decisions: 63.68530020703933


bidirectional-LSTM

In [39]:
bi_lstm = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
bi_lstm.add(e)
bi_lstm.add(Bidirectional(LSTM(100, dropout=0.25, recurrent_dropout=0.25)))
bi_lstm.add(Dense(256, activation='relu'))
bi_lstm.add(Dense(len(genre_dict), activation='sigmoid'))
bi_lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
start = time.time()
bi_lstm.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)
end = time.time()
print('Time to train with cross validation for early stopping: ' + str(end-start) + ' seconds')

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 15s - loss: 0.6366 - raw_multi_label_accuracy: 0.0279 - val_loss: 0.5917 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 11s - loss: 0.5953 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5826 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 11s - loss: 0.5888 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5850 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 11s - loss: 0.5874 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5823 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 11s - loss: 0.5850 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5821 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 11s - loss: 0.5798 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5785 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 12s - loss: 0.5694 - raw_multi_label_accuracy: 0.0511 - val_loss: 0.5813 - val_raw_multi_label_accuracy: 0.0203
Epoch 8/1000


In [40]:
predictions = nn_output_to_predictions(bi_lstm.predict(x_test_seq))

In [41]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.639751552795031
Precision for Action-Adventure: 0.5232974910394266
Recall for Action-Adventure: 0.7807486631016043

Accuruacy for Romance: 0.7556935817805382
Precision for Romance: 0.42105263157894735
Recall for Romance: 0.3018867924528302

Accuruacy for Horror-Thriller: 0.6708074534161491
Precision for Horror-Thriller: 0.5647058823529412
Recall for Horror-Thriller: 0.75

Accuruacy for Comedy: 0.7101449275362319
Precision for Comedy: 0.6377358490566037
Recall for Comedy: 0.7934272300469484

Accuruacy for Science Fiction: 0.8840579710144928
Precision for Science Fiction: 0.5
Recall for Science Fiction: 0.21428571428571427

Getting evaluations for multilabel problem
Multilabel accuracy: 0.49875776397515575
Multilabel precision: 0.5883367839889578
Multilabel recall: 0.6992753623188407
Percent of correctly decided label decisions: 73.20910973084887
