In [1]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Word2Vec

from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, GlobalMaxPooling1D, LSTM, SimpleRNN, Dense, Dropout, Flatten
from keras.layers import Input, concatenate, Activation
from keras.layers.embeddings import Embedding
from keras.models import Sequential, Model
from keras.regularizers import l2

stop_words = set(stopwords.words('english'))

Using TensorFlow backend.


read in the data

In [2]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [3]:
#parse each row to get label vectors from json
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

Get dictionary for genre to its index in label vector

In [4]:
genre_dict = {}
genre_dict['Action-Adventure'] = 0
genre_dict['Romance'] = 1
genre_dict['Horror-Thriller'] = 2
genre_dict['Comedy'] = 3
genre_dict['Science Fiction'] = 4
#genre_dict['Drama'] = 5
genre_dict

{'Action-Adventure': 0,
 'Romance': 1,
 'Horror-Thriller': 2,
 'Comedy': 3,
 'Science Fiction': 4}

In [5]:
#map original labels to more coarse grained labels
genre_map = {}
genre_map['Adventure'] = 'Action-Adventure'
genre_map['Romance'] = 'Romance'
genre_map['Horror'] = 'Horror-Thriller'
genre_map['Thriller'] = 'Horror-Thriller'
genre_map['Comedy'] = 'Comedy'
#genre_map['War'] = 'Action-Adventure'#not sure about this
genre_map['Action'] = 'Action-Adventure'
genre_map['Science Fiction'] = 'Science Fiction'
#genre_map['Drama'] = 'Drama'

In [6]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret

In [7]:
getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [8]:
#put to lower case, remove punctation, remove stopwords
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text

all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [9]:
all_data = all_data[all_data.genres_vect.map(sum) > 0] #drop rows that now have no labels 

In [10]:
#neural net data only needs a few cols
nn_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [11]:
train, test = train_test_split(nn_data, test_size=0.2, random_state=42)

Extract actual features and labels from train and test set

In [12]:
#gettrian and test features for classification. Just need text and lables for this
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [13]:
#convert labels from array of lists to numpy array

y_train = y.tolist()
y_train = np.array(y_train)

y_test = y_test.tolist()
y_test = np.array(y_test)

Get initial word embedding vectors

In [14]:
tok = [word_tokenize(ov) for ov in x]

In [15]:
word_vec_len = 32
w2v = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [16]:
num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150 #larger than averaage but not too large

#get actual train features to feed into neural nets for training
x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

In [17]:
test_sequences = tokenizer.texts_to_sequences(x_test)
#get actual test features to feed into neural nets for testing
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

Get word embeddings matrix for start input to neural net

In [18]:
#Citation: This technique to get word embeddings comes, with some minor changes, mostly from: 
#https://towardsdatascience.com/another-twitter-sentiment-analysis-with-python-part-11-cnn-word2vec-41f5e28eda74

embeddings_index = {}
for w in w2v.wv.vocab.keys():
    embeddings_index[w] = w2v.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Below we define evlaution metric functions

In [19]:
def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [20]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better. Percent incorrectly chosen labels counting assignment and non-assignment equally
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])


#K is what we imported keras backend as

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [21]:
def get_all_metrics(actual_labels, predictions):
    print('Getting evaluation metrics for each label:')
    get_per_label_metrics(actual_labels, predictions)
    print('Getting evaluations for multilabel problem')
    print('Multilabel accuracy: ' + str(multi_label_accuracy(actual_labels, predictions)))
    print('Multilabel precision: ' + str(multi_label_precision(actual_labels, predictions)))
    print('Multilabel recall: ' + str(multi_label_recall(actual_labels, predictions)))
    print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(actual_labels, predictions))))

In [22]:
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
#not same as patience. Want to not even start looking until delay is reached
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__(monitor=monitor, min_delta=min_delta, patience=patience,verbose=verbose, mode=mode)
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [23]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

Convolutional Neural Networks

In [24]:
model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 4s - loss: 0.7048 - raw_multi_label_accuracy: 0.1188 - val_loss: 0.6516 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 2s - loss: 0.6562 - raw_multi_label_accuracy: 0.0433 - val_loss: 0.6360 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 2s - loss: 0.6410 - raw_multi_label_accuracy: 0.0352 - val_loss: 0.6254 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 2s - loss: 0.6278 - raw_multi_label_accuracy: 0.0310 - val_loss: 0.6154 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 3s - loss: 0.6178 - raw_multi_label_accuracy: 0.0136 - val_loss: 0.6090 - val_raw_multi_label_accura

<keras.callbacks.History at 0x7f706b957a58>

In [25]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [26]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6128364389233955
Precision for Action-Adventure: 0.5
Recall for Action-Adventure: 0.5240641711229946

Accuruacy for Romance: 0.7805383022774327
Precision for Romance: 0.5
Recall for Romance: 0.25471698113207547

Accuruacy for Horror-Thriller: 0.6459627329192547
Precision for Horror-Thriller: 0.6235294117647059
Recall for Horror-Thriller: 0.2760416666666667

Accuruacy for Comedy: 0.6045548654244306
Precision for Comedy: 0.5482456140350878
Recall for Comedy: 0.5868544600938967

Accuruacy for Science Fiction: 0.8819875776397516
Precision for Science Fiction: 0.45454545454545453
Recall for Science Fiction: 0.08928571428571429

Getting evaluations for multilabel problem
Multilabel accuracy: 0.3624913733609387
Multilabel precision: 0.5383720930232558
Multilabel recall: 0.42943409247757075
Percent of correctly decided label decisions: 70.5175983436853


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [27]:
model_input = Input(shape=(max_seq_len,), dtype='int32')
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])

In [28]:
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 9s - loss: 2.9272 - raw_multi_label_accuracy: 0.0839 - val_loss: 2.3268 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 8s - loss: 1.9574 - raw_multi_label_accuracy: 0.0623 - val_loss: 1.5678 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 7s - loss: 1.3484 - raw_multi_label_accuracy: 0.0221 - val_loss: 1.1155 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 7s - loss: 0.9936 - raw_multi_label_accuracy: 0.0407 - val_loss: 0.8653 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 9s - loss: 0.8005 - raw_multi_label_accuracy: 0.0243 - val_loss: 0.7271 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 8s - loss: 0.6962 - raw_multi_label_accuracy: 0.0110 - val_loss: 0.6558 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 8s - loss: 0.6424 - raw_multi_label_accuracy: 0.0202 - val_loss: 0.6209 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 8/1000
 - 7s - loss: 0.6152 - 

<keras.callbacks.History at 0x7f706bd165f8>

In [29]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [30]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6728778467908902
Precision for Action-Adventure: 0.5868263473053892
Recall for Action-Adventure: 0.5240641711229946

Accuruacy for Romance: 0.7681159420289855
Precision for Romance: 0.44642857142857145
Recall for Romance: 0.2358490566037736

Accuruacy for Horror-Thriller: 0.6480331262939959
Precision for Horror-Thriller: 0.5733333333333334
Recall for Horror-Thriller: 0.4479166666666667

Accuruacy for Comedy: 0.6459627329192547
Precision for Comedy: 0.6166666666666667
Recall for Comedy: 0.5211267605633803

Accuruacy for Science Fiction: 0.8819875776397516
Precision for Science Fiction: 0.45454545454545453
Recall for Science Fiction: 0.08928571428571429

Getting evaluations for multilabel problem
Multilabel accuracy: 0.38492063492063494
Multilabel precision: 0.5884433962264151
Multilabel recall: 0.44634230503795724
Percent of correctly decided label decisions: 72.33954451345755


Regular Neural Network

In [31]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 5s - loss: 0.6213 - raw_multi_label_accuracy: 0.0536 - val_loss: 0.5935 - val_raw_multi_label_accuracy: 0.0548
Epoch 2/1000
 - 4s - loss: 0.5833 - raw_multi_label_accuracy: 0.0786 - val_loss: 0.5930 - val_raw_multi_label_accuracy: 0.1187
Epoch 3/1000
 - 3s - loss: 0.5584 - raw_multi_label_accuracy: 0.1432 - val_loss: 0.5925 - val_raw_multi_label_accuracy: 0.0971
Epoch 4/1000
 - 3s - loss: 0.5314 - raw_multi_label_accuracy: 0.2156 - val_loss: 0.6051 - val_raw_multi_label_accuracy: 0.1486
Epoch 5/1000
 - 4s - loss: 0.4837 - raw_multi_label_accuracy: 0.3520 - val_loss: 0.6298 - val_raw_multi_label_accuracy: 0.1897
Epoch 6/1000
 - 3s - loss: 0.4025 - raw_multi_label_accuracy: 0.5316 - val_loss: 0.6012 - val_raw_multi_label_accuracy: 0.1688
Epoch 7/1000
 - 3s - loss: 0.2938 - raw_multi_label_accuracy: 0.7166 - val_loss: 0.6224 - val_raw_multi_label_accuracy: 0.1703
Epoch 8/1000
 - 3s - loss: 0.2023 - raw_multi_label_accuracy: 0.

<keras.callbacks.History at 0x7f705bec5278>

In [32]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [33]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.650103519668737
Precision for Action-Adventure: 0.5918367346938775
Recall for Action-Adventure: 0.31016042780748665

Accuruacy for Romance: 0.7784679089026915
Precision for Romance: 0.4782608695652174
Recall for Romance: 0.10377358490566038

Accuruacy for Horror-Thriller: 0.6045548654244306
Precision for Horror-Thriller: 0.5048543689320388
Recall for Horror-Thriller: 0.2708333333333333

Accuruacy for Comedy: 0.5507246376811594
Precision for Comedy: 0.4852941176470588
Recall for Comedy: 0.30985915492957744

Accuruacy for Science Fiction: 0.8861283643892339
Precision for Science Fiction: 0.6
Recall for Science Fiction: 0.05357142857142857

Getting evaluations for multilabel problem
Multilabel accuracy: 0.23602484472049687
Multilabel precision: 0.5155913978494624
Multilabel recall: 0.25879917184265006
Percent of correctly decided label decisions: 69.39958592132504


LSTM

In [34]:
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
lstm_model.add(Dense(256, activation='relu'))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 23s - loss: 0.6325 - raw_multi_label_accuracy: 0.0046 - val_loss: 0.5922 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 16s - loss: 0.5951 - raw_multi_label_accuracy: 0.0021 - val_loss: 0.5837 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 16s - loss: 0.5897 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5848 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 17s - loss: 0.5884 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5796 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 16s - loss: 0.5851 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5848 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 16s - loss: 0.5810 - raw_multi_label_accuracy: 0.0063 - val_loss: 0.5751 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 16s - loss: 0.5703 - raw_multi_label_accuracy: 0.0487 - val_loss: 0.5814 - val_raw_multi_label_accuracy: 0.0770
Epoch 8/1000
 - 16s -

<keras.callbacks.History at 0x7f70595ba710>

In [35]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [36]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.629399585921325
Precision for Action-Adventure: 0.5169491525423728
Recall for Action-Adventure: 0.6524064171122995

Accuruacy for Romance: 0.772256728778468
Precision for Romance: 0.4722222222222222
Recall for Romance: 0.32075471698113206

Accuruacy for Horror-Thriller: 0.6687370600414079
Precision for Horror-Thriller: 0.5776699029126213
Recall for Horror-Thriller: 0.6197916666666666

Accuruacy for Comedy: 0.6935817805383023
Precision for Comedy: 0.620817843866171
Recall for Comedy: 0.784037558685446

Accuruacy for Science Fiction: 0.8881987577639752
Precision for Science Fiction: 0.5714285714285714
Recall for Science Fiction: 0.14285714285714285

Getting evaluations for multilabel problem
Multilabel accuracy: 0.4860248447204972
Multilabel precision: 0.5938578329882681
Multilabel recall: 0.6306073153899238
Percent of correctly decided label decisions: 73.04347826086956


simple rnn

In [37]:
rnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
rnn.add(e)
rnn.add(SimpleRNN(32, activation = 'relu'))
rnn.add(Dense(256, activation='relu'))
rnn.add(Dense(len(genre_dict), activation='sigmoid'))
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
rnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 6s - loss: 0.6476 - raw_multi_label_accuracy: 0.0459 - val_loss: 0.6017 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 3s - loss: 0.5958 - raw_multi_label_accuracy: 0.0055 - val_loss: 0.5884 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 3s - loss: 0.5814 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5835 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 3s - loss: 0.5611 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5791 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 4s - loss: 0.5326 - raw_multi_label_accuracy: 0.0069 - val_loss: 0.5779 - val_raw_multi_label_accuracy: 0.0069
Epoch 6/1000
 - 3s - loss: 0.4832 - raw_multi_label_accuracy: 0.2154 - val_loss: 0.5644 - val_raw_multi_label_accuracy: 0.0741
Epoch 7/1000
 - 3s - loss: 0.3952 - raw_multi_label_accuracy: 0.5003 - val_loss: 0.6032 - val_raw_multi_label_accuracy: 0.1714
Epoch 8/1000
 - 3s - loss: 0.2917 - raw_

<keras.callbacks.History at 0x7f7058396ac8>

In [38]:
predictions = nn_output_to_predictions(rnn.predict(x_test_seq))

In [39]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.5962732919254659
Precision for Action-Adventure: 0.4759036144578313
Recall for Action-Adventure: 0.42245989304812837

Accuruacy for Romance: 0.7743271221532091
Precision for Romance: 0.4482758620689655
Recall for Romance: 0.12264150943396226

Accuruacy for Horror-Thriller: 0.5507246376811594
Precision for Horror-Thriller: 0.41007194244604317
Recall for Horror-Thriller: 0.296875

Accuruacy for Comedy: 0.494824016563147
Precision for Comedy: 0.41968911917098445
Recall for Comedy: 0.38028169014084506

Accuruacy for Science Fiction: 0.8861283643892339
Precision for Science Fiction: 0.5714285714285714
Recall for Science Fiction: 0.07142857142857142

Getting evaluations for multilabel problem
Multilabel accuracy: 0.2550724637681158
Multilabel precision: 0.4351145038167939
Multilabel recall: 0.31159420289855067
Percent of correctly decided label decisions: 66.0455486542443
