In [1]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Word2Vec

from sklearn.model_selection import train_test_split
from sklearn import utils
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.regularizers import l2
from keras.layers import Input, concatenate, Activation
from keras.models import Model
from keras.layers import LSTM
from keras.layers import SimpleRNN

stop_words = set(stopwords.words('english'))

Using TensorFlow backend.


read in the data

In [2]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [3]:
#parse each row to get label vectors from json
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

Get dictionary for genre to its index in label vector

In [4]:
genre_dict = {}
genre_dict['Action-Adventure'] = 0
genre_dict['Romance'] = 1
genre_dict['Horror-Thriller'] = 2
genre_dict['Comedy'] = 3
genre_dict['Science Fiction'] = 4
#genre_dict['Drama'] = 5
genre_dict

{'Action-Adventure': 0,
 'Romance': 1,
 'Horror-Thriller': 2,
 'Comedy': 3,
 'Science Fiction': 4}

In [5]:
#map original labels to more coarse grained labels
genre_map = {}
genre_map['Adventure'] = 'Action-Adventure'
genre_map['Romance'] = 'Romance'
genre_map['Horror'] = 'Horror-Thriller'
genre_map['Thriller'] = 'Horror-Thriller'
genre_map['Comedy'] = 'Comedy'
#genre_map['War'] = 'Action-Adventure'#not sure about this
genre_map['Action'] = 'Action-Adventure'
genre_map['Science Fiction'] = 'Science Fiction'
#genre_map['Drama'] = 'Drama'

In [6]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret

In [7]:
getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [8]:
#put to lower case, remove punctation, remove stopwords
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text

all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [9]:
all_data = all_data[all_data.genres_vect.map(sum) > 0] #drop rows that now have no labels 

In [10]:
#neural net data only needs a few cols
nn_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [11]:
train, test = train_test_split(nn_data, test_size=0.2, random_state=42)

Extract actual features and labels from train and test set

In [12]:
#gettrian and test features for classification. Just need text and lables for this
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [13]:
#convert labels from array of lists to numpy array

y_train = y.tolist()
y_train = np.array(y_train)

y_test = y_test.tolist()
y_test = np.array(y_test)

Get initial word embedding vectors

In [14]:
tok = [word_tokenize(ov) for ov in x]

In [15]:
word_vec_len = 32
w2v = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [16]:
num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150 #larger than averaage but not too large

#get actual train features to feed into neural nets for training
x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

In [17]:
test_sequences = tokenizer.texts_to_sequences(x_test)
#get actual test features to feed into neural nets for testing
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

Get word embeddings matrix for start input to neural net

In [18]:
#Citation: This technique to get word embeddings comes, with some minor changes, mostly from: 
#https://towardsdatascience.com/another-twitter-sentiment-analysis-with-python-part-11-cnn-word2vec-41f5e28eda74

embeddings_index = {}
for w in w2v.wv.vocab.keys():
    embeddings_index[w] = w2v.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

Below we define evlaution metric functions

In [19]:
def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [20]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better. Percent incorrectly chosen labels counting assignment and non-assignment equally
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])


#K is what we imported keras backend as

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [21]:
def get_all_metrics(actual_labels, predictions):
    print('Getting evaluation metrics for each label:')
    get_per_label_metrics(actual_labels, predictions)
    print('Getting evaluations for multilabel problem')
    print('Multilabel accuracy: ' + str(multi_label_accuracy(actual_labels, predictions)))
    print('Multilabel precision: ' + str(multi_label_precision(actual_labels, predictions)))
    print('Multilabel recall: ' + str(multi_label_recall(actual_labels, predictions)))
    print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(actual_labels, predictions))))

In [22]:
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
#not same as patience. Want to not even start looking until delay is reached
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__()
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [23]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

Convolutional Neural Networks

In [24]:
model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 6s - loss: 0.7084 - raw_multi_label_accuracy: 0.0428 - val_loss: 0.6524 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 4s - loss: 0.6584 - raw_multi_label_accuracy: 0.0447 - val_loss: 0.6369 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 4s - loss: 0.6402 - raw_multi_label_accuracy: 0.0217 - val_loss: 0.6260 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 4s - loss: 0.6261 - raw_multi_label_accuracy: 0.0178 - val_loss: 0.6155 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 5s - loss: 0.6130 - raw_multi_label_accuracy: 0.0220 - val_loss: 0.6097 - val_raw_multi_label_accura

<keras.callbacks.History at 0x7fbb9ddbe518>

In [25]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [26]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6770186335403726
Precision for Action-Adventure: 0.6476190476190476
Recall for Action-Adventure: 0.36363636363636365

Accuruacy for Romance: 0.7743271221532091
Precision for Romance: 0.4666666666666667
Recall for Romance: 0.19811320754716982

Accuruacy for Horror-Thriller: 0.7101449275362319
Precision for Horror-Thriller: 0.6940298507462687
Recall for Horror-Thriller: 0.484375

Accuruacy for Comedy: 0.629399585921325
Precision for Comedy: 0.56640625
Recall for Comedy: 0.6807511737089202

Accuruacy for Science Fiction: 0.8840579710144928
Precision for Science Fiction: 0.5
Recall for Science Fiction: 0.03571428571428571

Getting evaluations for multilabel problem
Multilabel accuracy: 0.4133885438233265
Multilabel precision: 0.6109794628751976
Multilabel recall: 0.46773636991028306
Percent of correctly decided label decisions: 73.49896480331263


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [27]:
model_input = Input(shape=(max_seq_len,), dtype='int32')
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.01))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])

In [28]:
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 12s - loss: 2.8846 - raw_multi_label_accuracy: 0.0741 - val_loss: 2.2812 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 10s - loss: 1.9111 - raw_multi_label_accuracy: 0.0425 - val_loss: 1.5225 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 10s - loss: 1.3064 - raw_multi_label_accuracy: 0.0288 - val_loss: 1.0804 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 10s - loss: 0.9664 - raw_multi_label_accuracy: 0.0246 - val_loss: 0.8375 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 9s - loss: 0.7804 - raw_multi_label_accuracy: 0.0463 - val_loss: 0.7151 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 10s - loss: 0.6825 - raw_multi_label_accuracy: 0.0172 - val_loss: 0.6479 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 9s - loss: 0.6344 - raw_multi_label_accuracy: 0.0271 - val_loss: 0.6154 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 8/1000
 - 9s - loss: 0.60

<keras.callbacks.History at 0x7fbb9e04c240>

In [29]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [30]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6687370600414079
Precision for Action-Adventure: 0.5737704918032787
Recall for Action-Adventure: 0.5614973262032086

Accuruacy for Romance: 0.7805383022774327
Precision for Romance: 0.5
Recall for Romance: 0.1320754716981132

Accuruacy for Horror-Thriller: 0.6335403726708074
Precision for Horror-Thriller: 0.5423728813559322
Recall for Horror-Thriller: 0.5

Accuruacy for Comedy: 0.6418219461697723
Precision for Comedy: 0.6176470588235294
Recall for Comedy: 0.49295774647887325

Accuruacy for Science Fiction: 0.8861283643892339
Precision for Science Fiction: 0.6666666666666666
Recall for Science Fiction: 0.03571428571428571

Getting evaluations for multilabel problem
Multilabel accuracy: 0.3837129054520359
Multilabel precision: 0.58
Multilabel recall: 0.44841269841269843
Percent of correctly decided label decisions: 72.21532091097309


Regular Neural Network

In [31]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 2s - loss: 0.6226 - raw_multi_label_accuracy: 0.0399 - val_loss: 0.5947 - val_raw_multi_label_accuracy: 0.0703
Epoch 2/1000
 - 2s - loss: 0.5834 - raw_multi_label_accuracy: 0.0866 - val_loss: 0.5999 - val_raw_multi_label_accuracy: 0.1326
Epoch 3/1000
 - 2s - loss: 0.5617 - raw_multi_label_accuracy: 0.1649 - val_loss: 0.5991 - val_raw_multi_label_accuracy: 0.1114
Epoch 4/1000
 - 3s - loss: 0.5231 - raw_multi_label_accuracy: 0.2281 - val_loss: 0.6068 - val_raw_multi_label_accuracy: 0.1830
Epoch 5/1000
 - 2s - loss: 0.4616 - raw_multi_label_accuracy: 0.4115 - val_loss: 0.6114 - val_raw_multi_label_accuracy: 0.1986
Epoch 6/1000
 - 3s - loss: 0.3526 - raw_multi_label_accuracy: 0.6244 - val_loss: 0.5994 - val_raw_multi_label_accuracy: 0.1887
Epoch 7/1000
 - 2s - loss: 0.2343 - raw_multi_label_accuracy: 0.7941 - val_loss: 0.5930 - val_raw_multi_label_accuracy: 0.1926
Epoch 8/1000
 - 2s - loss: 0.1506 - raw_multi_label_accuracy: 0.

<keras.callbacks.History at 0x7fbb8e295eb8>

In [32]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [33]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.6480331262939959
Precision for Action-Adventure: 0.5639097744360902
Recall for Action-Adventure: 0.40106951871657753

Accuruacy for Romance: 0.7846790890269151
Precision for Romance: 0.5416666666666666
Recall for Romance: 0.12264150943396226

Accuruacy for Horror-Thriller: 0.6211180124223602
Precision for Horror-Thriller: 0.5463917525773195
Recall for Horror-Thriller: 0.2760416666666667

Accuruacy for Comedy: 0.5527950310559007
Precision for Comedy: 0.48905109489051096
Recall for Comedy: 0.3145539906103286

Accuruacy for Science Fiction: 0.8902691511387164
Precision for Science Fiction: 0.6666666666666666
Recall for Science Fiction: 0.10714285714285714

Getting evaluations for multilabel problem
Multilabel accuracy: 0.26249137336093853
Multilabel precision: 0.5331325301204819
Multilabel recall: 0.2907177363699103
Percent of correctly decided label decisions: 69.93788819875778


LSTM

In [34]:
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
lstm_model.add(Dense(256, activation='relu'))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 9s - loss: 0.6387 - raw_multi_label_accuracy: 0.0620 - val_loss: 0.5914 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 7s - loss: 0.5954 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5828 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 7s - loss: 0.5889 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5845 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 8s - loss: 0.5862 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5810 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 10s - loss: 0.5850 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5794 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 6/1000
 - 9s - loss: 0.5810 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5763 - val_raw_multi_label_accuracy: 0.0035
Epoch 7/1000
 - 9s - loss: 0.5683 - raw_multi_label_accuracy: 0.0403 - val_loss: 0.5744 - val_raw_multi_label_accuracy: 0.0068
Epoch 8/1000
 - 7s - lo

<keras.callbacks.History at 0x7fbb8d9aa470>

In [35]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [36]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.7080745341614907
Precision for Action-Adventure: 0.5958333333333333
Recall for Action-Adventure: 0.7647058823529411

Accuruacy for Romance: 0.7577639751552795
Precision for Romance: 0.43820224719101125
Recall for Romance: 0.36792452830188677

Accuruacy for Horror-Thriller: 0.639751552795031
Precision for Horror-Thriller: 0.5401785714285714
Recall for Horror-Thriller: 0.6302083333333334

Accuruacy for Comedy: 0.6873706004140787
Precision for Comedy: 0.6220472440944882
Recall for Comedy: 0.7417840375586855

Accuruacy for Science Fiction: 0.8840579710144928
Precision for Science Fiction: 0.5
Recall for Science Fiction: 0.17857142857142858

Getting evaluations for multilabel problem
Multilabel accuracy: 0.4931677018633544
Multilabel precision: 0.5955831608005521
Multilabel recall: 0.6549344375431332
Percent of correctly decided label decisions: 73.54037267080746


simple rnn

In [37]:
rnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
rnn.add(e)
rnn.add(SimpleRNN(32, activation = 'relu'))
rnn.add(Dense(256, activation='relu'))
rnn.add(Dense(len(genre_dict), activation='sigmoid'))
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
rnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=25)], epochs=1000, batch_size=100, verbose=2)

Train on 1738 samples, validate on 194 samples
Epoch 1/1000
 - 2s - loss: 0.6458 - raw_multi_label_accuracy: 0.0302 - val_loss: 0.5883 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 2/1000
 - 1s - loss: 0.5862 - raw_multi_label_accuracy: 0.0026 - val_loss: 0.5838 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 3/1000
 - 1s - loss: 0.5708 - raw_multi_label_accuracy: 0.0000e+00 - val_loss: 0.5785 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 1s - loss: 0.5514 - raw_multi_label_accuracy: 0.0101 - val_loss: 0.5762 - val_raw_multi_label_accuracy: 0.0035
Epoch 5/1000
 - 1s - loss: 0.5171 - raw_multi_label_accuracy: 0.0529 - val_loss: 0.5728 - val_raw_multi_label_accuracy: 0.0459
Epoch 6/1000
 - 1s - loss: 0.4618 - raw_multi_label_accuracy: 0.3071 - val_loss: 0.5773 - val_raw_multi_label_accuracy: 0.1096
Epoch 7/1000
 - 1s - loss: 0.3625 - raw_multi_label_accuracy: 0.5142 - val_loss: 0.6293 - val_raw_multi_label_accuracy: 0.1652
Epoch 8/1000
 - 1s - loss: 0.2667 - raw_multi_la

<keras.callbacks.History at 0x7fbb8c87bf28>

In [38]:
predictions = nn_output_to_predictions(rnn.predict(x_test_seq))

In [39]:
get_all_metrics(y_test, predictions)

Getting evaluation metrics for each label:
Accuruacy for Action-Adventure: 0.525879917184265
Precision for Action-Adventure: 0.4153225806451613
Recall for Action-Adventure: 0.5508021390374331

Accuruacy for Romance: 0.7204968944099379
Precision for Romance: 0.23636363636363636
Recall for Romance: 0.12264150943396226

Accuruacy for Horror-Thriller: 0.4906832298136646
Precision for Horror-Thriller: 0.3622448979591837
Recall for Horror-Thriller: 0.3697916666666667

Accuruacy for Comedy: 0.5424430641821946
Precision for Comedy: 0.48148148148148145
Recall for Comedy: 0.48826291079812206

Accuruacy for Science Fiction: 0.8944099378881988
Precision for Science Fiction: 1.0
Recall for Science Fiction: 0.08928571428571429

Getting evaluations for multilabel problem
Multilabel accuracy: 0.2853692201518289
Multilabel precision: 0.39796659404502555
Multilabel recall: 0.3907867494824017
Percent of correctly decided label decisions: 63.47826086956522
