In [1]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Doc2Vec
from gensim.models import Word2Vec
from gensim.models.doc2vec import TaggedDocument

from sklearn.model_selection import train_test_split
from sklearn import utils

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = set(stopwords.words('english'))

In [2]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [3]:
def text_to_list(x):
    if pd.isna(x):
        return ''
    else:
        return ast.literal_eval(x)

def parse_json(x):
    try:
        return json.loads(x.replace("'", '"'))[0]['name']
    except:
        return ''
    
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

In [4]:
genre_dict = {'War': 0,
 'Family': 1,
 'Science Fiction': 2,
 'Thriller': 3,
 'Horror': 4,
 'Romance': 5,
 'Drama': 6,
 'Foreign': 7,
 'Documentary': 8,
 'Fantasy': 9,
 'Western': 10,
 'History': 11,
 'Comedy': 12,
 'Action': 13,
 'Adventure': 14,
 'Animation': 15,
 'Crime': 16,
 'Music': 17,
 'TV Movie': 18,
 'Mystery': 19}
genre_dict

{'War': 0,
 'Family': 1,
 'Science Fiction': 2,
 'Thriller': 3,
 'Horror': 4,
 'Romance': 5,
 'Drama': 6,
 'Foreign': 7,
 'Documentary': 8,
 'Fantasy': 9,
 'Western': 10,
 'History': 11,
 'Comedy': 12,
 'Action': 13,
 'Adventure': 14,
 'Animation': 15,
 'Crime': 16,
 'Music': 17,
 'TV Movie': 18,
 'Mystery': 19}

In [5]:
#for mapping to coarse grained labels (in this situation we don't do that so labels map to self)
genre_map = {'War': 'War',
 'Family': 'Family',
 'Science Fiction': 'Science Fiction',
 'Thriller': 'Thriller',
 'Horror': 'Horror',
 'Romance': 'Romance',
 'Drama': 'Drama',
 'Foreign': 'Foreign',
 'Documentary': 'Documentary',
 'Fantasy': 'Fantasy',
 'Western': 'Western',
 'History': 'History',
 'Comedy': 'Comedy',
 'Action': 'Action',
 'Adventure': 'Adventure',
 'Animation': 'Animation',
 'Crime': 'Crime',
 'Music': 'Music',
 'TV Movie': 'TV Movie',
 'Mystery': 'Mystery'}

In [6]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret
    return ret

In [7]:
labels_vects = getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [8]:
#put to lower case, remove punctation
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text
all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [9]:
all_data = all_data[all_data.genres_vect.map(sum) > 0]

In [10]:
#logistic regression data
lr_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [11]:
train, test = train_test_split(lr_data, test_size=0.2, random_state=42)

CNN STUFF here

In [12]:
#get word embeddings
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']

In [13]:
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [14]:
y_train = y.tolist()
y_train = np.array(y_train)

In [15]:
y_test = y_test.tolist()
y_test = np.array(y_test)

In [16]:
tok = [word_tokenize(sent) for sent in x]

In [17]:
word_vec_len = 32
model = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [18]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150

x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

Using TensorFlow backend.


In [19]:
test_sequences = tokenizer.texts_to_sequences(x_test)
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

In [20]:
embeddings_index = {}
for w in model.wv.vocab.keys():
    embeddings_index[w] = model.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [21]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [22]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])

import keras.backend as K

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [23]:
from keras.callbacks import EarlyStopping
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__()
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [24]:
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.regularizers import l2

model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(50, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
#model_cnn_01.fit(x_train_seq, y_train, validation_data=(x_val_seq, y_validation), epochs=5, batch_size=32, verbose=2)
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 3s - loss: 1.9547 - raw_multi_label_accuracy: 0.1290 - val_loss: 1.4882 - val_raw_multi_label_accuracy: 0.0580
Epoch 2/1000
 - 2s - loss: 1.3078 - raw_multi_label_accuracy: 0.1388 - val_loss: 0.9984 - val_raw_multi_label_accuracy: 0.1164
Epoch 3/1000
 - 1s - loss: 0.9113 - raw_multi_label_accuracy: 0.1282 - val_loss: 0.7192 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 1s - loss: 0.6759 - raw_multi_label_accuracy: 0.1285 - val_loss: 0.5543 - val_raw_multi_label_accuracy: 0.1695
Epoch 5/1000
 - 1s - loss: 0.5361 - raw_multi_label_accuracy: 0.1221 - val_loss: 0.4538 - val_raw_multi_label_accuracy: 0.1700
E

Epoch 56/1000
 - 2s - loss: 0.2738 - raw_multi_label_accuracy: 0.1803 - val_loss: 0.3402 - val_raw_multi_label_accuracy: 0.1338
Epoch 57/1000
 - 1s - loss: 0.2730 - raw_multi_label_accuracy: 0.1826 - val_loss: 0.3482 - val_raw_multi_label_accuracy: 0.1364
Epoch 58/1000
 - 1s - loss: 0.2729 - raw_multi_label_accuracy: 0.1860 - val_loss: 0.3529 - val_raw_multi_label_accuracy: 0.1495
Epoch 59/1000
 - 1s - loss: 0.2710 - raw_multi_label_accuracy: 0.1863 - val_loss: 0.3499 - val_raw_multi_label_accuracy: 0.1353
Epoch 60/1000
 - 1s - loss: 0.2706 - raw_multi_label_accuracy: 0.1831 - val_loss: 0.3566 - val_raw_multi_label_accuracy: 0.1470
Epoch 61/1000
 - 2s - loss: 0.2710 - raw_multi_label_accuracy: 0.1888 - val_loss: 0.3438 - val_raw_multi_label_accuracy: 0.1240
Epoch 62/1000
 - 2s - loss: 0.2696 - raw_multi_label_accuracy: 0.1914 - val_loss: 0.3535 - val_raw_multi_label_accuracy: 0.1391
Epoch 63/1000
 - 2s - loss: 0.2691 - raw_multi_label_accuracy: 0.1871 - val_loss: 0.3572 - val_raw_multi

Epoch 120/1000
 - 2s - loss: 0.2427 - raw_multi_label_accuracy: 0.2848 - val_loss: 0.4160 - val_raw_multi_label_accuracy: 0.1852
Epoch 121/1000
 - 2s - loss: 0.2423 - raw_multi_label_accuracy: 0.2888 - val_loss: 0.4088 - val_raw_multi_label_accuracy: 0.1664
Epoch 122/1000
 - 2s - loss: 0.2431 - raw_multi_label_accuracy: 0.2855 - val_loss: 0.4176 - val_raw_multi_label_accuracy: 0.1767
Epoch 123/1000
 - 2s - loss: 0.2421 - raw_multi_label_accuracy: 0.2886 - val_loss: 0.4221 - val_raw_multi_label_accuracy: 0.1803
Epoch 124/1000
 - 2s - loss: 0.2415 - raw_multi_label_accuracy: 0.2927 - val_loss: 0.4448 - val_raw_multi_label_accuracy: 0.1819
Epoch 125/1000
 - 2s - loss: 0.2413 - raw_multi_label_accuracy: 0.2883 - val_loss: 0.4253 - val_raw_multi_label_accuracy: 0.1771
Epoch 126/1000
 - 2s - loss: 0.2398 - raw_multi_label_accuracy: 0.2949 - val_loss: 0.4126 - val_raw_multi_label_accuracy: 0.1790
Epoch 127/1000
 - 2s - loss: 0.2408 - raw_multi_label_accuracy: 0.2921 - val_loss: 0.4464 - val_r

Epoch 184/1000
 - 2s - loss: 0.2179 - raw_multi_label_accuracy: 0.3538 - val_loss: 0.4971 - val_raw_multi_label_accuracy: 0.1916
Epoch 185/1000
 - 1s - loss: 0.2154 - raw_multi_label_accuracy: 0.3572 - val_loss: 0.4958 - val_raw_multi_label_accuracy: 0.1928
Epoch 186/1000
 - 1s - loss: 0.2165 - raw_multi_label_accuracy: 0.3568 - val_loss: 0.4885 - val_raw_multi_label_accuracy: 0.1902
Epoch 187/1000
 - 2s - loss: 0.2163 - raw_multi_label_accuracy: 0.3518 - val_loss: 0.5252 - val_raw_multi_label_accuracy: 0.1899
Epoch 188/1000
 - 1s - loss: 0.2155 - raw_multi_label_accuracy: 0.3519 - val_loss: 0.4996 - val_raw_multi_label_accuracy: 0.1924
Epoch 189/1000
 - 2s - loss: 0.2170 - raw_multi_label_accuracy: 0.3543 - val_loss: 0.5150 - val_raw_multi_label_accuracy: 0.1912
Epoch 190/1000
 - 1s - loss: 0.2149 - raw_multi_label_accuracy: 0.3574 - val_loss: 0.5166 - val_raw_multi_label_accuracy: 0.1983
Epoch 191/1000
 - 1s - loss: 0.2147 - raw_multi_label_accuracy: 0.3593 - val_loss: 0.4923 - val_r

<keras.callbacks.History at 0x7fa491f27ef0>

In [25]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

In [26]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [27]:
y_test[:,0].sum()

21

In [28]:
predictions[:,0].sum()

0

In [29]:
predictions[0]

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [30]:
multi_label_accuracy(y_test, predictions)

0.24158823216612166

In [31]:
multi_label_precision(y_test, predictions)

0.5045395590142673

In [32]:
multi_label_recall(y_test, predictions)

0.29439259791018585

In [33]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 86.70854271356784


In [34]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9179229480737019
Precision for Family: 0.2
Recall for Family: 0.021739130434782608

Accuruacy for Science Fiction: 0.8961474036850922
Precision for Science Fiction: 0.0
Recall for Science Fiction: 0.0

Accuruacy for Thriller: 0.6549413735343383
Precision for Thriller: 0.39779005524861877
Recall for Thriller: 0.4260355029585799

Accuruacy for Horror: 0.9061976549413735
Precision for Horror: 0.0
Recall for Horror: 0.0

Accuruacy for Romance: 0.7956448911222781
Precision for Romance: 0.28
Recall for Romance: 0.06306306306306306

Accuruacy for Drama: 0.5695142378559463
Precision for Drama: 0.5679824561403509
Recall for Drama: 0.8119122257053292

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
Recall for Documentary: 0.0

Accuruacy for Fantasy: 0.9229480737018425
Pre

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [35]:
from keras.layers import Input, Dense, concatenate, Activation
from keras.models import Model

model_input = Input(shape=(max_seq_len,), dtype='int32')

e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.001))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])

In [36]:
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 5s - loss: 2.6250 - raw_multi_label_accuracy: 0.1266 - val_loss: 1.8843 - val_raw_multi_label_accuracy: 0.1415
Epoch 2/1000
 - 4s - loss: 1.4989 - raw_multi_label_accuracy: 0.1242 - val_loss: 1.0949 - val_raw_multi_label_accuracy: 0.1695
Epoch 3/1000
 - 4s - loss: 0.8921 - raw_multi_label_accuracy: 0.1034 - val_loss: 0.6831 - val_raw_multi_label_accuracy: 0.0795
Epoch 4/1000
 - 4s - loss: 0.5873 - raw_multi_label_accuracy: 0.1118 - val_loss: 0.4856 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 4s - loss: 0.4428 - raw_multi_label_accuracy: 0.0923 - val_loss: 0.3931 - val_raw_multi_label_accuracy: 0.1695
Epoch 6/1000
 - 4s - loss: 0.3763 - raw_multi_label_accuracy: 0.1049 - val_loss: 0.3527 - val_raw_multi_label_accuracy: 0.1695
Epoch 7/1000
 - 4s - loss: 0.3464 - raw_multi_label_accuracy: 0.0952 - val_loss: 0.3365 - val_raw_multi_label_accuracy: 0.1695
Epoch 8/1000
 - 4s - loss: 0.3334 - raw_multi_label_accuracy

<keras.callbacks.History at 0x7fa471155e48>

In [37]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [38]:
multi_label_accuracy(y_test, predictions)

0.18065725452660114

In [39]:
multi_label_precision(y_test, predictions)

0.603305785123967

In [40]:
multi_label_recall(y_test, predictions)

0.18640823163436215

In [41]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 87.79731993299833


In [42]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9229480737018425
Precision for Family: 0.0
Recall for Family: 0.0

Accuruacy for Science Fiction: 0.897822445561139
Precision for Science Fiction: 1.0
Recall for Science Fiction: 0.016129032258064516

Accuruacy for Thriller: 0.7252931323283082
Precision for Thriller: 0.6190476190476191
Recall for Thriller: 0.07692307692307693

Accuruacy for Horror: 0.9061976549413735
Precision for Horror: 0.0
Recall for Horror: 0.0

Accuruacy for Romance: 0.8090452261306532
Precision for Romance: 0.2857142857142857
Recall for Romance: 0.018018018018018018

Accuruacy for Drama: 0.5979899497487438
Precision for Drama: 0.6207951070336392
Recall for Drama: 0.6363636363636364

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
Recall for Documentary: 0.0

Accuruacy for Fantasy: 0.922948

Regular Neural Network

In [43]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 2s - loss: 0.4002 - raw_multi_label_accuracy: 0.0808 - val_loss: 0.3392 - val_raw_multi_label_accuracy: 0.1100
Epoch 2/1000
 - 2s - loss: 0.3229 - raw_multi_label_accuracy: 0.1074 - val_loss: 0.3266 - val_raw_multi_label_accuracy: 0.0874
Epoch 3/1000
 - 2s - loss: 0.3060 - raw_multi_label_accuracy: 0.1316 - val_loss: 0.3265 - val_raw_multi_label_accuracy: 0.1098
Epoch 4/1000
 - 2s - loss: 0.2958 - raw_multi_label_accuracy: 0.1274 - val_loss: 0.3212 - val_raw_multi_label_accuracy: 0.0652
Epoch 5/1000
 - 2s - loss: 0.2860 - raw_multi_label_accuracy: 0.1491 - val_loss: 0.3214 - val_raw_multi_label_accuracy: 0.0786
Epoch 6/1000
 - 2s - loss: 0.2719 - raw_multi_label_accuracy: 0.1795 - val_loss: 0.3199 - val_raw_multi_label_accuracy: 0.1098
Epoch 7/1000
 - 2s - loss: 0.2543 - raw_multi_label_accuracy: 0.2449 - val_loss: 0.3176 - val_raw_multi_label_accuracy: 0.1046
Epoch 8/1000
 - 2s - loss: 0.2304 - raw_multi_label_accuracy: 0.

<keras.callbacks.History at 0x7fa470dd7be0>

In [44]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [45]:
multi_label_accuracy(y_test, predictions)

0.2245313870942012

In [46]:
multi_label_precision(y_test, predictions)

0.5557419835943325

In [47]:
multi_label_recall(y_test, predictions)

0.2560780090930844

In [48]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 87.72194304857621


In [49]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9212730318257957
Precision for Family: 0.0
Recall for Family: 0.0

Accuruacy for Science Fiction: 0.8927973199329984
Precision for Science Fiction: 0.375
Recall for Science Fiction: 0.04838709677419355

Accuruacy for Thriller: 0.711892797319933
Precision for Thriller: 0.4594594594594595
Recall for Thriller: 0.10059171597633136

Accuruacy for Horror: 0.9061976549413735
Precision for Horror: 0.5
Recall for Horror: 0.017857142857142856

Accuruacy for Romance: 0.8224455611390284
Precision for Romance: 0.5862068965517241
Recall for Romance: 0.15315315315315314

Accuruacy for Drama: 0.5963149078726968
Precision for Drama: 0.6167664670658682
Recall for Drama: 0.64576802507837

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
Recall for Documentary: 0.0

Accuruacy for Fa

LSTM

In [50]:
from keras.layers import LSTM
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 10s - loss: 0.5200 - raw_multi_label_accuracy: 0.0882 - val_loss: 0.3444 - val_raw_multi_label_accuracy: 0.1695
Epoch 2/1000
 - 8s - loss: 0.3209 - raw_multi_label_accuracy: 0.0806 - val_loss: 0.3171 - val_raw_multi_label_accuracy: 0.1695
Epoch 3/1000
 - 8s - loss: 0.3132 - raw_multi_label_accuracy: 0.1301 - val_loss: 0.3151 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 4/1000
 - 8s - loss: 0.3124 - raw_multi_label_accuracy: 0.0839 - val_loss: 0.3148 - val_raw_multi_label_accuracy: 0.0243
Epoch 5/1000
 - 8s - loss: 0.3119 - raw_multi_label_accuracy: 0.1274 - val_loss: 0.3148 - val_raw_multi_label_accuracy: 0.0225
Epoch 6/1000
 - 8s - loss: 0.3115 - raw_multi_label_accuracy: 0.1166 - val_loss: 0.3150 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 7/1000
 - 8s - loss: 0.3113 - raw_multi_label_accuracy: 0.1097 - val_loss: 0.3149 - val_raw_multi_label_accuracy: 0.0338
Epoch 8/1000
 - 8s - loss: 0.3111 - raw_multi_label_acc

<keras.callbacks.History at 0x7fa4705b3cf8>

In [51]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [52]:
multi_label_accuracy(y_test, predictions)

0.24510648480497746

In [53]:
multi_label_precision(y_test, predictions)

0.5620567375886524

In [54]:
multi_label_recall(y_test, predictions)

0.2793890085347372

In [55]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 87.66331658291458


In [56]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9229480737018425
Precision for Family: 0.0
Recall for Family: 0.0

Accuruacy for Science Fiction: 0.8961474036850922
Precision for Science Fiction: 0.0
Recall for Science Fiction: 0.0

Accuruacy for Thriller: 0.7001675041876047
Precision for Thriller: 0.4609375
Recall for Thriller: 0.34911242603550297

Accuruacy for Horror: 0.9061976549413735
Precision for Horror: 0.0
Recall for Horror: 0.0

Accuruacy for Romance: 0.8257956448911222
Precision for Romance: 0.5945945945945946
Recall for Romance: 0.1981981981981982

Accuruacy for Drama: 0.5845896147403685
Precision for Drama: 0.60790273556231
Recall for Drama: 0.6269592476489029

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
Recall for Documentary: 0.0

Accuruacy for Fantasy: 0.9229480737018425
Precision for Fant