In [4]:
import numpy as np
import pandas as pd
#for reading in data properly
import ast
import json

import gensim
from gensim.models import Doc2Vec
from gensim.models import Word2Vec
from gensim.models.doc2vec import TaggedDocument

from sklearn.model_selection import train_test_split
from sklearn import utils

import re

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = set(stopwords.words('english'))

In [5]:
all_data = pd.read_csv('train.csv')
all_data = all_data.dropna(subset=['overview', 'genres']) #drop cols without overview or genre (data we use or labels)

In [6]:
def text_to_list(x):
    if pd.isna(x):
        return ''
    else:
        return ast.literal_eval(x)

def parse_json(x):
    try:
        return json.loads(x.replace("'", '"'))[0]['name']
    except:
        return ''
    
def parse_genres_json(x):
    try:
        json_genres = json.loads(x.replace("'", '"'))
        numElems = len(json_genres)
        ret = [0]*len(genre_dict) #number of genres we are looking at
        for i in range(numElems):
            genre_str = (json_genres[i]['name'])
            if genre_str in genre_map.keys():
                ret[genre_dict[genre_map[genre_str]]] = 1
        return ret
    except Exception as excep:
        print('Exception' + str(excep))
        return ''

In [7]:
genre_dict = {'War': 0,
 'Family': 1,
 'Science Fiction': 2,
 'Thriller': 3,
 'Horror': 4,
 'Romance': 5,
 'Drama': 6,
 'Foreign': 7,
 'Documentary': 8,
 'Fantasy': 9,
 'Western': 10,
 'History': 11,
 'Comedy': 12,
 'Action': 13,
 'Adventure': 14,
 'Animation': 15,
 'Crime': 16,
 'Music': 17,
 'TV Movie': 18,
 'Mystery': 19}
genre_dict

{'War': 0,
 'Family': 1,
 'Science Fiction': 2,
 'Thriller': 3,
 'Horror': 4,
 'Romance': 5,
 'Drama': 6,
 'Foreign': 7,
 'Documentary': 8,
 'Fantasy': 9,
 'Western': 10,
 'History': 11,
 'Comedy': 12,
 'Action': 13,
 'Adventure': 14,
 'Animation': 15,
 'Crime': 16,
 'Music': 17,
 'TV Movie': 18,
 'Mystery': 19}

In [8]:
#for mapping to coarse grained labels (in this situation we don't do that so labels map to self)
genre_map = {'War': 'War',
 'Family': 'Family',
 'Science Fiction': 'Science Fiction',
 'Thriller': 'Thriller',
 'Horror': 'Horror',
 'Romance': 'Romance',
 'Drama': 'Drama',
 'Foreign': 'Foreign',
 'Documentary': 'Documentary',
 'Fantasy': 'Fantasy',
 'Western': 'Western',
 'History': 'History',
 'Comedy': 'Comedy',
 'Action': 'Action',
 'Adventure': 'Adventure',
 'Animation': 'Animation',
 'Crime': 'Crime',
 'Music': 'Music',
 'TV Movie': 'TV Movie',
 'Mystery': 'Mystery'}

In [9]:
def getGenresVects():
    y = all_data['genres']
    ret = y.apply(parse_genres_json)
    all_data['genres_vect'] = ret
    return ret

In [10]:
labels_vects = getGenresVects() #get label vectors for genres indexed by indexes in genre_dict

In [11]:
#put to lower case, remove punctation
def cleanText(text):
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    text = ' '.join(no_stopword_text)
    text = re.sub(r'[^a-z A-Z0-9]', "", text) #maybe shouldn't remove punction between words here?
    text = text.lower()
    return text
all_data['cleanOverview'] = all_data['overview'].apply(cleanText)

In [12]:
all_data = all_data[all_data.genres_vect.map(sum) > 0]

In [13]:
#logistic regression data
lr_data = all_data[['cleanOverview', 'genres_vect', 'overview']]

In [14]:
train, test = train_test_split(lr_data, test_size=0.2, random_state=42)

CNN STUFF here

In [15]:
#get word embeddings
x = train['cleanOverview'].values.tolist()
y = train['genres_vect']

In [16]:
x_test = test['cleanOverview'].values.tolist()
y_test = test['genres_vect']

In [17]:
y_train = y.tolist()
y_train = np.array(y_train)

In [18]:
y_test = y_test.tolist()
y_test = np.array(y_test)

In [19]:
tok = [word_tokenize(sent) for sent in x]

In [20]:
word_vec_len = 32
model = Word2Vec(tok, min_count = 2, size=word_vec_len)

In [21]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

num_words_kept = 100000 #using 100000 most popular words, use throughout

tokenizer = Tokenizer(num_words_kept)
tokenizer.fit_on_texts(x)
sequences = tokenizer.texts_to_sequences(x)

max_seq_len = 150

x_train_seq = pad_sequences(sequences, maxlen=max_seq_len)

In [22]:
test_sequences = tokenizer.texts_to_sequences(x_test)
x_test_seq = pad_sequences(test_sequences, maxlen=max_seq_len)

In [23]:
embeddings_index = {}
for w in model.wv.vocab.keys():
    embeddings_index[w] = model.wv[w]


embedding_matrix = np.zeros((num_words_kept, word_vec_len))
for word, i in tokenizer.word_index.items():
    if i >= num_words_kept:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [24]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

def get_per_label_metrics(real_labels_matrix, predictions_labels_matrix):
    for genre in genre_dict.keys():
        index = genre_dict[genre]
        real_labels_vect = real_labels_matrix[:, index]
        prediction_vect = predictions_labels_matrix[:,index]
        print("Accuruacy for " + genre + ": " + str(accuracy_score(real_labels_vect, prediction_vect)))
        print("Precision for " + genre + ": " + str(precision_score(real_labels_vect, prediction_vect)))
        print("Recall for " + genre + ": " + str(recall_score(real_labels_vect, prediction_vect)))
        print()

In [25]:
#size of intersection of predicted and actual labels divided by size of their union for each datapoint tested on
#sum those and then divide by number of datapoints
#vectorized for speed
def multi_label_accuracy(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    union = real_labels_matrix | predictions_labels_matrix
    #sum(array.T) gets number of 1s in row
    row_wise_accuracy = sum(intersection.T) / sum(union.T)
    return sum(row_wise_accuracy) / real_labels_matrix.shape[0]

#size of intersection of predicted and actual labels divided by size of predicted set for each datapoint tested on
#sum those and divide by number of datapoints
#if no predicted labels, don't count that row towards the precision as that would be undefined
def multi_label_precision(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    precision_sum = 0
    num_rows = 0
    for row in range(intersection.shape[0]):
        if sum(predictions_labels_matrix[row]) > 0: #if there is at least one prediction for this row
            num_rows += 1
            precision_sum += sum(intersection[row]) / sum(predictions_labels_matrix[row])
    if num_rows == 0:
        return 0#no labels predicted at all will give us 0 precision as precision makes no sense here
    return precision_sum / num_rows

#size of intersection of predicted and actual labels divided by size of real label set for each datapoint tested on
#sum those and divide by number of datapoints
#all datapoints should have at least 1 real label in this data set
#vectorized for speed
def multi_label_recall(real_labels_matrix, predictions_labels_matrix):
    #binary so set intersection is and operator
    intersection = real_labels_matrix & predictions_labels_matrix
    #set union for binary is same as or operator
    #sum(array.T) gets number of 1s in row
    row_wise_recall = sum(intersection.T) / sum(real_labels_matrix.T)
    return sum(row_wise_recall) / real_labels_matrix.shape[0]

#lower is better
def hamming_loss(real_labels_matrix, predictions_labels_matrix):
    return (np.logical_xor(real_labels_matrix, predictions_labels_matrix)).sum()/(real_labels_matrix.shape[0] * real_labels_matrix.shape[1])

import keras.backend as K

#metric for keras for early stopping
#takes in raw labels from kerass (not yet converted to 0 and 1s)
#NOT the same as accuracy, this is total labels correctly identified divided by union of total labels
#this weights rows with more labels higher, where accruacy does not, but this is still a good metric for early stopping
def raw_multi_label_accuracy(y_true, y_pred):
    positives = K.greater_equal(y_pred, 0.5)
    positives = K.cast(positives, K.floatx())
    new_y_pred = positives #+ ((1-positives)*y_pred)
    intersection = y_true * new_y_pred
    union = 1 -((1-y_true)*(1-new_y_pred))
    accuracy = K.sum(intersection) / K.sum(union)
    return accuracy
    

In [26]:
from keras.callbacks import EarlyStopping
#for early stopping only after certain number of epochs. wait until delay epochs until early stopping
class DelayedEarlyStopping(EarlyStopping):
    def __init__(self, monitor, min_delta=0, patience=0, verbose=0, mode='auto', delay = 100):
        super(DelayedEarlyStopping, self).__init__()
        self.delay = delay

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.delay:
            super().on_epoch_end(epoch, logs)

In [24]:
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.regularizers import l2

model_cnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
#e = Embedding(num_words_kept, word_vec_len, input_length=max_seq_len, trainable=True)
model_cnn.add(e)
model_cnn.add(Conv1D(filters=50, kernel_size=2, padding='valid', activation='relu', strides=1))
model_cnn.add(GlobalMaxPooling1D())
model_cnn.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(50, activation='relu', kernel_regularizer=l2(0.001)))
model_cnn.add(Dropout(.5))
model_cnn.add(Dense(len(genre_dict), activation='sigmoid'))
model_cnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
#model_cnn_01.fit(x_train_seq, y_train, validation_data=(x_val_seq, y_validation), epochs=5, batch_size=32, verbose=2)
model_cnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 2s - loss: 0.7434 - raw_multi_label_accuracy: 0.1475 - val_loss: 0.5313 - val_raw_multi_label_accuracy: 0.2474
Epoch 2/1000
 - 2s - loss: 0.5605 - raw_multi_label_accuracy: 0.1596 - val_loss: 0.4396 - val_raw_multi_label_accuracy: 0.1697
Epoch 3/1000
 - 2s - loss: 0.4800 - raw_multi_label_accuracy: 0.1429 - val_loss: 0.4059 - val_raw_multi_label_accuracy: 0.1695
Epoch 4/1000
 - 2s - loss: 0.4383 - raw_multi_label_accuracy: 0.1371 - val_loss: 0.3832 - val_raw_multi_label_accuracy: 0.1697
Epoch 5/1000
 - 2s - loss: 0.4124 - raw_multi_label_accuracy: 0.1182 - val_loss: 0.3696 - val_raw_multi_label_accuracy: 0.1697
Epoch

Epoch 56/1000
 - 2s - loss: 0.2050 - raw_multi_label_accuracy: 0.4350 - val_loss: 0.4118 - val_raw_multi_label_accuracy: 0.1971
Epoch 57/1000
 - 2s - loss: 0.2016 - raw_multi_label_accuracy: 0.4404 - val_loss: 0.4272 - val_raw_multi_label_accuracy: 0.1941
Epoch 58/1000
 - 2s - loss: 0.2023 - raw_multi_label_accuracy: 0.4408 - val_loss: 0.4387 - val_raw_multi_label_accuracy: 0.1962
Epoch 59/1000
 - 2s - loss: 0.1992 - raw_multi_label_accuracy: 0.4479 - val_loss: 0.4469 - val_raw_multi_label_accuracy: 0.1965
Epoch 60/1000
 - 2s - loss: 0.1978 - raw_multi_label_accuracy: 0.4513 - val_loss: 0.4408 - val_raw_multi_label_accuracy: 0.1934
Epoch 61/1000
 - 2s - loss: 0.1979 - raw_multi_label_accuracy: 0.4498 - val_loss: 0.4525 - val_raw_multi_label_accuracy: 0.1960
Epoch 62/1000
 - 2s - loss: 0.1954 - raw_multi_label_accuracy: 0.4578 - val_loss: 0.4539 - val_raw_multi_label_accuracy: 0.2021
Epoch 63/1000
 - 2s - loss: 0.1945 - raw_multi_label_accuracy: 0.4598 - val_loss: 0.4653 - val_raw_multi

Epoch 120/1000
 - 1s - loss: 0.1633 - raw_multi_label_accuracy: 0.5142 - val_loss: 0.6361 - val_raw_multi_label_accuracy: 0.1984
Epoch 121/1000
 - 2s - loss: 0.1612 - raw_multi_label_accuracy: 0.5231 - val_loss: 0.6497 - val_raw_multi_label_accuracy: 0.1991
Epoch 122/1000
 - 2s - loss: 0.1613 - raw_multi_label_accuracy: 0.5161 - val_loss: 0.6380 - val_raw_multi_label_accuracy: 0.1961
Epoch 123/1000
 - 2s - loss: 0.1604 - raw_multi_label_accuracy: 0.5194 - val_loss: 0.6634 - val_raw_multi_label_accuracy: 0.1911
Epoch 124/1000
 - 2s - loss: 0.1611 - raw_multi_label_accuracy: 0.5159 - val_loss: 0.6507 - val_raw_multi_label_accuracy: 0.1897
Epoch 125/1000
 - 2s - loss: 0.1592 - raw_multi_label_accuracy: 0.5180 - val_loss: 0.6604 - val_raw_multi_label_accuracy: 0.1937
Epoch 126/1000
 - 2s - loss: 0.1609 - raw_multi_label_accuracy: 0.5163 - val_loss: 0.6479 - val_raw_multi_label_accuracy: 0.1919
Epoch 127/1000
 - 2s - loss: 0.1607 - raw_multi_label_accuracy: 0.5167 - val_loss: 0.6479 - val_r

Epoch 184/1000
 - 2s - loss: 0.1429 - raw_multi_label_accuracy: 0.5633 - val_loss: 0.7802 - val_raw_multi_label_accuracy: 0.1843
Epoch 185/1000
 - 2s - loss: 0.1405 - raw_multi_label_accuracy: 0.5714 - val_loss: 0.7823 - val_raw_multi_label_accuracy: 0.1744
Epoch 186/1000
 - 2s - loss: 0.1410 - raw_multi_label_accuracy: 0.5736 - val_loss: 0.7781 - val_raw_multi_label_accuracy: 0.1765
Epoch 187/1000
 - 2s - loss: 0.1410 - raw_multi_label_accuracy: 0.5677 - val_loss: 0.7840 - val_raw_multi_label_accuracy: 0.1851
Epoch 188/1000
 - 2s - loss: 0.1395 - raw_multi_label_accuracy: 0.5685 - val_loss: 0.8134 - val_raw_multi_label_accuracy: 0.1919
Epoch 189/1000
 - 2s - loss: 0.1407 - raw_multi_label_accuracy: 0.5742 - val_loss: 0.7848 - val_raw_multi_label_accuracy: 0.1761
Epoch 190/1000
 - 2s - loss: 0.1399 - raw_multi_label_accuracy: 0.5763 - val_loss: 0.7606 - val_raw_multi_label_accuracy: 0.1855
Epoch 191/1000
 - 2s - loss: 0.1411 - raw_multi_label_accuracy: 0.5682 - val_loss: 0.8083 - val_r

Epoch 248/1000
 - 2s - loss: 0.1260 - raw_multi_label_accuracy: 0.6144 - val_loss: 0.8666 - val_raw_multi_label_accuracy: 0.1776
Epoch 249/1000
 - 2s - loss: 0.1270 - raw_multi_label_accuracy: 0.6155 - val_loss: 0.8879 - val_raw_multi_label_accuracy: 0.1737
Epoch 250/1000
 - 2s - loss: 0.1260 - raw_multi_label_accuracy: 0.6221 - val_loss: 0.8796 - val_raw_multi_label_accuracy: 0.1673
Epoch 251/1000
 - 2s - loss: 0.1259 - raw_multi_label_accuracy: 0.6118 - val_loss: 0.8673 - val_raw_multi_label_accuracy: 0.1784
Epoch 252/1000
 - 2s - loss: 0.1252 - raw_multi_label_accuracy: 0.6193 - val_loss: 0.8753 - val_raw_multi_label_accuracy: 0.1729
Epoch 253/1000
 - 2s - loss: 0.1260 - raw_multi_label_accuracy: 0.6183 - val_loss: 0.8563 - val_raw_multi_label_accuracy: 0.1709
Epoch 254/1000
 - 2s - loss: 0.1255 - raw_multi_label_accuracy: 0.6148 - val_loss: 0.9154 - val_raw_multi_label_accuracy: 0.1717


<keras.callbacks.History at 0x7fd58c9c58d0>

In [25]:
def nn_output_to_predictions(res):
    label_predictions = []
    for i in range(res.shape[0]):
        pred = [0]*len(genre_dict)
        for j in range(res.shape[1]):
            if res[i][j] >= .5:
                pred[j] = 1
        label_predictions.append(pred)
    return np.array(label_predictions)

In [26]:
predictions = nn_output_to_predictions(model_cnn.predict(x_test_seq))

In [27]:
y_test[:,0].sum()

21

In [28]:
predictions[:,0].sum()

0

In [29]:
predictions[0]

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [30]:
multi_label_accuracy(y_test, predictions)

0.25389048416686594

In [31]:
multi_label_precision(y_test, predictions)

0.47152521525215263

In [32]:
multi_label_recall(y_test, predictions)

0.3154702081837761

In [33]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 86.31490787269682


In [34]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.964824120603015
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9028475711892797
Precision for Family: 0.2
Recall for Family: 0.08695652173913043

Accuruacy for Science Fiction: 0.8877721943048577
Precision for Science Fiction: 0.2222222222222222
Recall for Science Fiction: 0.03225806451612903

Accuruacy for Thriller: 0.6532663316582915
Precision for Thriller: 0.32727272727272727
Recall for Thriller: 0.21301775147928995

Accuruacy for Horror: 0.8961474036850922
Precision for Horror: 0.3333333333333333
Recall for Horror: 0.10714285714285714

Accuruacy for Romance: 0.7721943048576214
Precision for Romance: 0.3595505617977528
Recall for Romance: 0.2882882882882883

Accuruacy for Drama: 0.5711892797319933
Precision for Drama: 0.584
Recall for Drama: 0.6865203761755486

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9731993299832495
Precision for Documentary: 0.0
Recall for D

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


CNN but with multiple filter sizes so we don't just filter on group of words at a time

In [35]:
from keras.layers import Input, Dense, concatenate, Activation
from keras.models import Model

model_input = Input(shape=(max_seq_len,), dtype='int32')

e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)(model_input)
two_word_filter = Conv1D(filters=100, kernel_size=2, padding='valid', activation='relu', strides=1)(e)
two_word_filter = GlobalMaxPooling1D()(two_word_filter)
three_word_filter = Conv1D(filters=100, kernel_size=3, padding='valid', activation='relu', strides=1)(e)
three_word_filter = GlobalMaxPooling1D()(three_word_filter)
four_word_filter = Conv1D(filters=100, kernel_size=4, padding='valid', activation='relu', strides=1)(e)
four_word_filter = GlobalMaxPooling1D()(four_word_filter)
merged = concatenate([two_word_filter, three_word_filter, four_word_filter], axis=1)

merged = Dense(256, activation='relu', kernel_regularizer=l2(0.001))(merged)
merged = Dropout(0.5)(merged)
merged = Dense(len(genre_dict))(merged)
output = Activation('sigmoid')(merged)
model = Model(inputs=[model_input], outputs=[output])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])

In [36]:
model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 5s - loss: 0.6582 - raw_multi_label_accuracy: 0.1241 - val_loss: 0.5197 - val_raw_multi_label_accuracy: 0.0463
Epoch 2/1000
 - 4s - loss: 0.5103 - raw_multi_label_accuracy: 0.1161 - val_loss: 0.4582 - val_raw_multi_label_accuracy: 0.0016
Epoch 3/1000
 - 4s - loss: 0.4520 - raw_multi_label_accuracy: 0.1097 - val_loss: 0.4194 - val_raw_multi_label_accuracy: 0.0446
Epoch 4/1000
 - 5s - loss: 0.4170 - raw_multi_label_accuracy: 0.1064 - val_loss: 0.3949 - val_raw_multi_label_accuracy: 0.0000e+00
Epoch 5/1000
 - 5s - loss: 0.3939 - raw_multi_label_accuracy: 0.1086 - val_loss: 0.3781 - val_raw_multi_label_accuracy: 0.1436
Epoch 6/1000
 - 5s - loss: 0.3778 - raw_multi_label_accuracy: 0.1081 - val_loss: 0.3655 - val_raw_multi_label_accuracy: 0.1697
Epoch 7/1000
 - 4s - loss: 0.3650 - raw_multi_label_accuracy: 0.0983 - val_loss: 0.3566 - val_raw_multi_label_accuracy: 0.1697
Epoch 8/1000
 - 5s - loss: 0.3547 - raw_multi_label_accuracy

Epoch 65/1000
 - 4s - loss: 0.0625 - raw_multi_label_accuracy: 0.8530 - val_loss: 0.6433 - val_raw_multi_label_accuracy: 0.2096
Epoch 66/1000
 - 4s - loss: 0.0622 - raw_multi_label_accuracy: 0.8539 - val_loss: 0.6299 - val_raw_multi_label_accuracy: 0.2041
Epoch 67/1000
 - 4s - loss: 0.0610 - raw_multi_label_accuracy: 0.8606 - val_loss: 0.6444 - val_raw_multi_label_accuracy: 0.2191
Epoch 68/1000
 - 4s - loss: 0.0612 - raw_multi_label_accuracy: 0.8578 - val_loss: 0.6384 - val_raw_multi_label_accuracy: 0.2100
Epoch 69/1000
 - 4s - loss: 0.0590 - raw_multi_label_accuracy: 0.8592 - val_loss: 0.6757 - val_raw_multi_label_accuracy: 0.1907
Epoch 70/1000
 - 4s - loss: 0.0601 - raw_multi_label_accuracy: 0.8596 - val_loss: 0.6564 - val_raw_multi_label_accuracy: 0.2100
Epoch 71/1000
 - 4s - loss: 0.0595 - raw_multi_label_accuracy: 0.8623 - val_loss: 0.6648 - val_raw_multi_label_accuracy: 0.2156
Epoch 72/1000
 - 4s - loss: 0.0568 - raw_multi_label_accuracy: 0.8676 - val_loss: 0.6914 - val_raw_multi

Epoch 129/1000
 - 4s - loss: 0.0326 - raw_multi_label_accuracy: 0.9356 - val_loss: 0.8557 - val_raw_multi_label_accuracy: 0.2040
Epoch 130/1000
 - 4s - loss: 0.0321 - raw_multi_label_accuracy: 0.9379 - val_loss: 0.8480 - val_raw_multi_label_accuracy: 0.1896
Epoch 131/1000
 - 4s - loss: 0.0332 - raw_multi_label_accuracy: 0.9336 - val_loss: 0.8330 - val_raw_multi_label_accuracy: 0.2024
Epoch 132/1000
 - 4s - loss: 0.0325 - raw_multi_label_accuracy: 0.9397 - val_loss: 0.8553 - val_raw_multi_label_accuracy: 0.1887
Epoch 133/1000
 - 4s - loss: 0.0322 - raw_multi_label_accuracy: 0.9355 - val_loss: 0.8641 - val_raw_multi_label_accuracy: 0.1862
Epoch 134/1000
 - 4s - loss: 0.0298 - raw_multi_label_accuracy: 0.9420 - val_loss: 0.8722 - val_raw_multi_label_accuracy: 0.1912
Epoch 135/1000
 - 4s - loss: 0.0293 - raw_multi_label_accuracy: 0.9451 - val_loss: 0.8774 - val_raw_multi_label_accuracy: 0.1987
Epoch 136/1000
 - 4s - loss: 0.0303 - raw_multi_label_accuracy: 0.9402 - val_loss: 0.8721 - val_r

Epoch 193/1000
 - 4s - loss: 0.0222 - raw_multi_label_accuracy: 0.9609 - val_loss: 1.0022 - val_raw_multi_label_accuracy: 0.1922
Epoch 194/1000
 - 4s - loss: 0.0214 - raw_multi_label_accuracy: 0.9630 - val_loss: 1.0198 - val_raw_multi_label_accuracy: 0.1929
Epoch 195/1000
 - 4s - loss: 0.0220 - raw_multi_label_accuracy: 0.9592 - val_loss: 0.9591 - val_raw_multi_label_accuracy: 0.1924
Epoch 196/1000
 - 4s - loss: 0.0213 - raw_multi_label_accuracy: 0.9628 - val_loss: 0.9893 - val_raw_multi_label_accuracy: 0.1944
Epoch 197/1000
 - 4s - loss: 0.0219 - raw_multi_label_accuracy: 0.9580 - val_loss: 0.9900 - val_raw_multi_label_accuracy: 0.1713
Epoch 198/1000
 - 4s - loss: 0.0227 - raw_multi_label_accuracy: 0.9610 - val_loss: 0.9497 - val_raw_multi_label_accuracy: 0.1945
Epoch 199/1000
 - 4s - loss: 0.0214 - raw_multi_label_accuracy: 0.9631 - val_loss: 1.0032 - val_raw_multi_label_accuracy: 0.1861
Epoch 200/1000
 - 4s - loss: 0.0205 - raw_multi_label_accuracy: 0.9664 - val_loss: 1.0222 - val_r

<keras.callbacks.History at 0x7fd58c1d5cf8>

In [37]:
predictions = nn_output_to_predictions(model.predict(x_test_seq))

In [38]:
multi_label_accuracy(y_test, predictions)

0.2095616973757677

In [39]:
multi_label_precision(y_test, predictions)

0.42306666666666676

In [40]:
multi_label_recall(y_test, predictions)

0.25687165988673527

In [41]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 86.05527638190955


In [42]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.9614740368509213
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.8994974874371859
Precision for Family: 0.1111111111111111
Recall for Family: 0.043478260869565216

Accuruacy for Science Fiction: 0.8944723618090452
Precision for Science Fiction: 0.3333333333333333
Recall for Science Fiction: 0.016129032258064516

Accuruacy for Thriller: 0.6917922948073701
Precision for Thriller: 0.42718446601941745
Recall for Thriller: 0.2603550295857988

Accuruacy for Horror: 0.8626465661641541
Precision for Horror: 0.19047619047619047
Recall for Horror: 0.14285714285714285

Accuruacy for Romance: 0.7906197654941374
Precision for Romance: 0.4
Recall for Romance: 0.25225225225225223

Accuruacy for Drama: 0.5108877721943048
Precision for Drama: 0.5610859728506787
Recall for Drama: 0.3887147335423197

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9715242881072027
Precision for Documentary:

Regular Neural Network

In [43]:
normal_nn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
normal_nn.add(e)
normal_nn.add(Flatten())
normal_nn.add(Dense(256, activation='relu'))
normal_nn.add(Dense(len(genre_dict), activation='sigmoid'))
normal_nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
normal_nn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 2s - loss: 0.4009 - raw_multi_label_accuracy: 0.1210 - val_loss: 0.3383 - val_raw_multi_label_accuracy: 0.0367
Epoch 2/1000
 - 2s - loss: 0.3216 - raw_multi_label_accuracy: 0.1196 - val_loss: 0.3316 - val_raw_multi_label_accuracy: 0.0802
Epoch 3/1000
 - 2s - loss: 0.3068 - raw_multi_label_accuracy: 0.1211 - val_loss: 0.3261 - val_raw_multi_label_accuracy: 0.1471
Epoch 4/1000
 - 2s - loss: 0.2978 - raw_multi_label_accuracy: 0.1203 - val_loss: 0.3258 - val_raw_multi_label_accuracy: 0.1460
Epoch 5/1000
 - 2s - loss: 0.2895 - raw_multi_label_accuracy: 0.1506 - val_loss: 0.3234 - val_raw_multi_label_accuracy: 0.1291
Epoch 6/1000
 - 2s - loss: 0.2780 - raw_multi_label_accuracy: 0.1746 - val_loss: 0.3269 - val_raw_multi_label_accuracy: 0.1069
Epoch 7/1000
 - 2s - loss: 0.2613 - raw_multi_label_accuracy: 0.2294 - val_loss: 0.3222 - val_raw_multi_label_accuracy: 0.1030
Epoch 8/1000
 - 2s - loss: 0.2415 - raw_multi_label_accuracy: 0.

Epoch 65/1000
 - 2s - loss: 8.7051e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.7567 - val_raw_multi_label_accuracy: 0.1805
Epoch 66/1000
 - 2s - loss: 8.1962e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.7635 - val_raw_multi_label_accuracy: 0.1808
Epoch 67/1000
 - 2s - loss: 8.0361e-04 - raw_multi_label_accuracy: 0.9995 - val_loss: 0.7624 - val_raw_multi_label_accuracy: 0.1839
Epoch 68/1000
 - 2s - loss: 7.7456e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.7637 - val_raw_multi_label_accuracy: 0.1788
Epoch 69/1000
 - 2s - loss: 7.4280e-04 - raw_multi_label_accuracy: 0.9995 - val_loss: 0.7696 - val_raw_multi_label_accuracy: 0.1839
Epoch 70/1000
 - 2s - loss: 7.5289e-04 - raw_multi_label_accuracy: 0.9993 - val_loss: 0.7753 - val_raw_multi_label_accuracy: 0.1839
Epoch 71/1000
 - 2s - loss: 6.9750e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.7782 - val_raw_multi_label_accuracy: 0.1793
Epoch 72/1000
 - 2s - loss: 6.9164e-04 - raw_multi_label_accuracy: 0.9992 - 

Epoch 127/1000
 - 2s - loss: 2.3287e-04 - raw_multi_label_accuracy: 0.9993 - val_loss: 0.8991 - val_raw_multi_label_accuracy: 0.1849
Epoch 128/1000
 - 2s - loss: 2.4380e-04 - raw_multi_label_accuracy: 0.9991 - val_loss: 0.8994 - val_raw_multi_label_accuracy: 0.1847
Epoch 129/1000
 - 2s - loss: 2.3854e-04 - raw_multi_label_accuracy: 0.9993 - val_loss: 0.8976 - val_raw_multi_label_accuracy: 0.1837
Epoch 130/1000
 - 2s - loss: 2.1478e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 0.9107 - val_raw_multi_label_accuracy: 0.1762
Epoch 131/1000
 - 2s - loss: 2.1985e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.9080 - val_raw_multi_label_accuracy: 0.1835
Epoch 132/1000
 - 2s - loss: 2.4183e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 0.9034 - val_raw_multi_label_accuracy: 0.1826
Epoch 133/1000
 - 2s - loss: 1.9431e-04 - raw_multi_label_accuracy: 0.9993 - val_loss: 0.9098 - val_raw_multi_label_accuracy: 0.1818
Epoch 134/1000
 - 2s - loss: 2.4170e-04 - raw_multi_label_accuracy: 0

Epoch 189/1000
 - 2s - loss: 1.8538e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 0.9721 - val_raw_multi_label_accuracy: 0.1796
Epoch 190/1000
 - 2s - loss: 1.5303e-04 - raw_multi_label_accuracy: 0.9994 - val_loss: 0.9762 - val_raw_multi_label_accuracy: 0.1814
Epoch 191/1000
 - 2s - loss: 1.3828e-04 - raw_multi_label_accuracy: 0.9993 - val_loss: 0.9792 - val_raw_multi_label_accuracy: 0.1814
Epoch 192/1000
 - 2s - loss: 1.9706e-04 - raw_multi_label_accuracy: 0.9990 - val_loss: 0.9768 - val_raw_multi_label_accuracy: 0.1837
Epoch 193/1000
 - 2s - loss: 1.4305e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 0.9844 - val_raw_multi_label_accuracy: 0.1753
Epoch 194/1000
 - 2s - loss: 2.4240e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 0.9746 - val_raw_multi_label_accuracy: 0.1891
Epoch 195/1000
 - 2s - loss: 1.1891e-04 - raw_multi_label_accuracy: 0.9996 - val_loss: 0.9930 - val_raw_multi_label_accuracy: 0.1740
Epoch 196/1000
 - 2s - loss: 1.9627e-04 - raw_multi_label_accuracy: 0

Epoch 251/1000
 - 2s - loss: 1.2942e-04 - raw_multi_label_accuracy: 0.9991 - val_loss: 1.0263 - val_raw_multi_label_accuracy: 0.1788
Epoch 252/1000
 - 2s - loss: 1.5348e-04 - raw_multi_label_accuracy: 0.9995 - val_loss: 1.0135 - val_raw_multi_label_accuracy: 0.1886
Epoch 253/1000
 - 2s - loss: 1.4070e-04 - raw_multi_label_accuracy: 0.9989 - val_loss: 1.0138 - val_raw_multi_label_accuracy: 0.1899


<keras.callbacks.History at 0x7fd58b846e48>

In [44]:
predictions = nn_output_to_predictions(normal_nn.predict(x_test_seq))

In [45]:
multi_label_accuracy(y_test, predictions)

0.2296960995453457

In [46]:
multi_label_precision(y_test, predictions)

0.5471167369901545

In [47]:
multi_label_recall(y_test, predictions)

0.2564409348328946

In [48]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 87.70519262981574


In [49]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.9631490787269682
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.9212730318257957
Precision for Family: 0.0
Recall for Family: 0.0

Accuruacy for Science Fiction: 0.8994974874371859
Precision for Science Fiction: 0.75
Recall for Science Fiction: 0.04838709677419355

Accuruacy for Thriller: 0.7252931323283082
Precision for Thriller: 0.5510204081632653
Recall for Thriller: 0.15976331360946747

Accuruacy for Horror: 0.9011725293132329
Precision for Horror: 0.0
Recall for Horror: 0.0

Accuruacy for Romance: 0.7973199329983249
Precision for Romance: 0.38095238095238093
Recall for Romance: 0.14414414414414414

Accuruacy for Drama: 0.609715242881072
Precision for Drama: 0.6221590909090909
Recall for Drama: 0.6865203761755486

Accuruacy for Foreign: 0.9882747068676717
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9715242881072027
Precision for Documentary: 0.0
Recall for Documentary: 0.0

Accuruacy for Fantasy: 0.91959

LSTM

In [29]:
from keras.layers import LSTM
lstm_model = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
lstm_model.add(e)
lstm_model.add(LSTM(100, dropout=0.25, recurrent_dropout=0.25))
rnn.add(Dense(256, activation='relu'))
lstm_model.add(Dense(len(genre_dict), activation='sigmoid'))
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
lstm_model.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 10s - loss: 0.5446 - raw_multi_label_accuracy: 0.1301 - val_loss: 0.3392 - val_raw_multi_label_accuracy: 0.1695
Epoch 2/1000
 - 9s - loss: 0.3211 - raw_multi_label_accuracy: 0.1177 - val_loss: 0.3167 - val_raw_multi_label_accuracy: 0.1635
Epoch 3/1000
 - 9s - loss: 0.3131 - raw_multi_label_accuracy: 0.0871 - val_loss: 0.3142 - val_raw_multi_label_accuracy: 0.0976
Epoch 4/1000
 - 9s - loss: 0.3119 - raw_multi_label_accuracy: 0.1449 - val_loss: 0.3144 - val_raw_multi_label_accuracy: 0.0127
Epoch 5/1000
 - 9s - loss: 0.3120 - raw_multi_label_accuracy: 0.0806 - val_loss: 0.3141 - val_raw_multi_label_accuracy: 0.1695
Epoch 6/1000
 - 14s - loss: 0.3119 - raw_multi_label_accuracy: 0.0932 - val_loss: 0.3143 - val_raw_multi_label_accuracy: 0.1003
Epoch 7/1000
 - 13s - loss: 0.3115 - raw_multi_label_accuracy: 0.0650 - va

Epoch 62/1000
 - 13s - loss: 0.1060 - raw_multi_label_accuracy: 0.7292 - val_loss: 0.4455 - val_raw_multi_label_accuracy: 0.1934
Epoch 63/1000
 - 9s - loss: 0.1039 - raw_multi_label_accuracy: 0.7332 - val_loss: 0.4464 - val_raw_multi_label_accuracy: 0.1948
Epoch 64/1000
 - 11s - loss: 0.1015 - raw_multi_label_accuracy: 0.7443 - val_loss: 0.4542 - val_raw_multi_label_accuracy: 0.1908
Epoch 65/1000
 - 10s - loss: 0.0994 - raw_multi_label_accuracy: 0.7463 - val_loss: 0.4550 - val_raw_multi_label_accuracy: 0.1983
Epoch 66/1000
 - 9s - loss: 0.0977 - raw_multi_label_accuracy: 0.7490 - val_loss: 0.4572 - val_raw_multi_label_accuracy: 0.1909
Epoch 67/1000
 - 10s - loss: 0.0956 - raw_multi_label_accuracy: 0.7594 - val_loss: 0.4618 - val_raw_multi_label_accuracy: 0.1949
Epoch 68/1000
 - 12s - loss: 0.0929 - raw_multi_label_accuracy: 0.7655 - val_loss: 0.4658 - val_raw_multi_label_accuracy: 0.1886
Epoch 69/1000
 - 12s - loss: 0.0910 - raw_multi_label_accuracy: 0.7729 - val_loss: 0.4688 - val_raw

Epoch 126/1000
 - 13s - loss: 0.0291 - raw_multi_label_accuracy: 0.9444 - val_loss: 0.6404 - val_raw_multi_label_accuracy: 0.1876
Epoch 127/1000
 - 13s - loss: 0.0283 - raw_multi_label_accuracy: 0.9505 - val_loss: 0.6434 - val_raw_multi_label_accuracy: 0.1868
Epoch 128/1000
 - 12s - loss: 0.0280 - raw_multi_label_accuracy: 0.9464 - val_loss: 0.6485 - val_raw_multi_label_accuracy: 0.1788
Epoch 129/1000
 - 10s - loss: 0.0273 - raw_multi_label_accuracy: 0.9506 - val_loss: 0.6486 - val_raw_multi_label_accuracy: 0.1867
Epoch 130/1000
 - 9s - loss: 0.0265 - raw_multi_label_accuracy: 0.9525 - val_loss: 0.6512 - val_raw_multi_label_accuracy: 0.1931
Epoch 131/1000
 - 11s - loss: 0.0261 - raw_multi_label_accuracy: 0.9513 - val_loss: 0.6528 - val_raw_multi_label_accuracy: 0.1883
Epoch 132/1000
 - 13s - loss: 0.0255 - raw_multi_label_accuracy: 0.9548 - val_loss: 0.6601 - val_raw_multi_label_accuracy: 0.1838
Epoch 133/1000
 - 12s - loss: 0.0251 - raw_multi_label_accuracy: 0.9552 - val_loss: 0.6571 

Epoch 190/1000
 - 8s - loss: 0.0101 - raw_multi_label_accuracy: 0.9872 - val_loss: 0.7847 - val_raw_multi_label_accuracy: 0.2102
Epoch 191/1000
 - 9s - loss: 0.0103 - raw_multi_label_accuracy: 0.9860 - val_loss: 0.7817 - val_raw_multi_label_accuracy: 0.2038
Epoch 192/1000
 - 8s - loss: 0.0102 - raw_multi_label_accuracy: 0.9836 - val_loss: 0.7849 - val_raw_multi_label_accuracy: 0.2061
Epoch 193/1000
 - 8s - loss: 0.0098 - raw_multi_label_accuracy: 0.9875 - val_loss: 0.7937 - val_raw_multi_label_accuracy: 0.1823
Epoch 194/1000
 - 9s - loss: 0.0098 - raw_multi_label_accuracy: 0.9869 - val_loss: 0.7863 - val_raw_multi_label_accuracy: 0.2041
Epoch 195/1000
 - 8s - loss: 0.0095 - raw_multi_label_accuracy: 0.9885 - val_loss: 0.7897 - val_raw_multi_label_accuracy: 0.2104
Epoch 196/1000
 - 8s - loss: 0.0094 - raw_multi_label_accuracy: 0.9879 - val_loss: 0.7864 - val_raw_multi_label_accuracy: 0.1926
Epoch 197/1000
 - 8s - loss: 0.0092 - raw_multi_label_accuracy: 0.9894 - val_loss: 0.7852 - val_r

<keras.callbacks.History at 0x7efc8432d208>

In [51]:
predictions = nn_output_to_predictions(lstm_model.predict(x_test_seq))

In [52]:
multi_label_accuracy(y_test, predictions)

0.2533886363032092

In [53]:
multi_label_precision(y_test, predictions)

0.4050647820965844

In [54]:
multi_label_recall(y_test, predictions)

0.3569035654462792

In [55]:
print("Percent of correctly decided label decisions: " + str(100* (1-hamming_loss(y_test, predictions))))

Percent of correctly decided label decisions: 85.01675041876047


In [56]:
get_per_label_metrics(y_test, predictions)

Accuruacy for War: 0.9597989949748744
Precision for War: 0.0
Recall for War: 0.0

Accuruacy for Family: 0.8693467336683417
Precision for Family: 0.16666666666666666
Recall for Family: 0.17391304347826086

Accuruacy for Science Fiction: 0.8827470686767169
Precision for Science Fiction: 0.34615384615384615
Recall for Science Fiction: 0.14516129032258066

Accuruacy for Thriller: 0.6331658291457286
Precision for Thriller: 0.33974358974358976
Recall for Thriller: 0.3136094674556213

Accuruacy for Horror: 0.8107202680067002
Precision for Horror: 0.16470588235294117
Recall for Horror: 0.25

Accuruacy for Romance: 0.7370184254606366
Precision for Romance: 0.2982456140350877
Recall for Romance: 0.3063063063063063

Accuruacy for Drama: 0.5695142378559463
Precision for Drama: 0.6148148148148148
Recall for Drama: 0.5203761755485894

Accuruacy for Foreign: 0.9865996649916248
Precision for Foreign: 0.0
Recall for Foreign: 0.0

Accuruacy for Documentary: 0.9715242881072027
Precision for Documentary: 

In [28]:
from keras.layers import SimpleRNN
rnn = Sequential()
e = Embedding(num_words_kept, word_vec_len, weights=[embedding_matrix], input_length=max_seq_len, trainable=True)
rnn.add(e)
rnn.add(SimpleRNN(32, activation = 'relu'))
rnn.add(Dense(256, activation='relu'))
rnn.add(Dense(len(genre_dict), activation='sigmoid'))
rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=[raw_multi_label_accuracy])
rnn.fit(x_train_seq, y_train, validation_split = .1, callbacks = [DelayedEarlyStopping(monitor = 'val_raw_multi_label_accuracy', patience = 5, delay=250)], epochs=1000, batch_size=100, verbose=2)

Instructions for updating:
Use tf.cast instead.
Train on 2149 samples, validate on 239 samples
Epoch 1/1000
 - 3s - loss: 0.5330 - raw_multi_label_accuracy: 0.1461 - val_loss: 0.3709 - val_raw_multi_label_accuracy: 0.1443
Epoch 2/1000
 - 2s - loss: 0.3349 - raw_multi_label_accuracy: 0.1031 - val_loss: 0.3302 - val_raw_multi_label_accuracy: 0.1600
Epoch 3/1000
 - 2s - loss: 0.3215 - raw_multi_label_accuracy: 0.1045 - val_loss: 0.3232 - val_raw_multi_label_accuracy: 0.1111
Epoch 4/1000
 - 2s - loss: 0.3139 - raw_multi_label_accuracy: 0.1220 - val_loss: 0.3193 - val_raw_multi_label_accuracy: 0.0635
Epoch 5/1000
 - 2s - loss: 0.3061 - raw_multi_label_accuracy: 0.1355 - val_loss: 0.3140 - val_raw_multi_label_accuracy: 0.1259
Epoch 6/1000
 - 2s - loss: 0.3000 - raw_multi_label_accuracy: 0.1518 - val_loss: 0.3117 - val_raw_multi_label_accuracy: 0.1575
Epoch 7/1000
 - 2s - loss: 0.2934 - raw_multi_label_accuracy: 0.1534 - val_loss: 0.3098 - val_raw_multi_label_accuracy: 0.1536
Epoch 8/1000
 - 

KeyboardInterrupt: 