In [56]:
import pydot
import json
import pickle
import pandas as pd
import numpy as np
from keras.layers import Dense, Input, Concatenate
from sklearn.feature_extraction.text import CountVectorizer
from keras.models import Model, load_model

from snn import SNN, Entity, Relation
from snn.utils import RelationLayer, EntityLayer

In [57]:
# Ontology schema: two entity types, plus one Movie -> Person relation for
# every distinct label found in the relations file.
Person = Entity("Person")
Movie = Entity("Movie")

# Only the first 10000 rows are read; the column names are supplied here.
df = pd.read_csv(
    "../data/relations.tsv",
    sep="\t",
    names=['Movie', 'Relation', 'Person'],
    nrows=10000,
)
relation_labels = df['Relation'].unique()
relations = [
    Relation(label=label, dst=Person, src=Movie)
    for label in relation_labels
]
entities = [Person, Movie]

In [58]:
# Keep only the 'vocabulary' entry of the exported JSON. The context manager
# closes the file handle as soon as parsing is done (a bare open() inside
# json.load() left it open), and the rest of the parsed document is never
# bound to a name, so nothing needs to be deleted afterwards.
with open('outs/IMDB_ONTO_EMBEDING_NEW_data.json') as vocab_file:
    vocab = json.load(vocab_file)['vocabulary']

In [59]:
# Bag-of-words vectorizer pinned to the `vocab` object loaded from the JSON
# export; it is only ever used via transform() in this notebook, never fitted.
cv = CountVectorizer(vocabulary=vocab)

In [60]:
# cv =  pickle.load(open(r'resources/imdb_onto_cv.pkl', 'rb'))

In [61]:
# Restore the saved ontology model. The two project-specific layer classes are
# passed as custom objects so Keras can resolve them while deserialising.
modelonto = load_model(
    'resources/imdb_onto.model',
    custom_objects={
        'RelationLayer': RelationLayer,
        'EntityLayer': EntityLayer,
    },
)

In [62]:
# Width of the input vectors, taken from the vocabulary size (the trailing
# comment records an alternative source: the loaded model's input shape).
sentence_size = len(vocab)#modelonto.input_shape[1]
# Units per entity / per relation; used below to size the classifier head.
neurons_per_ent = 5
neurons_per_rel = 2*neurons_per_ent
# Displayed as the cell output.
sentence_size

117174

In [63]:
# Two symbolic inputs of identical width `sentence_size`. They are addressed by
# these names ('input_plot', 'input_title') when the model is fitted.
input_plot = Input(shape=(sentence_size,), name='input_plot')
input_title = Input(shape=(sentence_size,), name='input_title')

In [64]:
# Instantiate the SNN over the declared entities and relations and build a
# model (`prmodel`) on top of the plot input.
# NOTE(review): `output` is rebound when the classifier head is defined and is
# not otherwise read here; presumably calling snn(input_plot) is required to
# wire the network before build() — confirm against the snn package.
snn = SNN(entities=entities, relations=relations)#(sentence_input,neurons_per_ent,neurons_per_rel)
output = snn(input_plot)
prmodel = snn.build(input_plot)

In [65]:
def copy_weights(model, source=None):
    """Copy weights into ``model`` from the same-named layers of ``source``.

    Parameters
    ----------
    model
        Object exposing ``.layers``; each layer must provide ``weights``,
        ``name`` and ``set_weights``.
    source
        Object exposing ``get_layer(name)`` whose result provides
        ``get_weights()``. When omitted, the notebook-level ``modelonto`` is
        used (looked up at call time), which is the original behaviour.

    Layers with no weights or an empty name are skipped, as are layers for
    which ``source.get_layer`` raises ``ValueError`` (no layer of that name).
    Errors raised by ``set_weights`` itself are not suppressed.
    """
    if source is None:
        # Fall back to the pretrained model loaded earlier in the notebook.
        source = modelonto
    for layer in model.layers:
        if not (layer.weights and layer.name):
            continue
        try:
            donor = source.get_layer(layer.name)
        except ValueError:
            # No counterpart in the source model: keep the current weights.
            continue
        layer.set_weights(donor.get_weights())

In [66]:
# Initialise prmodel's layers from the pretrained ontology model; layers are
# matched by name inside copy_weights().
copy_weights(prmodel)
# Disabled: uncomment to freeze the copied layers while the head is trained.
# prmodel.trainable = False

In [67]:
# Classifier head: prmodel's output for the plot is concatenated with the
# title input, passed through three ReLU layers and a single sigmoid unit.
ll = len(entities) * neurons_per_ent + len(relations) * neurons_per_rel
hidden_widths = (ll, 8 * ll // 10, 8 * ll // 10)

x = Concatenate()([prmodel(input_plot), input_title])
for width in hidden_widths:
    x = Dense(width, activation='relu')(x)
output = Dense(1, activation='sigmoid', name='out')(x)

model = Model(inputs=[input_plot, input_title], outputs=output)
model.compile(optimizer='RMSprop', loss='binary_crossentropy')

In [68]:
# Drop the reference to the pretrained model now that its weights are copied.
# copy_weights() relies on this global, so it cannot be re-run after this cell
# without reloading the model first.
del modelonto

In [69]:
# NOTE(review): SVG is imported but not used in the cells visible here.
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# Lay the model graph out left-to-right, render it to PDF bytes with the
# Graphviz `dot` program, and write those bytes to disk.
mm = model_to_dot(model, rankdir='LR').create(prog='dot', format='pdf')
with open('outs/imdb_onto.pdf','wb') as f:
    f.write(mm)

In [70]:
# Print the layer / output-shape / parameter-count table for the full model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_plot (InputLayer)         (None, 117174)       0                                            
__________________________________________________________________________________________________
model_3 (Model)                 (None, 12)           7564748     input_plot[0][0]                 
__________________________________________________________________________________________________
input_title (InputLayer)        (None, 117174)       0                                            
__________________________________________________________________________________________________
concatenate_2 (Concatenate)     (None, 117186)       0           model_3[1][0]                    
                                                                 input_title[0][0]                
__________

In [71]:
import pickle

In [72]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load files produced by a trusted source.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Train/test splits prepared elsewhere. Each file handle is closed right after
# reading (passing a bare open() to pickle.load left all four open).
xtrain = _load_pickle('resources/xtrain.pkl')
ytrain = _load_pickle('resources/ytrain.pkl')
xtest = _load_pickle('resources/xtest.pkl')
ytest = _load_pickle('resources/ytest.pkl')

In [73]:
# Vectorise both training text collections with the shared fixed vocabulary.
# In the fit call, input1 (from xtrain[0]) feeds 'input_plot' and input2
# (from xtrain[1]) feeds 'input_title'.
input1 = cv.transform(xtrain[0])
input2 = cv.transform(xtrain[1])

In [None]:
# Train for 5 epochs; the inputs are matched to the Input layers by name.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# result of this run is presumably not reproducible — confirm.
model.fit({'input_plot': input1, 'input_title': input2}, ytrain, epochs=5)

Epoch 1/5
Epoch 2/5

In [None]:
# Persist the trained model to disk.
model.save('resources/imdb_problem.model')

In [None]:
# Test-set texts: xtest[0] is treated as the plots and xtest[1] as the titles;
# the evaluation loop vectorises them and feeds them to the model in that order.
plots = xtest[0]
titles = xtest[1]

In [None]:
# N: group size — the evaluation loop scores N consecutive test samples at a time.
N = 2
# K: correct_answer() inspects only the first K entries of each ranking.
K = 1

In [None]:
# Number of test samples; the evaluation loop walks them in steps of N.
len(plots)

In [None]:
def correct_answer(ranking, k=None):
    """Return True if any of the first ``k`` ranking entries has label 1.

    Parameters
    ----------
    ranking
        Sequence of indexable items whose element at index 1 is the label;
        the caller is expected to have sorted it already.
    k
        Number of leading entries to inspect. When omitted, the notebook-level
        constant ``K`` is used (looked up at call time), which is the original
        behaviour.

    Returns
    -------
    bool
        False for an empty ranking.
    """
    if k is None:
        k = K
    return any(rank[1] == 1 for rank in ranking[:k])

In [None]:
# Running count of groups whose top-K ranking contains a positive label;
# incremented by the evaluation loop.
total = 0

In [None]:
# Score the test set in consecutive groups of N samples: within each group the
# samples are ranked by predicted score (highest first), and the group counts
# as a hit when correct_answer() accepts that ranking.
total = 0  # reset here so re-running this cell does not add to a previous run
for i in range(0, len(plots), N):
    input1 = cv.transform(plots[i:i+N])
    input2 = cv.transform(titles[i:i+N])
    # Named `scores`, not `pd`: the previous name rebound the pandas alias, so
    # any later (or re-run) cell calling `pd.` would fail.
    scores = model.predict([input1, input2])
    # Pair each sample's first prediction value with its true label.
    response = zip([item[0] for item in scores], ytest[i:i+N])
    ranking = sorted(response, key=lambda x: x[0], reverse=True)
    total += int(correct_answer(ranking))

In [None]:
# Fraction of groups counted as hits. The evaluation loop runs once per
# *started* group of N samples, i.e. ceil(len(plots) / N) times, so that is the
# denominator. Dividing by the fractional len(plots) / N overstated the score
# whenever len(plots) was not a multiple of N.
n_groups = -(-len(plots) // N)  # integer ceiling division
mean = total / n_groups
mean