In [1]:
from os.path import join
import pandas as pd
import numpy as np
import gzip
import json
import time
import scipy.spatial
import pickle
from time import time
from sklearn.feature_extraction import DictVectorizer

from keras.preprocessing.text import text_to_word_sequence
from keras.models import Model
from keras.layers import Input, Dense
from keras.layers.merge import Dot
from keras.callbacks import EarlyStopping, TensorBoard
from keras import metrics
from keras import backend as K
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.layers.merge import Add, Dot, Concatenate
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.
  return f(*args, **kwds)


In [19]:
def initEmbeddingMap(fileName):
    print("initializing embeddings")
    with open(join("data", "glove.6B", fileName)) as glove:
        return {l[0]: np.asarray(l[1:], dtype="float32") for l in [line.split() for line in glove]}

def clean(text):
    return text_to_word_sequence(text, filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', 
                                 lower=True, split=" ")

def initRawData(input_file):
    print("initializing raw data")
    rawInputData = []
    rawOutputData = []
    with open(input_file,"r") as f:
        for i in f:
            line = f.readline()
            if len(line) < 4:
                break
            lineObj = json.loads(line)
            user = lineObj["reviewerID"]
            item = lineObj["asin"]
            rawInputDataObj = {"user": user, "asin": item}
            rawOutputDataObj = clean(lineObj["reviewText"])
            rawInputData.append(rawInputDataObj)
            rawOutputData.append(rawOutputDataObj)
    return rawInputData, rawOutputData

def group_data(inputData):
    users = {}
    items = {}
    for datum in inputData:
        u = datum["user"]
        i = datum["asin"]
        users.setdefault(u, []).append(i)
        items.setdefault(i, []).append(u)
    return users, items

def getSetFromData(key, data):
    return set([datum.get(key) for datum in data])

def seq_2_matrix(sequence, embedding_map):
    return np.array([embedding_map.get(word) for word in sequence if word in embedding_map])

def matrix_2_avg(emb_matrix):
    return np.mean(emb_matrix, 0)

def initMatInputData(rawInputData, rawOutputData, embedding_map, save=False):
    print('initializing matrix data')
    if len(rawInputData) != len(rawOutputData):
        raise ValueError("Need same size of input and output")
    users = {}
    extra_info = {}
    items = {}
    dictVect = DictVectorizer()
    for i in range(len(rawInputData)):
        vecOutput = seq_2_matrix(rawOutputData[i], embedding_map)
        rawInput = rawInputData[i]
        user = rawInput['user']
        item = rawInput['asin']
        print(vecOutput)
        users.setdefault(user, []).append(vecOutput)
        items.setdefault(item, []).append(vecOutput)
        
    matUserInputData = []
    matItemInputData = []
    users = {k: np.vstack(v) for k, v in users.items()}
    items = {k: np.vstack(v) for k, v in items.items()}
    extra_info['user_seq_sizes'] = [m.shape[0] for m in users.values()]
    extra_info['item_seq_sizes'] = [m.shape[0] for m in items.values()]
    for i in range(len(rawInputData)):
        rawInput = rawInputData[i]
        user = rawInput['user']
        item = rawInput['asin']
        matUserInputData.append(users.get(user))
        matItemInputData.append(items.get(item))
    return matUserInputData, matItemInputData, extra_info

def toKey(user, item):
    return (user, item)

def initRatingsOutputData(rawInputData, input_file):
    ratingsData = []
    userItemDict = {}
    for i in range(len(rawInputData)):
        rawInput = rawInputData[i]
        userItem = toKey(rawInput['user'], rawInput['asin'])
        userItemDict[userItem] = i
        ratingsData.append(None)
    return ratingsData

In [3]:
fileName = "data/reviews_Amazon_Instant_Video_5.json"
rawInputData, rawOutputData = initRawData(input_file=fileName)

initializing raw data


In [6]:
users, movies = group_data(rawInputData)

In [10]:
rand_idxs = np.random.permutation(len(rawOutputData))
rawInputData = [rawInputData[i] for i in rand_idxs]
rawOutputData = [rawOutputData[i] for i in rand_idxs]

In [11]:
embedding_map = initEmbeddingMap("glove.6B.50d.txt")

initializing embeddings


In [20]:
matUserInputData, matMovieInputData, extra_info = initMatInputData(rawInputData, rawOutputData, embedding_map)

initializing matrix data
[[ 0.095387   -0.16865    -0.11514    ..., -0.42103001 -0.53816998  0.13738   ]
 [ 0.55561     0.17039999  0.13692001 ..., -0.32978001  0.24824999 -0.38275   ]
 [ 0.21705     0.46515    -0.46757001 ..., -0.043782    0.41012999  0.1796    ]
 ..., 
 [ 0.41800001  0.24968    -0.41242    ..., -0.18411    -0.11514    -0.78580999]
 [ 0.48251     0.87746    -0.23455    ..., -0.41119999  0.23625     0.26451001]
 [-0.22258     0.089765    0.45857    ..., -0.23081     0.37729001
  -0.41736999]]
[[ 0.17698     0.065221    0.28547999 ..., -0.29776001  0.11035     0.22057   ]
 [ 0.27061999 -0.36596     0.097193   ..., -0.75256002 -0.17335001 -0.22587   ]
 [ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 ..., 
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]
 [ 0.26357999  0.18747     0.044394   ..., -0.42936     0.52879    -0.12598   ]
 [ 0.55024999 -0.24942    -0.0009386  ...,  0.33763     0.050139
   0.334650

[[ 0.49860999 -0.12284     0.44771999 ...,  0.26394999 -0.062214
   0.62919998]
 [ 0.48642001  0.019708    0.16084    ..., -0.050574    0.19356     0.17562   ]
 [ 0.15272     0.36181    -0.22168    ...,  0.43382001 -0.084617    0.1214    ]
 ..., 
 [ 0.19253001  0.10006     0.063798   ...,  0.081191   -0.30485001 -0.30513   ]
 [ 0.70853001  0.57088    -0.4716     ..., -0.22562    -0.093918
  -0.80374998]
 [ 0.64642    -0.55599999  0.47038001 ..., -0.35831001 -0.10995    -0.447     ]]
[[ 0.53074002  0.40116999 -0.40785    ...,  0.28762001  0.1444      0.23611   ]
 [ 0.079654    0.78386003 -0.13076    ..., -0.1175     -0.098727
   0.57669002]
 [ 0.48251     0.87746    -0.23455    ..., -0.41119999  0.23625     0.26451001]
 ..., 
 [ 0.91101998 -0.22871999  0.2077     ...,  0.16692001 -0.050063
   0.19241001]
 [ 0.4084     -0.21289    -0.33160999 ...,  1.06260002 -0.19031     0.43584001]
 [-0.31905001 -0.09507    -0.049458   ..., -0.23004     0.081364   -0.035147  ]]
[[ 0.15272     0.36181  

[[ 0.60307997 -0.32023999  0.088857   ..., -0.25187001 -0.26879001  0.36657   ]
 [ 0.75138003 -0.15736     0.47207999 ...,  0.23532    -0.26337999 -0.16202   ]
 [ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 ..., 
 [ 0.41519001  0.13167    -0.0569     ..., -0.11806    -0.34498999
  -0.86949003]
 [-0.82643002  1.09909999 -1.10039997 ...,  0.45311999 -0.37187999
  -0.65143001]
 [-0.089298    0.54512    -1.1207     ...,  0.62721997 -0.73076999  0.25854   ]]
[[-0.53482002  0.51463997  0.01403    ..., -1.01380002  0.27698001  0.43006   ]
 [ 0.34147999  0.11925    -0.073513   ..., -0.48133999 -0.46849999  0.67534   ]
 [ 0.61849999  0.64253998 -0.46551999 ..., -0.27557001  0.30899     0.48497   ]
 ..., 
 [ 0.30045     0.25005999 -0.16692001 ..., -0.07131     0.23052    -0.51938999]
 [ 0.41800001  0.24968    -0.41242    ..., -0.18411    -0.11514    -0.78580999]
 [ 0.39118999  0.34992     0.22498    ..., -0.20466    -0.0059552
   0.17460001]]
[[-0.27279001  0.

[[  5.30740023e-01   4.01169986e-01  -4.07849997e-01   1.54440001e-01
    4.77820009e-01   2.07540005e-01  -2.69510001e-01  -3.40229988e-01
   -1.08790003e-01   1.05630003e-01  -1.02890000e-01   1.08489998e-01
   -4.96809989e-01  -2.51280010e-01   8.40250015e-01   3.89490008e-01
    3.22840005e-01  -2.27970004e-01  -4.43419993e-01  -3.16489995e-01
   -1.24059997e-01  -2.81699985e-01   1.94670007e-01   5.55129983e-02
    5.67049980e-01  -1.74189997e+00  -9.11450028e-01   2.70359993e-01
    4.19270009e-01   2.02789996e-02   4.04050016e+00  -2.49430001e-01
   -2.04160005e-01  -6.27619982e-01  -5.47830015e-02  -2.68830001e-01
    1.84440002e-01   1.82040006e-01  -2.35359997e-01  -1.61550000e-01
   -2.76549995e-01   3.55059989e-02  -3.82110000e-01  -7.51340005e-04
   -2.48219997e-01   2.81639993e-01   1.28189996e-01   2.87620008e-01
    1.44400001e-01   2.36110002e-01]
 [  6.18499994e-01   6.42539978e-01  -4.65519994e-01   3.75699997e-01
    7.48380005e-01   5.37389994e-01   2.22390005e-03 

[[ 0.41800001  0.24968    -0.41242    ..., -0.18411    -0.11514    -0.78580999]
 [ 0.66426003  0.71810001  0.17513999 ..., -0.32442999  0.42451999
  -0.70973998]
 [ 0.75068003  0.053127    0.054627   ...,  0.97435999  0.27401     0.10775   ]
 ..., 
 [-0.010419    0.18859001 -1.11029994 ..., -0.66228998  0.16455001
   0.51964998]
 [ 0.26357999  0.18747     0.044394   ..., -0.42936     0.52879    -0.12598   ]
 [-0.22492     0.69449002 -0.91219002 ...,  0.14253999  0.18313     0.86878997]]
[[ 0.52904999 -0.30145001  0.056191   ...,  0.067963   -0.10931     0.17161   ]
 [ 0.70853001  0.57088    -0.4716     ..., -0.22562    -0.093918
  -0.80374998]
 [-0.27279001  0.77515    -0.10181    ..., -0.73367     0.040383    0.26657   ]
 ..., 
 [-0.66508001  0.78362     0.74452001 ...,  0.25994     0.0064557  -0.19891   ]
 [ 1.23020005  1.00800002 -0.66763002 ...,  0.069793   -0.039322
  -0.33395001]
 [-0.24552999  0.92887002  0.94635999 ..., -0.75177002  0.42186999
   0.48225001]]
[[ 0.11891     0.1

[[-0.24922    -0.39835    -0.45851001 ..., -0.16507     0.45291001
   1.36689997]
 [-0.35585999  0.52130002 -0.61070001 ...,  0.27206999  0.31305     0.92771   ]
 [ 0.15187     0.3633     -0.15752    ..., -0.52201003 -0.28400001
   1.14569998]
 ..., 
 [ 0.022106   -0.37986001 -0.24854    ...,  0.1363     -0.31229001
   1.10790002]
 [ 0.51292002  0.09032     0.023552   ...,  1.15849996  0.29796001
   0.075048  ]
 [ 0.023693    0.13316     0.023131   ...,  0.34584999  0.24201     0.82236999]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [-0.13886     1.1401     -0.85211998 ..., -1.37109995 -0.28606001
   0.28979999]
 [ 0.53074002  0.40116999 -0.40785    ...,  0.28762001  0.1444      0.23611   ]
 ..., 
 [ 0.074381    0.1127     -1.05589998 ...,  0.12434     0.34889001  0.48049   ]
 [-0.49491999 -0.44402    -0.22972    ..., -0.53130001  0.73872     1.31219995]
 [-0.034772    0.64144999 -0.37351999 ..., -0.23396    -0.28784001
   0.98359001]]
[[ 0.41800

[[-0.20384     0.47055     0.2168     ..., -0.64949    -1.89979994
   0.066941  ]
 [ 0.26818001  0.14346001 -0.27877    ..., -0.63209999 -0.25027999 -0.38097   ]
 [-0.55080003  0.84467     0.038512   ..., -1.43429995 -0.25077999
   1.00820005]
 ..., 
 [ 0.77604002  0.22584     0.45043999 ..., -0.97437    -0.78565001
  -0.81770003]
 [-0.35585999  0.52130002 -0.61070001 ...,  0.27206999  0.31305     0.92771   ]
 [-0.034772    0.64144999 -0.37351999 ..., -0.23396    -0.28784001
   0.98359001]]
[[ 0.55024999 -0.24942    -0.0009386  ...,  0.33763     0.050139
   0.33465001]
 [ 0.42974001 -0.24432001 -0.58494997 ...,  0.062692    0.12234     0.83653998]
 [-0.27279001  0.77515    -0.10181    ..., -0.73367     0.040383    0.26657   ]
 ..., 
 [ 0.68937999 -0.10644     0.17083    ...,  0.41760999 -0.22504     0.61412001]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]
 [ 0.83393002  0.16421001 -0.045411   ...,  0.086267   -0.34259     0.6577    ]]
[[  3.68160009e-

[[ -3.58480006e-01  -1.15500003e-01   1.13710001e-01   4.68140006e-01
    7.49499977e-01  -6.15230024e-01   4.76390004e-01   9.07540023e-02
    3.68900001e-01   5.03310025e-01  -2.24669993e-01   2.33999997e-01
   -6.49010003e-01   5.56670018e-02   3.01919997e-01  -1.35360003e-01
    9.34729993e-01   8.86770010e-01  -7.07560003e-01  -4.84079987e-01
   -9.06250000e-01   6.23139977e-01  -1.87930003e-01  -5.10200024e-01
    1.25650001e+00  -2.88969994e-01  -1.28190005e+00   3.02839994e-01
    1.04229999e+00  -5.48850000e-01   1.00539994e+00   6.20530009e-01
    3.18789989e-01  -6.08219989e-02  -2.49190003e-01   5.01900017e-01
    4.11709994e-01   1.36480004e-01  -4.98149991e-01  -5.98219991e-01
   -1.68760002e-01  -2.60960013e-01  -5.32830000e-01   2.00829998e-01
   -1.90950006e-01  -2.86929999e-02   9.08429995e-02  -1.10629998e-01
   -4.08580005e-02   8.84389997e-01]
 [  1.18910000e-01   1.52549997e-01  -8.20730031e-02  -7.41439998e-01
    7.59169996e-01  -4.83280003e-01  -3.10090005e-01 

[[ -2.72790015e-01   7.75150001e-01  -1.01810001e-01  -9.16599989e-01
    9.04770017e-01  -7.05009997e-02  -4.75690007e-01   4.46079999e-01
    1.69699997e-01   7.23519996e-02  -1.63059995e-01   8.68520021e-01
   -7.66340017e-01  -1.61029994e-02   7.84919977e-01   2.95199990e-01
   -7.48589993e-01   2.09900007e-01   6.55369997e-01  -6.23340011e-01
   -4.37110007e-01   1.18540001e+00   4.75190014e-01   9.38660000e-03
    1.13769996e+00  -2.43939996e+00  -1.56190002e+00   4.90009993e-01
    1.09850001e+00  -9.73710001e-01   3.46280003e+00   1.04079998e+00
   -6.51380002e-01   5.71889997e-01  -1.25229999e-01   2.67049998e-01
    1.63729995e-01   4.11049992e-01   7.50899971e-01  -7.79229999e-01
    3.63800004e-02  -2.86089987e-01  -7.23649979e-01   6.35110021e-01
    8.94410014e-02  -3.01330000e-01   3.65179986e-01  -7.33669996e-01
    4.03829999e-02   2.66570002e-01]
 [  3.76480013e-01   1.24259996e+00  -3.97390008e-01  -5.31840026e-01
    1.18700004e+00   1.50909996e+00  -8.41709971e-01 

   -9.87270027e-02   5.76690018e-01]]
[[ 0.20782     0.12713    -0.30188    ..., -0.35301     0.05058     0.019495  ]
 [ 0.21705     0.46515    -0.46757001 ..., -0.043782    0.41012999  0.1796    ]
 [ 0.46955001  0.82963002 -0.13828    ..., -0.38839999 -0.81348002
  -0.086581  ]
 ..., 
 [ 0.81544     0.30171001  0.54720002 ...,  0.31964999 -0.34740999  0.41672   ]
 [ 0.55190998 -1.31040001  0.039413   ..., -0.38238999  0.21431001 -0.19289   ]
 [ 0.21122999  0.12455    -0.14519    ...,  0.60215002 -0.13619     0.21086   ]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 0.95528001 -0.01161     0.074285   ..., -0.26861     0.27789    -0.71996999]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]
 ..., 
 [ 0.26622999  0.46052     0.16411    ...,  0.05035    -0.52179003
  -0.62384999]
 [ 0.41800001  0.24968    -0.41242    ..., -0.18411    -0.11514    -0.78580999]
 [ 0.19965     0.18258999  0.45203    ...,  0.72434998 -0.908

[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 0.086888   -0.19416    -0.24267    ..., -0.76999998  0.39449999 -0.16937   ]
 [ 0.0016675  -0.16376001 -0.092648   ..., -0.024906   -0.069572
   1.13429999]
 ..., 
 [ 0.8678     -0.89626002  0.58137    ...,  0.59605002 -0.53266001
   1.27760005]
 [ 0.61849999  0.64253998 -0.46551999 ..., -0.27557001  0.30899     0.48497   ]
 [-0.026567    1.33570004 -1.028      ..., -0.49658    -0.41618001
  -0.25490001]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 0.94910997 -0.34968001  0.48124999 ...,  0.36232999 -0.72512001
  -0.60890001]
 [ 0.266       0.66825998 -0.20148    ..., -0.86220998 -0.49737999  0.13674   ]
 ..., 
 [ 0.32269001 -0.11823     0.15135001 ...,  0.36480001 -0.18610001
  -0.34685999]
 [ 0.60079998  0.18043999  0.078339   ..., -0.016404   -0.65372002 -0.38255   ]
 [ 0.16962001  0.43439999 -0.042106   ..., -0.50107998 -0.77039999
  -0.32234001]]
[[ 0.11891

[[  3.61400008e-01  -5.51379979e-01  -7.00540006e-01  -3.03070009e-01
    4.26660001e-01   1.17969997e-01   7.04990029e-01   3.94870013e-01
   -1.29309997e-01   2.91909993e-01  -7.23060012e-01   1.80830006e-02
   -1.05250001e+00  -6.07069992e-02   1.72249997e+00  -4.15470004e-01
    1.78250000e-02   1.13270000e-01  -1.40579998e-01   5.09269983e-02
   -6.59269989e-01   7.41959989e-01   2.35320002e-01  -1.06059998e-01
    9.85549986e-01  -7.77649999e-01  -1.70589995e+00   5.19879997e-01
    1.94229996e+00  -3.42449993e-01   1.16310000e+00   3.60229999e-01
    3.28460008e-01  -6.63360000e-01  -5.22979975e-01   7.89550006e-01
    5.16610026e-01  -5.74140012e-01   3.27520013e-01   1.97390005e-01
   -1.12860000e+00  -2.12479994e-01   3.19139987e-01   2.98619986e-01
    6.59300029e-01   5.15579998e-01   6.43329993e-02   2.39109993e-01
    6.07089996e-01   4.45160002e-01]
 [ -1.24629997e-01  -3.60909998e-01  -8.86569977e-01  -3.19559991e-01
    6.02119982e-01  -1.23619996e-02   3.68209988e-01 

[[  5.97180009e-01   2.26030007e-01  -3.99049997e-01  -3.11100006e-01
    1.18280005e+00  -1.68620005e-01  -4.99639988e-01   3.16399992e-01
   -6.79030001e-01   4.17879999e-01  -4.63330001e-01  -3.66259992e-01
    7.40450025e-01   1.84210002e-01   7.07000017e-01  -1.84139997e-01
    2.26099998e-01   2.74820000e-01  -2.85360008e-01  -1.03989995e+00
   -8.12060013e-02   6.11699998e-01  -3.33319992e-01   8.73109996e-01
    1.54449999e+00  -6.25909984e-01  -8.05149972e-01   4.94639993e-01
    5.46289980e-01  -3.62370014e-01   1.04779994e+00  -6.00589998e-02
    3.10770005e-01  -1.08529997e+00  -2.89779991e-01  -6.42300025e-02
   -4.28000003e-01  -8.47409964e-02  -4.88700002e-01  -3.76210004e-01
    1.79089993e-01   2.64400002e-02  -5.29780015e-02   7.85939991e-01
    6.70199990e-01  -1.68990001e-01   2.99089998e-01   3.26959997e-01
    2.94640005e-01   5.43820001e-02]
 [ -3.81099999e-01   1.14390004e+00   2.28619993e-01  -6.97390020e-01
    6.81129992e-02   5.17870009e-01  -1.21190000e+00 

[[ 0.06177    -0.27821001  0.17239    ..., -0.15233     0.88287997  0.55618   ]
 [ 0.83153999 -0.16542999 -0.28922999 ..., -0.23478     0.15236001 -0.36216   ]
 [ 0.63800001 -0.58291    -1.92130005 ..., -0.68318999 -0.31977999
  -0.058325  ]
 ..., 
 [ 0.95108998 -1.28799999  0.22052    ..., -1.53770006 -0.053538
  -0.41980001]
 [ 0.33041999  0.24995001 -0.60873997 ..., -0.50703001 -0.027273
  -0.53285003]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 0.76190001 -0.29773     0.51396    ...,  0.43088001 -0.22768     0.40259999]
 [ 0.94910997 -0.34968001  0.48124999 ...,  0.36232999 -0.72512001
  -0.60890001]
 ..., 
 [ 0.12967999 -0.20314001 -0.30364999 ...,  0.95431     0.28044     0.026483  ]
 [ 0.40404001  0.38784999  0.53397    ...,  0.84719002 -0.55723     0.30022001]
 [-0.0049087   0.12611     0.14056    ..., -0.58464003 -0.31830999  0.31564   ]]
[[ 0.40990999  0.0

[[  2.17050001e-01   4.65149999e-01  -4.67570007e-01 ...,  -4.37819995e-02
    4.10129994e-01   1.79600000e-01]
 [ -1.31359994e-01   4.68250006e-01   7.51339972e-01 ...,   9.26600024e-02
    8.00939975e-04   8.70729983e-01]
 [  2.87010014e-01   5.46840012e-01  -3.29820007e-01 ...,   8.84279981e-02
   -2.18830004e-01   1.33389995e-01]
 ..., 
 [ -3.55859995e-01   5.21300018e-01  -6.10700011e-01 ...,   2.72069991e-01
    3.13050002e-01   9.27709997e-01]
 [ -4.34520006e-01   4.75100011e-01   1.37769997e+00 ...,  -1.80130005e-01
    3.26700002e-01   1.56149998e-01]
 [  2.87010014e-01   5.46840012e-01  -3.29820007e-01 ...,   8.84279981e-02
   -2.18830004e-01   1.33389995e-01]]
[[ 0.96192998  0.012516    0.21732999 ...,  0.14032    -0.38468    -0.38712001]
 [ 0.70835    -0.57361001  0.15375    ..., -0.23113    -0.31217    -0.30489999]
 [ 0.68997002  0.54491001 -0.044471   ...,  0.054471    1.04229999
   0.35811001]
 ..., 
 [ 0.14827999  0.17761     0.42346001 ..., -0.2182      0.12971     0.3

[[ 0.65101999  0.0025814   0.45798999 ...,  0.21574    -0.32969999 -0.18945   ]
 [-0.19461    -0.051277    0.26445001 ..., -0.67102998 -0.21652    -0.025891  ]
 [-0.34046    -0.1041      0.12251    ..., -0.80032998 -0.49737     0.23571999]
 ..., 
 [-0.19425    -0.47251999 -0.23047    ..., -0.069976    0.81379998  0.10541   ]
 [ 0.78556001  1.28680003 -1.18439996 ..., -0.56884998  0.60583001
   0.073066  ]
 [ 0.41036999  0.11342     0.051524   ..., -0.36063999 -0.19616    -0.81066   ]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [-0.13886     1.1401     -0.85211998 ..., -1.37109995 -0.28606001
   0.28979999]
 [ 0.41800001  0.24968    -0.41242    ..., -0.18411    -0.11514    -0.78580999]
 ..., 
 [ 0.27487001  0.097118   -0.71039999 ...,  0.075282   -0.19070999
   0.098589  ]
 [ 0.27366999  0.37357     0.084406   ...,  0.081863   -0.63350999
   1.23810005]
 [ 0.90025997  0.23366    -0.95718002 ..., -0.17023    -0.51346999
   0.42972001]]
[[ 0.5379999

[[-0.051342   -0.28753     0.82754999 ..., -0.043632    0.60343999
   1.58029997]
 [ 0.26818001  0.14346001 -0.27877    ..., -0.63209999 -0.25027999 -0.38097   ]
 [-0.014547   -0.20208    -0.75278002 ..., -0.13428999  0.21133     1.53680003]
 ..., 
 [ 0.33041999  0.24995001 -0.60873997 ..., -0.50703001 -0.027273
  -0.53285003]
 [ 0.63042003  0.75208002  0.11595    ...,  0.54089999 -0.11725     0.35303   ]
 [ 0.81143999  0.37619999 -0.0029111  ..., -0.23817    -0.53477001
   0.94383001]]
[[  9.55280006e-01  -1.16100004e-02   7.42850006e-02 ...,  -2.68610001e-01
    2.77889997e-01  -7.19969988e-01]
 [  8.94659996e-01   3.66039991e-01   3.75880003e-01 ...,   5.59979975e-01
   -6.65929973e-01   1.21770002e-01]
 [  1.33230002e-03   7.49629974e-01  -7.57830024e-01 ...,  -6.43790007e-01
    4.31599983e-05   1.62660003e-01]
 ..., 
 [  3.30419987e-01   2.49950007e-01  -6.08739972e-01 ...,  -5.07030010e-01
   -2.72729993e-02  -5.32850027e-01]
 [  7.67189980e-01   1.23899996e-01  -1.11189999e-01 

[[-0.13886     1.1401     -0.85211998 ..., -1.37109995 -0.28606001
   0.28979999]
 [ 0.53074002  0.40116999 -0.40785    ...,  0.28762001  0.1444      0.23611   ]
 [-0.034772    0.64144999 -0.37351999 ..., -0.23396    -0.28784001
   0.98359001]
 ..., 
 [ 0.13223     0.097975    0.61232001 ..., -0.076126    0.52677     0.45174   ]
 [ 0.33041999  0.24995001 -0.60873997 ..., -0.50703001 -0.027273
  -0.53285003]
 [-0.14524999  0.31265     0.15184    ..., -0.50895     0.14322001
   1.01180005]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [-0.31384    -0.23372     0.73334002 ..., -0.38913    -0.42574    -0.20341   ]
 [ 0.53074002  0.40116999 -0.40785    ...,  0.28762001  0.1444      0.23611   ]
 ..., 
 [ 0.63489997  0.55422997  0.93537003 ...,  0.68331999 -0.34566     0.54461998]
 [ 0.15272     0.36181    -0.22168    ...,  0.43382001 -0.084617    0.1214    ]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]]
[[  2.77240008e-

[[  1.18910000e-01   1.52549997e-01  -8.20730031e-02  -7.41439998e-01
    7.59169996e-01  -4.83280003e-01  -3.10090005e-01   5.14760017e-01
   -9.87079978e-01   6.17570011e-04  -1.50429994e-01   8.37700009e-01
   -1.07969999e+00  -5.14599979e-01   1.31879997e+00   6.20069981e-01
    1.37789994e-01   4.71080005e-01  -7.28740022e-02  -7.26750016e-01
   -7.41159976e-01   7.52629995e-01   8.81799996e-01   2.95610011e-01
    1.35479999e+00  -2.57010007e+00  -1.35230005e+00   4.58799988e-01
    1.00680006e+00  -1.18560004e+00   3.47370005e+00   7.78980017e-01
   -7.29290009e-01   2.51020014e-01  -2.61559993e-01  -3.46839994e-01
    5.58409989e-01   7.50980020e-01   4.98299986e-01  -2.68229991e-01
   -2.74430006e-03  -1.82980001e-02  -2.80959994e-01   5.53179979e-01
    3.77059989e-02   1.85550004e-01  -1.50250003e-01  -5.75119972e-01
   -2.66710013e-01   9.21209991e-01]
 [ -1.38860002e-01   1.14010000e+00  -8.52119982e-01  -2.92120010e-01
    7.55339980e-01   8.27620029e-01  -3.18100005e-01 

[[  5.70490003e-01  -7.78539991e-03  -7.07660019e-01  -3.17849994e-01
    8.94930005e-01  -1.61279999e-02  -6.71489984e-02   1.57649994e-01
   -4.98320013e-01   2.58450001e-01   1.09430000e-01   3.67280006e-01
   -1.48430005e-01   6.32859990e-02   2.08320007e-01   4.59199995e-01
    7.17809975e-01   2.27720007e-01  -1.53490005e-03  -9.30930018e-01
   -8.00480008e-01   4.67139989e-01   4.15710002e-01   1.75720006e-01
    1.08759999e+00  -1.61160004e+00  -7.09429979e-01   8.37719977e-01
    6.70809984e-01   1.81390002e-01   3.98990011e+00  -1.02700002e-01
    4.39000010e-01  -6.79260015e-01   1.18610002e-01  -2.01820001e-01
   -8.16029981e-02   9.07389998e-01  -5.22580028e-01  -4.84259993e-01
   -3.13259989e-01   1.03249997e-01   1.30360007e-01   3.51150006e-01
    3.75930011e-01   6.43879995e-02  -2.25899994e-01   7.91250020e-02
    1.25729993e-01   8.39389980e-01]
 [ -1.45469997e-02  -2.02079996e-01  -7.52780020e-01  -2.02720001e-01
    7.01629996e-01   6.65350020e-01   1.73999995e-01 

[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 1.02460003 -0.84350002  0.30855    ..., -0.40020001 -0.14772999
  -0.021354  ]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]
 ..., 
 [ 0.27847999  0.079579   -0.10584    ..., -0.096195   -0.071975
  -0.22772001]
 [ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [-0.23519    -0.13223     0.24955    ...,  0.25870001  0.25876999  1.1602    ]]
[[-0.27279001  0.77515    -0.10181    ..., -0.73367     0.040383    0.26657   ]
 [ 0.57651001  1.13960004 -0.21861    ..., -2.07240009  0.23199999  0.37039   ]
 [ 0.26818001  0.14346001 -0.27877    ..., -0.63209999 -0.25027999 -0.38097   ]
 ..., 
 [ 0.49860999 -0.12284     0.44771999 ...,  0.26394999 -0.062214
   0.62919998]
 [ 0.22678    -0.14627001 -0.34042001 ...,  0.082052    0.045228    0.63316   ]
 [ 0.042454    0.12552001 -0.066073   ...,  0.5478      0.32894999  0.65252   ]]
[[ 0.266       0.6

[[  4.18000013e-01   2.49679998e-01  -4.12420005e-01   1.21699996e-01
    3.45270008e-01  -4.44569997e-02  -4.96879995e-01  -1.78619996e-01
   -6.60229998e-04  -6.56599998e-01   2.78430015e-01  -1.47670001e-01
   -5.56770027e-01   1.46579996e-01  -9.50950012e-03   1.16579998e-02
    1.02040000e-01  -1.27920002e-01  -8.44299972e-01  -1.21809997e-01
   -1.68009996e-02  -3.32789987e-01  -1.55200005e-01  -2.31309995e-01
   -1.91809997e-01  -1.88230002e+00  -7.67459989e-01   9.90509987e-02
   -4.21249986e-01  -1.95260003e-01   4.00710011e+00  -1.85939997e-01
   -5.22870004e-01  -3.16810012e-01   5.92130003e-04   7.44489999e-03
    1.77780002e-01  -1.58969998e-01   1.20409997e-02  -5.42230010e-02
   -2.98709989e-01  -1.57490000e-01  -3.47579986e-01  -4.56370004e-02
   -4.42510009e-01   1.87849998e-01   2.78489990e-03  -1.84110001e-01
   -1.15139998e-01  -7.85809994e-01]
 [ -5.21560013e-01   6.38019979e-01  -1.47459996e+00  -4.64309990e-01
    1.30850002e-01   1.51419997e-01   3.46689999e-01 

[[ 0.21705     0.46515    -0.46757001 ..., -0.043782    0.41012999  0.1796    ]
 [-0.45041999  0.58034003  0.1187     ..., -0.16169    -0.81639999
   1.80359995]
 [-1.59529996  0.36983001  0.70472002 ..., -0.48787999 -0.50444001
   0.34229001]
 ..., 
 [-0.32922    -0.94793999 -1.13750005 ...,  0.21086     0.59954     0.90969002]
 [-0.051524    0.25137001 -0.14722    ..., -0.40707001  0.0065966
  -0.61168998]
 [-0.88186002  0.056442   -0.81972998 ..., -0.14658     0.8448      0.49233001]]
[[-0.40430999  0.78002    -0.67537999 ...,  0.72254997  0.47589999
   0.79514003]
 [-1.01059997  1.21580005 -0.27390999 ...,  0.033268   -0.55925     0.17686   ]
 [ 0.023332    0.42157999  0.40608999 ..., -0.63915002 -0.24518     0.1041    ]
 ..., 
 [ 0.61849999  0.64253998 -0.46551999 ..., -0.27557001  0.30899     0.48497   ]
 [-1.07229996  0.53650999 -1.09889996 ..., -0.65122998 -0.46650001
   0.37444001]
 [ 0.32111999 -0.69305998  0.47922    ..., -0.43547001 -0.1108     -0.58499998]]
[[ -1.38860002e

[[ 0.53074002  0.40116999 -0.40785    ...,  0.28762001  0.1444      0.23611   ]
 [ 0.61849999  0.64253998 -0.46551999 ..., -0.27557001  0.30899     0.48497   ]
 [ 0.36142999  0.58614999 -0.23717999 ...,  0.39362001  0.36522999
   0.36076999]
 ..., 
 [ 0.011405   -0.53005999 -0.31174001 ...,  0.76414001  0.1025      0.088056  ]
 [ 0.52735001 -0.61475003 -0.42524001 ...,  1.16630006 -0.10517     1.20070004]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]]
[[ 0.57387    -0.32729     0.070521   ...,  0.48758999 -0.18438999
   0.69938999]
 [-0.13886     1.1401     -0.85211998 ..., -1.37109995 -0.28606001
   0.28979999]
 [-0.0049087   0.12611     0.14056    ..., -0.58464003 -0.31830999  0.31564   ]
 ..., 
 [ 0.0016675  -0.16376001 -0.092648   ..., -0.024906   -0.069572
   1.13429999]
 [-0.48297    -0.34784999 -0.064558   ..., -0.77156001  0.84885001
  -0.052487  ]
 [ 0.19893999 -0.65570003  1.03649998 ...,  0.67561001 -1.03760004
   0.72071999]]
[[ -2.65670009

[[  4.82919991e-01  -1.05889998e-01  -4.87549990e-01  -7.24990010e-01
   -3.34279984e-02   6.46210015e-01  -2.87779987e-01  -2.82349996e-02
   -6.72089994e-01   2.78789997e-01  -8.58059973e-02   3.92580003e-01
    3.70970011e-01  -8.95399973e-03   2.70410001e-01   1.24370001e-01
    4.55599986e-02  -1.15470000e-01   1.82400003e-01  -1.09340000e+00
   -2.68570006e-01   1.23460002e-01   6.80760026e-01   6.05539978e-01
    3.26889992e-01  -1.47249997e+00  -7.18370020e-01   3.36329997e-01
    5.59010029e-01  -1.23649999e-01   3.63490009e+00  -4.04540002e-02
    4.91090000e-01  -9.94090021e-01   4.93919998e-02   5.06669998e-01
   -2.77179986e-01   1.73649997e-01  -7.47089982e-01   1.00359999e-01
   -1.39060006e-01   4.76790011e-01   7.36259967e-02   1.03989995e+00
    5.69610000e-01  -3.77770007e-01  -1.71869993e-01  -1.99499995e-01
   -2.51040012e-01  -1.72120005e-01]
 [ -5.30789971e-01  -2.19650000e-01   5.32679975e-01  -2.31439993e-01
    7.43390024e-02  -6.55049980e-02  -1.04209997e-01 

[[  1.18910000e-01   1.52549997e-01  -8.20730031e-02 ...,  -5.75119972e-01
   -2.66710013e-01   9.21209991e-01]
 [  2.66000003e-01   6.68259978e-01  -2.01480001e-01 ...,  -8.62209976e-01
   -4.97379988e-01   1.36739999e-01]
 [  5.30740023e-01   4.01169986e-01  -4.07849997e-01 ...,   2.87620008e-01
    1.44400001e-01   2.36110002e-01]
 ..., 
 [  2.07819998e-01   1.27130002e-01  -3.01880002e-01 ...,  -3.53009999e-01
    5.05799986e-02   1.94949992e-02]
 [  8.87329981e-04  -3.84229988e-01  -6.88549995e-01 ...,   4.41059992e-02
    5.47839999e-01   1.01460004e+00]
 [  2.31040001e-01   8.14260006e-01  -1.34280002e+00 ...,   3.54770005e-01
   -2.20449999e-01  -5.08360028e-01]]
[[ 0.17698     0.065221    0.28547999 ..., -0.29776001  0.11035     0.22057   ]
 [ 0.60224998 -0.036286   -0.31705001 ..., -0.055718    0.10712     0.69641   ]
 [ 0.68046999 -0.039263    0.30186    ..., -0.073297   -0.064699
  -0.26043999]
 ..., 
 [-0.10274    -0.79102999  1.15789998 ...,  0.37775999  0.089557
   0.496

[[ -2.65670009e-02   1.33570004e+00  -1.02800000e+00  -3.72900009e-01
    5.20120025e-01  -1.26990005e-01  -3.54330003e-01   3.78239989e-01
   -2.97160000e-01   9.38939974e-02  -3.41220014e-02   9.29610014e-01
   -1.40230000e-01  -6.32990003e-01   2.08010003e-02  -2.15330005e-01
    9.69229996e-01   4.76539999e-01  -1.00390005e+00  -2.40130007e-01
   -3.63249987e-01  -4.75700013e-03  -5.14800012e-01  -4.62599993e-01
    1.24469995e+00  -1.83159995e+00  -1.55809999e+00  -3.74650002e-01
    5.33620000e-01   2.08829999e-01   3.22090006e+00   6.45489991e-01
    3.74379992e-01  -1.76569998e-01  -2.41640005e-02   3.37859988e-01
   -4.19000000e-01   4.00810003e-01  -1.14490002e-01   5.12319990e-02
   -1.52050003e-01   2.98550010e-01  -4.40519989e-01   1.10890001e-01
   -2.46329993e-01   6.62509978e-01  -2.69490004e-01  -4.96580005e-01
   -4.16180015e-01  -2.54900008e-01]
 [ -1.31359994e-01   4.68250006e-01   7.51339972e-01   9.50510025e-01
   -3.88689995e-01  -4.42189984e-02  -9.34059978e-01 

[[-0.23763999  0.43119001 -0.72153997 ..., -0.47773999  0.37379     1.07819998]
 [ 0.43656999  0.031303    0.25946    ...,  0.43540001  0.64441001
   0.27816999]
 [-0.25725001  0.86611003 -1.20860004 ..., -0.34531    -0.2999      0.66193002]
 ..., 
 [ 0.28701001  0.54684001 -0.32982001 ...,  0.088428   -0.21883     0.13338999]
 [ 0.58288997  0.36258     0.34064999 ..., -0.16158    -0.35168999
  -0.82555002]
 [-0.068159    0.66481    -0.062478   ...,  0.82305998 -0.54023999
  -0.0017773 ]]
[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [-0.13886     1.1401     -0.85211998 ..., -1.37109995 -0.28606001
   0.28979999]
 [ 0.75541002  0.88516003  0.43915999 ..., -0.17528    -0.21658     0.21983001]
 ..., 
 [ 0.0016675  -0.16376001 -0.092648   ..., -0.024906   -0.069572
   1.13429999]
 [-0.23763999  0.43119001 -0.72153997 ..., -0.47773999  0.37379     1.07819998]
 [ 0.82773     0.24938001 -0.45247    ..., -0.54983997 -0.12805     0.23188999]]
[[ 0.28667    

[[ 0.11891     0.15255    -0.082073   ..., -0.57511997 -0.26671001
   0.92120999]
 [ 0.42761999 -0.11469     0.010506   ..., -0.034535   -0.041267
   0.25940001]
 [ 0.17698     0.065221    0.28547999 ..., -0.29776001  0.11035     0.22057   ]
 ..., 
 [-0.0046823  -0.084424    0.081132   ...,  0.045603   -0.12214    -0.036687  ]
 [ 0.61183    -0.22071999 -0.10898    ..., -0.043688   -0.097922    0.16806   ]
 [ 0.63427001  0.080184   -0.56338    ...,  0.89538997  0.30136999
  -0.29934001]]
[[ 0.26425999 -0.69826001 -0.26249999 ..., -0.0075595   0.16116001
   0.83459002]
 [ 0.39994001 -0.96034998 -0.36842999 ...,  0.47516     0.74608999
   0.80909002]
 [ 0.26818001  0.14346001 -0.27877    ..., -0.63209999 -0.25027999 -0.38097   ]
 ..., 
 [ 0.87361997 -0.13265     1.0862     ...,  0.42844    -0.38032001 -0.17093   ]
 [ 0.27691001  0.28744999 -0.29934999 ..., -0.09482    -0.21250001
   0.022074  ]
 [ 0.33076    -0.43869999 -0.32163    ...,  0.33311999 -0.22251999
   0.74457002]]
[[ 0.097055 

KeyboardInterrupt: 

In [18]:
print(matUserInputData[0][0])
print(matMovieInputData[0][0])

[ 0.095387   -0.16865    -0.11514    -0.51113999  0.38330999  0.22657999
 -0.78504002  0.67625999 -0.66856998  0.18847001  0.19963001  0.58350998
 -0.86133999 -0.39471999  1.15709996  0.51656997  0.11706     0.0062629
 -0.25929999 -0.33371001 -0.47957     0.62110001  0.66830999 -0.058046
  0.81304997 -2.34100008 -0.75436997  0.2167      0.78012002 -0.81361997
  2.9368      0.13466001 -0.38043001 -0.59614998 -0.093113   -0.2843
  0.28314     0.59790999 -0.20750999 -0.43841001 -0.34187001 -0.21166
 -0.082453    0.44007    -0.33649999 -0.091078   -0.45859    -0.42103001
 -0.53816998  0.13738   ]
[ 0.095387   -0.16865    -0.11514    -0.51113999  0.38330999  0.22657999
 -0.78504002  0.67625999 -0.66856998  0.18847001  0.19963001  0.58350998
 -0.86133999 -0.39471999  1.15709996  0.51656997  0.11706     0.0062629
 -0.25929999 -0.33371001 -0.47957     0.62110001  0.66830999 -0.058046
  0.81304997 -2.34100008 -0.75436997  0.2167      0.78012002 -0.81361997
  2.9368      0.13466001 -0.38043001 -

In [8]:
ratingsData = initRatingsOutputData(rawInputData, input_file="data/reviews_Amazon_Instant_Video_5.json")

In [9]:
class DeepCoNN():
    def __init__(self, embedding_size, hidden_size, u_seq_len, m_seq_len, filters=2, kernel_size=8, strides=6):
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.filters = filters
        self.kernel_size = kernel_size
        self.inputU, self.towerU = self.create_deepconn_tower(u_seq_len)
        self.inputM, self.towerM = self.create_deepconn_tower(m_seq_len)
        self.joined = Concatenate()([self.towerU, self.towerM])
        self.outNeuron = Dense(1)(self.joined)

    def create_deepconn_tower(self, max_seq_len):
        input_layer = Input(shape=(max_seq_len, self.embedding_size))
        tower = Conv1D(filters=self.filters, kernel_size=self.kernel_size, activation="relu")(input_layer)
        tower = MaxPooling1D()(tower)
        tower = Flatten()(tower)
        tower = Dense(self.hidden_size, activation="relu")(tower)
        return input_layer, tower

    def create_deepconn_dp(self):
        dotproduct = Dot(axes=1)([self.towerU, self.towerM])
        output = Add()([self.outNeuron, dotproduct])
        self.model = Model(inputs=[self.inputU, self.inputM], outputs=[output])
        self.model.compile(optimizer='Adam', loss='mse')
        
    def train(self, matUserInputData, matItemInputData, ratingsData, u_seq_len=200, i_seq_len=200, epochs=3500, training=None):
        
        tensorboard = TensorBoard(log_dir="tf_logs/{}".format(time()))
        self.create_deepconn_dp()
        
        self.user_input = pad_sequences(np.asarray(matUserInputData), maxlen=u_seq_len)
        self.item_input = pad_sequences(np.asarray(matItemInputData), maxlen=i_seq_len)

        self.trainingN = int(len(user_input) * training) if type(training) is float else training

        self.outputs = np.asarray(ratingsData)
        print(self.model.summary())

        self.train_inputs = [self.user_input[:self.trainingN], self.item_input[:self.trainingN]]
        self.train_outputs = self.outputs[:self.trainingN]
        self.test_inputs = [self.user_input[self.trainingN:], self.item_input[self.trainingN:]]
        self.test_outputs = self.outputs[self.trainingN:]

        early_stopping = EarlyStopping(monitor='loss', patience=4)
        early_stopping_val = EarlyStopping(monitor='val_loss', patience=6)
        batch_size = 32
        
        self.history = self.model.fit(self.train_inputs, self.train_outputs, callbacks=[early_stopping, early_stopping_val, tensorboard], validation_split=0.2, batch_size=batch_size, epochs=epochs)
        self.predicts = self.model.predict(self.test_inputs)
        

In [None]:
# Calculates median user review length and item length. We then pad each review to these numbers
ptile = 50
u_seq_len = int(np.percentile(np.array(extra_info['user_seq_sizes']), ptile))
i_seq_len = int(np.percentile(np.array(extra_info['item_seq_sizes']), ptile))
embed_dims = matUserInputData[0].shape[1]
hidden_size = 64
deepconn = DeepCoNN(embed_dims, hidden_size, u_seq_len, i_seq_len)

deepconn.train(matUserInputData, matMovieInputData, ratingsData, 
           u_seq_len=u_seq_len, i_seq_len=i_seq_len,
           epochs=20, training=None)