## Deep Learning Model Test
Please note that this whole folder named `Shakedown` was designed to run modeling tests to be launched in our production enviroment after all test requirents had been met successfuly. This initial test we will build a deep learning model movie recommendations system with a small 1M dataset from [MovieLens](https://grouplens.org/datasets/movielens/).

##### Import Libraries

In [14]:
import math 
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 

In [15]:
import sys
print(sys.executable)

/Users/thomaslee/anaconda3/envs/tensor_two/bin/python


##### Reading files and loading datasets

In [16]:
# Reading ratings file
ratings = pd.read_csv('ratings.csv', sep='\t', encoding='latin-1', 
                      usecols=['user_id', 'movie_id', 'user_emb_id', 'movie_emb_id', 'rating'])
max_userid = ratings['user_id'].drop_duplicates().max()
max_movieid = ratings['movie_id'].drop_duplicates().max()

# Reading ratings file
users = pd.read_csv('users.csv', sep='\t', encoding='latin-1', 
                    usecols=['user_id', 'gender', 'zipcode', 'age_desc', 'occ_desc'])

# Reading ratings file
movies = pd.read_csv('movies.csv', sep='\t', encoding='latin-1', 
                     usecols=['movie_id', 'title', 'genres'])

### Matrix Factorization for Collaborative Filtering¶

In [17]:
# Create training set
shuffled_ratings = ratings.sample(frac=1., random_state=13)

# Shuffling users
Users = shuffled_ratings['user_emb_id'].values
print('Users:', Users, ', shape =', Users.shape)

# Shuffling movies
Movies = shuffled_ratings['movie_emb_id'].values
print('Movies:', Movies, ', shape =', Movies.shape)

# Shuffling ratings
Ratings = shuffled_ratings['rating'].values
print('Ratings:', Ratings, ', shape =', Ratings.shape)

Users: [4407 3752 4139 ... 1541 4212 3024] , shape = (1000209,)
Movies: [1306 1265  371 ... 2807  109 2945] , shape = (1000209,)
Ratings: [3 5 3 ... 4 5 5] , shape = (1000209,)


### Deep Learning Model 

#### Build the Model 

In [26]:
# Import Keras libraries
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.layers import Embedding, Reshape
from keras.models import Sequential

In [65]:
class CFModel(Sequential):

    # The constructor for the class
    def __init__(self, n_users, m_items, k_factors, **kwargs):
        # P is the embedding layer that creates an User by latent factors matrix.
        # If the intput is a user_id, P returns the latent factor vector for that user.
        P = Sequential()
        P.add(Embedding(n_users, k_factors, input_length=1))
        P.add(Reshape((k_factors,)))

        # Q is the embedding layer that creates a Movie by latent factors matrix.
        # If the input is a movie_id, Q returns the latent factor vector for that movie.
        Q = Sequential()
        Q.add(Embedding(m_items, k_factors, input_length=1))
        Q.add(Reshape((k_factors,)))

        super(CFModel, self).__init__(**kwargs)

        # The Merge layer takes the dot product of user and movie latent factor vectors to return the corresponding rating.
        #self.keras.layers.Add([P, Q], mode='dot', dot_axes=1)

    # The rate function to predict user's rating of unrated items
    def rate(self, user_id, item_id):
        return self.predict([np.array([user_id]), np.array([item_id])])[0][0]

In [66]:
K_FACTORS = 100 # The number of dimensional embeddings for movies and users
TEST_USER = 2000 # A random test user (user_id = 2000)

In [67]:
# Define model
model = CFModel(max_userid, max_movieid, K_FACTORS)
# Compile the model using MSE as the loss function and the AdaMax learning algorithm
model.compile(loss='mse', optimizer='adamax')

In [68]:
# Callbacks monitor the validation loss
# Save the model weights each time the validation loss has improved
callbacks = [EarlyStopping('val_loss', patience=2), 
             ModelCheckpoint('weights.h5', save_best_only=True)]

# Use 30 epochs, 90% training data, 10% validation data 
history = model.fit([Users, Movies], Ratings, nb_epoch=30, validation_split=.1, verbose=2, callbacks=callbacks)

  import sys


AttributeError: 'CFModel' object has no attribute '_output_tensor_cache'