### Load Libraries

In [None]:
from autograd import numpy as np
from autograd import grad
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from datetime import datetime
from multiprocessing import Pool
from sklearn.model_selection import train_test_split
import h5py
import sys
import autograd.numpy.random as npr
from autograd.misc.optimizers import adam
# Seed the global NumPy random generator (reached through autograd's numpy
# wrapper) so the np.random draws made later in this file are repeatable.
np.random.seed(7)

### Defining Defaults and Common Functions

In [None]:
# Directory prefix (trailing slash included) that is prepended to every data
# file name read or written by the helpers below.
datadir = "./datafiles/"

In [None]:
def OWrite(s):
    """Print ``s`` on standard output and flush the stream immediately."""
    stream = sys.stdout
    print(s, file=stream)
    # Flush right away so progress messages show up without buffering delay.
    stream.flush()
    
def saveh5py(hdata,hname):
    """Store ``hdata`` in the HDF5 file ``datadir + hname``.

    The data is written as a single dataset named 'dataset'.  The file is
    opened in 'w' mode, so an existing file with the same name is replaced.
    """
    # The context manager closes the file even if create_dataset raises,
    # which the explicit close() call it replaces did not guarantee.
    with h5py.File(datadir + hname, 'w') as h5f:
        h5f.create_dataset('dataset', data=hdata)

def readh5py(hname):
    """Return the contents of dataset 'dataset' from ``datadir + hname``."""
    # The context manager closes the file even if the lookup or read raises.
    with h5py.File(datadir + hname, 'r') as h5f:
        # [:] reads the whole dataset before the file is closed.
        return h5f['dataset'][:]

def gen_dmatrix(data,matrix_shape):
    """Build a dense matrix from index/value records and save it as 'dmatrix.h5'.

    Every record in ``data`` is read as ``record[0]``, ``record[1]`` and
    ``record[2]``: the first two pick the cell, the third is the value stored
    there.  Cells that no record names stay 0.  The elapsed time is reported
    through OWrite and the filled matrix is returned.
    """
    started = datetime.now()
    dense = np.zeros(matrix_shape)
    for record in data:
        row, col, value = record[0], record[1], record[2]
        dense[row, col] = value
    saveh5py(dense, 'dmatrix.h5')
    elapsed = datetime.now() - started
    OWrite("Time spent on computing data matrix: " + str(elapsed))
    return dense

In [None]:
class RecSys():
    """Biased matrix-factorisation recommender trained with per-rating SGD.

    The rating of a (user, movie) pair is predicted as
    ``b + BU[user] + BM[movie] + dot(UL[user], ML[movie])``.

    Keys of ``args`` read by the constructor:
        'alpha'                  step size of each gradient update
        'L'                      number of latent factors per user / movie
        'max_epochs'             number of passes over the training rows
        'xtrain'                 2-D array; columns 0, 1 and 2 are used as
                                 user index, movie index and rating
        'unique_cust_ids_list'   only its length is used (number of users)
        'unique_movie_ids_list'  only its length is used (number of movies)
    """

    def __init__(self,args):
        self.alpha       = args['alpha']
        self.L           = args['L']
        self.max_epochs  = args['max_epochs']
        self.xtrain      = args['xtrain']
        self.num_users   = len(args['unique_cust_ids_list'])
        self.num_movies  = len(args['unique_movie_ids_list'])
        # gen_dmatrix also writes the matrix to 'dmatrix.h5' as a side effect.
        self.dmatrix     = gen_dmatrix(self.xtrain,(self.num_users,self.num_movies))
        self.UL = np.random.normal(scale=1./self.L, size=(self.num_users, self.L))
        self.ML = np.random.normal(scale=1./self.L, size=(self.num_movies, self.L))
        self.BU = np.zeros(self.num_users)
        self.BM = np.zeros(self.num_movies)
        # Global bias = mean training rating.  Read it from the instance's own
        # data (not a module-level variable) and from column 2, the rating
        # column used everywhere else in this file.
        self.b  = np.mean(self.xtrain[:, 2])

    def model(self,params):
        """Predict one rating from per-pair parameters.

        ``params`` is ``(ul, ml, bu, bm)``: the latent vectors of one user and
        one movie plus their two scalar biases.
        """
        ul,ml,bu,bm = params
        return self.b + bu + bm + np.dot(ul, ml)

    def squared_error(self,params,y):
        """Return the squared difference between ``y`` and the prediction."""
        return np.square(y - self.model(params))

    def gradient_descent(self):
        """Run one SGD pass: one parameter update per row of ``self.xtrain``.

        Requires ``self.gradient_fn``, which ``train`` sets up.
        """
        for xtem in self.xtrain:
            # int() so rows taken from a float array can still be used as indices.
            user, movie, rating = int(xtem[0]), int(xtem[1]), xtem[2]
            params = (self.UL[user], self.ML[movie], self.BU[user], self.BM[movie])
            # The gradient of a tuple argument comes back as a matching tuple.
            g_ul, g_ml, g_bu, g_bm = self.gradient_fn(params, rating)
            # Write the step back into the stored parameters; updating only a
            # local tuple would leave the model unchanged.
            self.UL[user] -= self.alpha * g_ul
            self.ML[movie] -= self.alpha * g_ml
            self.BU[user] -= self.alpha * g_bu
            self.BM[movie] -= self.alpha * g_bm

    def train(self):
        """Train for ``max_epochs`` epochs and return the per-epoch MSE list.

        Note: ``self.xtrain`` is shuffled in place at the start of every epoch.
        """
        self.gradient_fn = grad(self.squared_error,0)
        lossdata = []
        for i_e in range(self.max_epochs):
            np.random.shuffle(self.xtrain)
            self.gradient_descent()
            avg_error = self.overall_mse()
            lossdata.append(avg_error)
            OWrite("Epoch: {} \t MSE: {:.4f}".format(i_e+1,avg_error))
        return lossdata

    def predict_matrix(self):
        """Return the (num_users, num_movies) matrix of predicted ratings."""
        return self.b + self.BU[:,np.newaxis] + self.BM[np.newaxis,:] + self.UL.dot(self.ML.T)

    def overall_mse(self):
        """Return the mean squared error over the non-zero cells of ``dmatrix``.

        Zero cells are treated as "no rating" and skipped; 0.0 is returned
        when there are no non-zero cells at all.
        """
        xs, ys = self.dmatrix.nonzero()
        if xs.size == 0:
            return 0.0
        residuals = self.dmatrix[xs, ys] - self.predict_matrix()[xs, ys]
        return np.mean(np.square(residuals))

In [None]:
# Load the ratings table (HDF5, via readh5py) and the customer-id / movie-id
# lists (integer CSV files) from datadir.
# NOTE(review): presumably each row of `data` starts with (user index,
# movie index, rating) - confirm against whatever produced the .h5 file.
data                 = readh5py('converted_final_data.h5')
global_var_cust_ids  = np.genfromtxt(datadir+'final_custids.csv',dtype=int)
global_var_movie_ids = np.genfromtxt(datadir+'final_movieids.csv',dtype=int)

In [None]:
# Hold out 10% of the rows as a test set (random_state fixed so the split is
# repeatable), save both splits under datadir, then report their shapes.
OWrite ("Splitting data to train and test sets")
xtrain, xtest = train_test_split(data, test_size=0.1, random_state=7)
saveh5py(xtrain,'traindata.h5')
saveh5py(xtest,'testdata.h5')
OWrite ("Shape of training data: "+str(xtrain.shape))
OWrite ("Shape of test data: "+str(xtest.shape))

In [None]:
# Settings and inputs handed to RecSys.__init__; the id arrays are passed as
# copies of the module-level lists.
args = dict(
    alpha=0.001,
    L=20,
    xtrain=xtrain,
    max_epochs=100,
    unique_cust_ids_list=np.copy(global_var_cust_ids),
    unique_movie_ids_list=np.copy(global_var_movie_ids),
)

In [None]:
# Disabled scratch experiment: the triple-quoted string below is a bare
# expression statement, so none of the code inside it runs.  It tries
# autograd's grad() on a function whose first argument is a tuple of arrays.
# NOTE(review): `c+tmp_grad(c,4)` inside the string would concatenate two
# tuples if both operands are tuples, not add them element-wise - confirm
# before re-enabling.
'''
rs = npr.RandomState(0)
a = rs.randn(6,2)
b = rs.randn(5,1)
print (a,b)
def tmp_fn(params,ic):
    ia,ib = params
    return pow(pow(ia[0],2)+pow(ia[1],2)+pow(ib[0],3)+5. - ic,2)
tmp_grad = grad(tmp_fn,0)
c=(a[1],b[2])
print(tmp_grad(c,4))
c=(a[1],b[3])
print(tmp_grad(c,4))
print (c+tmp_grad(c,4)) 
'''
# Only this statement executes in the cell; it prints an empty line.
print ()