In [38]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import zscore


def read_user_id():
    with open('./input.txt', 'r') as f:
        return [l.strip().split(',') for l in  f.readlines()]


def write_output(prediction):
    with open('./output.txt', 'w') as f:
        for pred in prediction:
            f.write(pred+"\n")


def do(inputs):
    string_results = []
    for user, movie in inputs:
        key_user = int(user)
        key_movie = int(movie)
    return string_results


def initialize_train_data():
    df_train = pd.read_csv('data/ratings_train.csv', usecols = ['userId', 'movieId', 'rating'])
    return df_train


def initialize_valid_data():
    targets = pd.read_csv('data/ratings_vali.csv', usecols = ['userId', 'movieId', 'rating'])
    return targets


def preprocess_by_user(dataframe):
    dataframe['rating']  = dataframe.groupby(['userId']).rating.transform(lambda x : zscore(x, ddof=1))
    return dataframe

def RMSE(predictions, targets):
    return np.sqrt(((predictions - targets)**2).mean())




if __name__ == "__main__":
    global df_train
    df_train = initialize_train_data()
    df_valid = initialize_valid_data()
    norm_train = preprocess_by_user(df_train)
    norm_valid = preprocess_by_user(df_valid)

In [39]:
movieIds = sorted(norm_train['movieId'].unique())
userIds = sorted(norm_train['userId'].unique())

user_item = pd.DataFrame(index=sorted(movieIds), columns=sorted(userIds) )
for index, rows in  norm_train.iterrows():
     user_item.loc[rows['movieId']][rows['userId']] = rows['rating']
user_item = user_item.fillna(0.0).to_numpy()


In [37]:
import tensorflow as tf
import numpy as np
import sys
import os
import heapq
import math

class DeepFM:
    def __init__(self):
        self.train = norm_train
        self.valid = norm_valid
        self.add_embedding_layer()
        self.add_placeholders()
        # initialize tuning options 
        self.user_layer = [512, 64]
        self.item_layer = [1024, 64]
        self.lr = 0.0001
        self.max_epochs = 50
        self.batch_size = 256
        self.topK = 10
        # initialize layers
        self.add_embedding_layer()
        self.add_loss()
        self.add_train_step()
        self.checkPoint = args.checkPoint
        self.init_sess()

        
    def add_placeholder(self):
        self.user = tf.placeholder(tf.int32)
        self.item  = tf.placeholder(tf.int32)
        self.rate = tf.placeholder(tf.float32)
        self.drop = tf.placeholder(tf.float32)
        
        
    def add_embedding_matrix(self):
        self.user_embedding = tf.convert_to_tensor(item_user)
        self.item_embedding = tf.transpose(self.user_embedding)
        
        
    def add_embedding_layer(self):
        user_input = tf.nn.embedding_lookup(self.user_embedding, self.user)
        item_input = tf.nn.embedding_lookup(self.item_embedding, self.item)
        
        def init_variable(shape, name):
            return tf.Variable(tf.truncated_normal(shape=shape, dtype=tf.float32, stddev=0.01), name=name)
        
        with tf.name_scope("user_layer"):
            user_W1 = init_variable([self.shape[1], self.userLayer[0]], "user_W1")
            user_out = tf.matmul(user_input, user_W1)
            for i in range(0, len(self.userLayer)-1):
                W = init_variable([self.userLayer[i], self.userLayer[i+1]], "user_W"+str(i+2))
                b = init_variable([self.userLayer[i+1]], "user_b"+str(i+2))
                user_out = tf.nn.relu(tf.add(tf.matmul(user_out, W), b))

        with tf.name_scope("item_layer"):
            item_W1 = init_variable([self.shape[0], self.itemLayer[0]], "item_W1")
            item_out = tf.matmul(item_input, item_W1)
            for i in range(0, len(self.itemLayer)-1):
                W = init_variable([self.itemLayer[i], self.itemLayer[i+1]], "item_W"+str(i+2))
                b = init_variable([self.itemLayer[i+1]], "item_b"+str(i+2))
                item_out = tf.nn.relu(tf.add(tf.matmul(item_out, W), b))
                
        norm_user_output = tf.sqrt(tf.reduce_sum(tf.square(user_out), axis=1))
        norm_item_output = tf.sqrt(tf.reduce_sum(tf.square(item_out), axis=1))
        self.y_ = tf.reduce_sum(tf.multiply(user_out, item_out), axis=1, keep_dims=False) / (norm_item_output* norm_user_output)
        self.y_ = tf.maximum(1e-6, self.y_)
        
        
    def add_loss(self):
        rate = self.rate
        losses =  rate * tf.log(self.y_) + (1 - rate) * tf.log(1 - self.y_)
        loss = -tf.reduce_sum(losses)
        self.loss = loss
        
        
        
    def add_train_step(self):
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_step = optimizer.minimize(self.loss)
        
        
        
    def init_sess(self):
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.config.allow_soft_placement = True
        self.sess = tf.Session(config=self.config)
        self.sess.run(tf.global_variables_initializer())

        self.saver = tf.train.Saver()
        if os.path.exists(self.check_point):
            [os.remove(f) for f in os.listdir(self.check_point)]
        else:
            os.mkdir(self.checkPoint)
        def run(self):
        best_hr = -1
        best_NDCG = -1
        best_epoch = -1
        print("Start Training!")
        for epoch in range(self.maxEpochs):
            print("="*20+"Epoch ", epoch, "="*20)
            self.run_epoch(self.sess)
            print('='*50)
            print("Start Evaluation!")
            hr, NDCG = self.evaluate(self.sess, self.topK)
            print("Epoch ", epoch, "HR: {}, NDCG: {}".format(hr, NDCG))
            if hr > best_hr or NDCG > best_NDCG:
                best_hr = hr
                best_NDCG = NDCG
                best_epoch = epoch
                self.saver.save(self.sess, self.checkPoint)
            if epoch - best_epoch > self.earlyStop:
                print("Normal Early stop!")
                break
            print("="*20+"Epoch ", epoch, "End"+"="*20)
        print("Best hr: {}, NDCG: {}, At Epoch {}".format(best_hr, best_NDCG, best_epoch))
        print("Training complete!")
        
        

SyntaxError: unexpected EOF while parsing (<ipython-input-37-d757fa92b683>, line 41)