In [1]:
# Setup: Run the following command in your terminal
# download xcode from https://apps.apple.com/us/app/xcode/id497799835?mt=12/
# conda create -n recommendation_env python=3.9
# conda activate recommendation_env
# pip3 install Cython numpy scipy
# python3 -m pip install 'tensorflow'
# pip install tf_slim
# pip install recommenders
# git clone https://github.com/recommenders-team/recommenders.git
# python -m ipykernel install --user --name recommendation_env --display-name recommendation_kernel

# Open this notebook
# Select Jupyter kernel recommendation_kernel
# Run the notebook

In [2]:
# See recommenders/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb on the details of this model

In [2]:
import pandas as pd
import json
import requests
import os
import gzip
import json
import re
import os
import sys
import numpy as np
import pandas as pd
import csv
from collections import defaultdict, Counter
from sklearn.model_selection import train_test_split
import time
import tensorflow as tf
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.evaluation.python_evaluation import (
    map, ndcg_at_k, precision_at_k, recall_at_k, rmse
)
from recommenders.utils.constants import SEED as DEFAULT_SEED

In [13]:
DIR = './data'

filename = 'goodreads_interactions_2k_users_fantasy_paranormal.json.gz'
with gzip.open(os.path.join(DIR, filename), 'r') as json_file:
    json_reviews = json.load(json_file)

df_reviews = pd.DataFrame(json_reviews)

df_reviews


Unnamed: 0,user_id,book_id,review_id,is_read,rating,review_text_incomplete,date_added,date_updated,read_at,started_at
0,f8a89075dc6de14857561522e729f82c,27409149,5f9ae8d613787ccbfab1db5eb05157db,True,4,,Wed Jul 27 09:40:11 -0700 2016,Wed Jul 27 09:41:36 -0700 2016,Wed Jul 27 09:41:36 -0700 2016,Wed Jul 27 00:00:00 -0700 2016
1,f8a89075dc6de14857561522e729f82c,22913616,49b5ef6540bf9900e4994b6be369e584,True,4,,Thu Sep 03 10:00:53 -0700 2015,Thu Sep 03 10:10:59 -0700 2015,Thu Sep 03 10:10:59 -0700 2015,Thu Sep 03 00:00:00 -0700 2015
2,f8a89075dc6de14857561522e729f82c,23254391,73c31b0d8e90f71ea9f1d004dcf3007e,True,5,,Thu Sep 03 08:48:07 -0700 2015,Thu Sep 03 08:49:03 -0700 2015,Thu Sep 03 08:49:03 -0700 2015,Thu Sep 03 00:00:00 -0700 2015
3,f8a89075dc6de14857561522e729f82c,25362018,b404191c91e92b735d6f1edb5234322d,True,3,,Thu Sep 03 08:38:08 -0700 2015,Mon Feb 06 20:34:45 -0800 2017,Mon Feb 06 00:00:00 -0800 2017,Mon Feb 06 00:00:00 -0800 2017
4,f8a89075dc6de14857561522e729f82c,25005214,f62977d6169ceb9f1153f174e3f99b3c,True,3,,Thu Sep 03 08:38:05 -0700 2015,Thu Feb 02 20:22:22 -0800 2017,Thu Feb 02 00:00:00 -0800 2017,Thu Feb 02 00:00:00 -0800 2017
...,...,...,...,...,...,...,...,...,...,...
505923,65555af9902bd66f825fb8ee7fe52698,15453,25c9ad48ff8d6b1fd5517e4d728d5b7d,True,5,,Thu Jul 19 19:26:44 -0700 2012,Thu Jul 19 19:26:44 -0700 2012,,
505924,65555af9902bd66f825fb8ee7fe52698,99450,006507ea06837569904e70e6a31bf386,True,4,,Thu Jul 19 19:26:41 -0700 2012,Thu Jul 19 19:26:41 -0700 2012,,
505925,65555af9902bd66f825fb8ee7fe52698,15449,574b9b9a3512acf19bb3da30bc3913b6,True,4,,Thu Jul 19 19:26:39 -0700 2012,Thu Jul 19 19:26:39 -0700 2012,,
505926,65555af9902bd66f825fb8ee7fe52698,99449,b8fe8c238463846c0ea94e7023dd5719,True,4,,Thu Jul 19 19:26:38 -0700 2012,Thu Jul 19 19:26:38 -0700 2012,,


In [14]:
# Remove rows where 'is_read' == False
df_reviews = df_reviews[df_reviews['is_read'] == True]

columns_to_keep = [
    'user_id', 'book_id', 'rating', 'date_added'
]

df_reviews = df_reviews[columns_to_keep]

In [15]:
from datetime import datetime
df_reviews['datetime'] = df_reviews['date_added'].apply(lambda x: datetime.strptime(x, "%a %b %d %H:%M:%S %z %Y"))
df_reviews['timestamp'] = df_reviews['datetime'].apply(lambda x: int(x.timestamp()))
df_reviews

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp
0,f8a89075dc6de14857561522e729f82c,27409149,4,Wed Jul 27 09:40:11 -0700 2016,2016-07-27 09:40:11-07:00,1469637611
1,f8a89075dc6de14857561522e729f82c,22913616,4,Thu Sep 03 10:00:53 -0700 2015,2015-09-03 10:00:53-07:00,1441299653
2,f8a89075dc6de14857561522e729f82c,23254391,5,Thu Sep 03 08:48:07 -0700 2015,2015-09-03 08:48:07-07:00,1441295287
3,f8a89075dc6de14857561522e729f82c,25362018,3,Thu Sep 03 08:38:08 -0700 2015,2015-09-03 08:38:08-07:00,1441294688
4,f8a89075dc6de14857561522e729f82c,25005214,3,Thu Sep 03 08:38:05 -0700 2015,2015-09-03 08:38:05-07:00,1441294685
...,...,...,...,...,...,...
505923,65555af9902bd66f825fb8ee7fe52698,15453,5,Thu Jul 19 19:26:44 -0700 2012,2012-07-19 19:26:44-07:00,1342751204
505924,65555af9902bd66f825fb8ee7fe52698,99450,4,Thu Jul 19 19:26:41 -0700 2012,2012-07-19 19:26:41-07:00,1342751201
505925,65555af9902bd66f825fb8ee7fe52698,15449,4,Thu Jul 19 19:26:39 -0700 2012,2012-07-19 19:26:39-07:00,1342751199
505926,65555af9902bd66f825fb8ee7fe52698,99449,4,Thu Jul 19 19:26:38 -0700 2012,2012-07-19 19:26:38-07:00,1342751198


In [16]:
df_reviews['userID'] = df_reviews['user_id']
df_reviews['itemID'] = df_reviews['book_id']
df_reviews

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID
0,f8a89075dc6de14857561522e729f82c,27409149,4,Wed Jul 27 09:40:11 -0700 2016,2016-07-27 09:40:11-07:00,1469637611,f8a89075dc6de14857561522e729f82c,27409149
1,f8a89075dc6de14857561522e729f82c,22913616,4,Thu Sep 03 10:00:53 -0700 2015,2015-09-03 10:00:53-07:00,1441299653,f8a89075dc6de14857561522e729f82c,22913616
2,f8a89075dc6de14857561522e729f82c,23254391,5,Thu Sep 03 08:48:07 -0700 2015,2015-09-03 08:48:07-07:00,1441295287,f8a89075dc6de14857561522e729f82c,23254391
3,f8a89075dc6de14857561522e729f82c,25362018,3,Thu Sep 03 08:38:08 -0700 2015,2015-09-03 08:38:08-07:00,1441294688,f8a89075dc6de14857561522e729f82c,25362018
4,f8a89075dc6de14857561522e729f82c,25005214,3,Thu Sep 03 08:38:05 -0700 2015,2015-09-03 08:38:05-07:00,1441294685,f8a89075dc6de14857561522e729f82c,25005214
...,...,...,...,...,...,...,...,...
505923,65555af9902bd66f825fb8ee7fe52698,15453,5,Thu Jul 19 19:26:44 -0700 2012,2012-07-19 19:26:44-07:00,1342751204,65555af9902bd66f825fb8ee7fe52698,15453
505924,65555af9902bd66f825fb8ee7fe52698,99450,4,Thu Jul 19 19:26:41 -0700 2012,2012-07-19 19:26:41-07:00,1342751201,65555af9902bd66f825fb8ee7fe52698,99450
505925,65555af9902bd66f825fb8ee7fe52698,15449,4,Thu Jul 19 19:26:39 -0700 2012,2012-07-19 19:26:39-07:00,1342751199,65555af9902bd66f825fb8ee7fe52698,15449
505926,65555af9902bd66f825fb8ee7fe52698,99449,4,Thu Jul 19 19:26:38 -0700 2012,2012-07-19 19:26:38-07:00,1342751198,65555af9902bd66f825fb8ee7fe52698,99449


In [17]:
# Sample data for faster training
df_reviews_sample = df_reviews.sample(n=100000)

In [18]:
df_reviews_sample.head()

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID
401533,649e6bf4c61f3b82e14a90d0ec63610b,153780,5,Mon Jul 16 12:20:46 -0700 2012,2012-07-16 12:20:46-07:00,1342466446,649e6bf4c61f3b82e14a90d0ec63610b,153780
395130,fe7ba825d2ffdf658aeccfa9efe7d325,68429,4,Thu Jul 24 13:41:13 -0700 2014,2014-07-24 13:41:13-07:00,1406234473,fe7ba825d2ffdf658aeccfa9efe7d325,68429
356504,268bf6c7b9333be53140e953103d21d0,11557,4,Wed Dec 05 12:04:25 -0800 2012,2012-12-05 12:04:25-08:00,1354737865,268bf6c7b9333be53140e953103d21d0,11557
378684,0f2e8aa9912c8b484e4f719ce9cfee22,6871358,3,Thu May 15 17:35:25 -0700 2014,2014-05-15 17:35:25-07:00,1400200525,0f2e8aa9912c8b484e4f719ce9cfee22,6871358
253074,7c2334f465a430ffe41b630c8951cb0f,13186804,3,Mon Feb 25 18:55:26 -0800 2013,2013-02-25 18:55:26-08:00,1361847326,7c2334f465a430ffe41b630c8951cb0f,13186804


In [19]:
# Create a mapping of unique user IDs to smaller integers
unique_user_ids = df_reviews_sample['userID'].unique()
user_id_mapping = {user_id: idx for idx, user_id in enumerate(unique_user_ids)}

# Replace the userID column with the mapped values
df_reviews_sample['userID'] = df_reviews_sample['userID'].map(user_id_mapping)

In [20]:
# Create a mapping of unique item IDs to smaller integers
unique_item_ids = df_reviews_sample['itemID'].unique()
item_id_mapping = {item_id: idx for idx, item_id in enumerate(unique_item_ids)}

# Replace the userID column with the mapped values
df_reviews_sample['itemID'] = df_reviews_sample['itemID'].map(item_id_mapping)

In [21]:
df_reviews_sample

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID
401533,649e6bf4c61f3b82e14a90d0ec63610b,153780,5,Mon Jul 16 12:20:46 -0700 2012,2012-07-16 12:20:46-07:00,1342466446,0,0
395130,fe7ba825d2ffdf658aeccfa9efe7d325,68429,4,Thu Jul 24 13:41:13 -0700 2014,2014-07-24 13:41:13-07:00,1406234473,1,1
356504,268bf6c7b9333be53140e953103d21d0,11557,4,Wed Dec 05 12:04:25 -0800 2012,2012-12-05 12:04:25-08:00,1354737865,2,2
378684,0f2e8aa9912c8b484e4f719ce9cfee22,6871358,3,Thu May 15 17:35:25 -0700 2014,2014-05-15 17:35:25-07:00,1400200525,3,3
253074,7c2334f465a430ffe41b630c8951cb0f,13186804,3,Mon Feb 25 18:55:26 -0800 2013,2013-02-25 18:55:26-08:00,1361847326,4,4
...,...,...,...,...,...,...,...,...
137297,004f6d189c4546a39db2162986c0d9e1,59219,5,Wed Jan 29 18:58:23 -0800 2014,2014-01-29 18:58:23-08:00,1391050703,643,2127
24006,eea33d75f76e522d4ebdac0fa08179d5,112754,3,Thu Mar 26 13:02:14 -0700 2009,2009-03-26 13:02:14-07:00,1238097734,176,9862
292086,191941c990613321aea2deda41bd8997,4214,5,Mon Apr 07 10:27:31 -0700 2008,2008-04-07 10:27:31-07:00,1207589251,1026,1483
346956,c0372497d4e77b851e2a4c38ab5bf235,11,0,Mon Jan 09 12:31:55 -0800 2017,2017-01-09 12:31:55-08:00,1483993915,1040,925


In [22]:
#df_reviews_sample = df_reviews_sample[df_reviews_sample['rating'] != 3]
df_reviews_sample['old_rating'] = df_reviews_sample['rating']
# Step 2: Set rating to 1 where rating >= 3 and 0 where rating <= 3
df_reviews_sample['rating'] = df_reviews_sample['rating'].apply(lambda x: 1 if x > 3 else 0)
df_reviews_sample

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID,old_rating
401533,649e6bf4c61f3b82e14a90d0ec63610b,153780,1,Mon Jul 16 12:20:46 -0700 2012,2012-07-16 12:20:46-07:00,1342466446,0,0,5
395130,fe7ba825d2ffdf658aeccfa9efe7d325,68429,1,Thu Jul 24 13:41:13 -0700 2014,2014-07-24 13:41:13-07:00,1406234473,1,1,4
356504,268bf6c7b9333be53140e953103d21d0,11557,1,Wed Dec 05 12:04:25 -0800 2012,2012-12-05 12:04:25-08:00,1354737865,2,2,4
378684,0f2e8aa9912c8b484e4f719ce9cfee22,6871358,0,Thu May 15 17:35:25 -0700 2014,2014-05-15 17:35:25-07:00,1400200525,3,3,3
253074,7c2334f465a430ffe41b630c8951cb0f,13186804,0,Mon Feb 25 18:55:26 -0800 2013,2013-02-25 18:55:26-08:00,1361847326,4,4,3
...,...,...,...,...,...,...,...,...,...
137297,004f6d189c4546a39db2162986c0d9e1,59219,1,Wed Jan 29 18:58:23 -0800 2014,2014-01-29 18:58:23-08:00,1391050703,643,2127,5
24006,eea33d75f76e522d4ebdac0fa08179d5,112754,0,Thu Mar 26 13:02:14 -0700 2009,2009-03-26 13:02:14-07:00,1238097734,176,9862,3
292086,191941c990613321aea2deda41bd8997,4214,1,Mon Apr 07 10:27:31 -0700 2008,2008-04-07 10:27:31-07:00,1207589251,1026,1483,5
346956,c0372497d4e77b851e2a4c38ab5bf235,11,0,Mon Jan 09 12:31:55 -0800 2017,2017-01-09 12:31:55-08:00,1483993915,1040,925,0


In [23]:
df_reviews_sample.rating.value_counts()

rating
1    67258
0    32742
Name: count, dtype: int64

In [24]:
df_reviews_sample.to_csv('./sample_data.csv', index=False)

In [None]:
# top k items to recommend
TOP_K = 10

# Model parameters
EPOCHS = 100
BATCH_SIZE = 256

SEED = DEFAULT_SEED  # Set None for non-deterministic results
# chronological spilt on every user
train, test = python_chrono_split(df_reviews_sample, 0.75)

# Filter out any users or items in the test set that do not appear in the training set.
test = test[test["userID"].isin(train["userID"].unique())]
test = test[test["itemID"].isin(train["itemID"].unique())]

# Create a test set containing the last interaction for each user as for the leave-one-out evaluation.
leave_one_out_test = test.groupby("userID").last().reset_index()

# Write datasets to csv files.
train_file = "./train.csv"
test_file = "./test.csv"
leave_one_out_test_file = "./leave_one_out_test.csv"
train.to_csv(train_file, index=False)
test.to_csv(test_file, index=False)
leave_one_out_test.to_csv(leave_one_out_test_file, index=False)


data = NCFDataset(train_file=train_file, test_file=leave_one_out_test_file, seed=SEED, overwrite_test_file_full=True)

In [26]:

model = NCF(n_users=data.n_users,n_items=data.n_items,model_type="NeuMF",n_factors=4,layer_sizes=[16,8,4],\
            n_epochs=EPOCHS,batch_size=BATCH_SIZE,learning_rate=1e-3,verbose=10,seed=SEED)
# n_factors (int): Dimension of latent space.
# layer_sizes (list): Number of layers for MLP.

# training the model
model.fit(data)


I0000 00:00:1733528165.151472  103008 mlir_graph_optimization_pass.cc:401] MLIR V1 optimization pass is not enabled
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [8.56s]: train_loss = 0.172693 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 20 [7.68s]: train_loss = 0.126556 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 30 [6.97s]: train_loss = 0.103446 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 40 [7.82s]: train_loss = 0.090821 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 50 [8.37s]: train_loss = 0.081215 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 60 [8.04s]: train_loss = 0.074756 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 70 [7.64s]: train_loss = 0.070926 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 80 [8.03s]: train_loss = 0.067788 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 90 [7.77s]: train_loss = 0.064452 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 100 [7.18s]: train_loss = 0.061668 


In [27]:
model.save("./ncf_model_keep_rating_3")

In [37]:
users, items, preds = [], [], []
item = list(train.itemID.unique())
for user in train.userID.unique():
    user = [user] * len(item) 
    users.extend(user)
    items.extend(item)
    preds.extend(list(model.predict(user, item, is_list=True)))

all_predictions = pd.DataFrame(data={"userID": users, "itemID":items, "prediction":preds})

merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
all_predictions_drop_rating = merged[merged.rating.isnull()].drop('rating', axis=1)


In [38]:
eval_precision = precision_at_k(test, all_predictions_drop_rating, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions_drop_rating, col_prediction='prediction', k=TOP_K)
print("precision: ", eval_precision, " recall: ", eval_recall) 

  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(
  df_hit.groupby(col_user, as_index=False)[col_user].agg({"hit": "count"}),
  rating_true_common.groupby(col_user, as_index=False)[col_user].agg(


precision:  0.004394481349003576  recall:  0.0056025011170783125


In [28]:
def GenPredictions(data) :
  predictions = []
  # Iterate through each row of the DataFrame
  for _, row in data.iterrows():
    user_id = row['userID']
    item_id = row['itemID']
    
    # Get the prediction score from the model
    prediction_score = model.predict(user_id, item_id)
    
    # Store the user_id, item_id, and prediction_score in the list
    predictions.append([user_id, item_id, prediction_score])
  return predictions


In [29]:
test_predictions = GenPredictions(test)
test_prediction_df = pd.DataFrame(test_predictions, columns=['userID', 'itemID', 'prediction'])
test_prediction_df

Unnamed: 0,userID,itemID,prediction
0,0,427,9.811142e-01
1,0,18918,2.562572e-11
2,0,6214,4.339232e-06
3,0,2670,9.056802e-01
4,0,8508,1.010585e-06
...,...,...,...
14723,1998,4771,8.226363e-18
14724,1999,28,7.569882e-01
14725,1999,11900,1.637205e-26
14726,1999,21295,2.140307e-35


In [32]:
eval_rmse = rmse(test, test_prediction_df)
eval_rmse

np.float64(0.7134904572724021)

In [33]:
test_merged = pd.merge(test, test_prediction_df, on=["userID", "itemID"], how="outer")
test_merged

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID,old_rating,prediction
0,649e6bf4c61f3b82e14a90d0ec63610b,5907,1,Mon Dec 03 12:56:11 -0800 2012,2012-12-03 12:56:11-08:00,1354568171,0,427,4,9.811142e-01
1,649e6bf4c61f3b82e14a90d0ec63610b,68428,1,Wed Apr 02 15:29:57 -0700 2014,2014-04-02 15:29:57-07:00,1396477797,0,818,4,9.183215e-02
2,649e6bf4c61f3b82e14a90d0ec63610b,7996,0,Wed Jun 17 22:10:43 -0700 2015,2015-06-17 22:10:43-07:00,1434604243,0,1796,0,1.117659e-01
3,649e6bf4c61f3b82e14a90d0ec63610b,14061955,0,Mon Apr 29 20:25:24 -0700 2013,2013-04-29 20:25:24-07:00,1367292324,0,2670,3,9.056802e-01
4,649e6bf4c61f3b82e14a90d0ec63610b,15704459,1,Wed Sep 09 21:05:37 -0700 2015,2015-09-09 21:05:37-07:00,1441857937,0,2880,4,2.965990e-02
...,...,...,...,...,...,...,...,...,...,...
14723,07fb543fa483ec58744d4ef35612b619,25372801,1,Thu Nov 24 10:12:32 -0800 2016,2016-11-24 10:12:32-08:00,1480011152,1998,4771,4,8.226363e-18
14724,a6fc6bac96c9d8213960d8fb3030237c,27003,1,Mon Feb 13 10:10:34 -0800 2017,2017-02-13 10:10:34-08:00,1487009434,1999,28,4,7.569882e-01
14725,a6fc6bac96c9d8213960d8fb3030237c,64222,1,Wed Oct 11 10:59:03 -0700 2017,2017-10-11 10:59:03-07:00,1507744743,1999,96,5,1.502860e-02
14726,a6fc6bac96c9d8213960d8fb3030237c,18657790,0,Tue May 02 14:20:31 -0700 2017,2017-05-02 14:20:31-07:00,1493760031,1999,11900,3,1.637205e-26


In [35]:
train_predictions = GenPredictions(train)
train_prediction_df = pd.DataFrame(train_predictions, columns=['userID', 'itemID', 'prediction'])
train_merged = pd.merge(train, train_prediction_df, on=["userID", "itemID"], how="outer")
train_merged

Unnamed: 0,user_id,book_id,rating,date_added,datetime,timestamp,userID,itemID,old_rating,prediction
0,649e6bf4c61f3b82e14a90d0ec63610b,153780,1,Mon Jul 16 12:20:46 -0700 2012,2012-07-16 12:20:46-07:00,1342466446,0,0,5,7.598377e-01
1,649e6bf4c61f3b82e14a90d0ec63610b,7171637,1,Wed Jul 11 20:55:34 -0700 2012,2012-07-11 20:55:34-07:00,1342065334,0,33,4,8.684543e-01
2,649e6bf4c61f3b82e14a90d0ec63610b,136251,1,Mon Jul 16 15:07:27 -0700 2012,2012-07-16 15:07:27-07:00,1342476447,0,127,5,9.720023e-01
3,649e6bf4c61f3b82e14a90d0ec63610b,113436,1,Thu Jul 12 12:54:22 -0700 2012,2012-07-12 12:54:22-07:00,1342122862,0,441,5,9.326160e-01
4,649e6bf4c61f3b82e14a90d0ec63610b,153784,0,Mon Jul 16 12:21:00 -0700 2012,2012-07-16 12:21:00-07:00,1342466460,0,512,3,4.619372e-01
...,...,...,...,...,...,...,...,...,...,...
74975,a6fc6bac96c9d8213960d8fb3030237c,34541,1,Thu Jan 05 04:02:18 -0800 2017,2017-01-05 04:02:18-08:00,1483617738,1999,6796,4,9.865245e-01
74976,a6fc6bac96c9d8213960d8fb3030237c,24271,1,Wed Dec 16 08:54:49 -0800 2015,2015-12-16 08:54:49-08:00,1450284889,1999,14848,4,9.895349e-01
74977,a6fc6bac96c9d8213960d8fb3030237c,18652002,0,Sat Sep 19 13:07:39 -0700 2015,2015-09-19 13:07:39-07:00,1442693259,1999,15142,0,1.257875e-14
74978,a6fc6bac96c9d8213960d8fb3030237c,93383,0,Wed Sep 16 14:49:07 -0700 2015,2015-09-16 14:49:07-07:00,1442440147,1999,24779,3,1.265481e-19


In [39]:
train_rmse = rmse(train, train_prediction_df)
train_rmse

np.float64(0.1565745068945968)

In [36]:
test_merged.to_csv('./test_with_prediction.csv', index=False)
train_merged.to_csv('./train_with_prediction.csv', index=False)