In [1]:
# import modules
import copy
import time
from sys import path
from collections import defaultdict

In [2]:
import pandas as pd
from surprise import SVD, Reader, Dataset, accuracy
from surprise.model_selection import train_test_split

In [3]:
path.append('../code/src')

In [4]:
import svd_constraint
from post_rec import PostRec
from inter_rec import InterRec
from evaluate import Evaluation as ev

In [5]:
# constants
# file path
input_path = "../data/"
output_path = '../result/'
rate_file = 'reduced_rating_data.csv'
attr_file = 'recipe_data.csv'

# user/item max
user_max = 1000
food_max = 5000

# const count
const_count = 3

In [17]:
# Dictionary keys
rs2 = 'Post-Single-'
rs3 = 'Inter-Single-'
rs4 = 'Post-Mixed'
rs5 = 'Inter-Mixed'
val_r1 = 'RMSE_filter'
val_r2 = 'nDCG(100)'
val_r3 = 'nDCG(1000)'
val_t1 = 'train time(s)'
val_t2 = 'exec time(s)'

In [23]:
val_dict = {
    val_r1: 0,
    val_r2: 0,
    val_r3: 0,
    val_t1: 0,
    val_t2: 0
}

keys = []
keys = keys + [rs2 + str(i) for i in range (1, 4)]
keys = keys + [rs3 + str(i) for i in range (1, 4)]
keys = keys + [rs4]
keys = keys + [rs5]

result = {key: copy.deepcopy(val_dict) for key in keys}

In [24]:
# read previous result
result_df = pd.read_csv(output_path + 'evaluation_adv.csv', index_col = 0).transpose()
result.update(result_df.to_dict())
result

FileNotFoundError: [Errno 2] No such file or directory: '../result/evaluation_adv.csv'

In [25]:
result

{'Post-Single-1': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Post-Single-2': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Post-Single-3': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Inter-Single-1': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Inter-Single-2': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Inter-Single-3': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Post-Mixed': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0},
 'Inter-Mixed': {'RMSE_filter': 0,
  'nDCG(100)': 0,
  'nDCG(1000)': 0,
  'train time(s)': 0,
  'exec time(s)': 0}}

# Post Rec Single

In [26]:
def run_post_single(ctype, idx):    
    # PostRec applies constraint after the rating of each item is predicted
    rec = PostRec(input_path + rate_file, input_path + attr_file, 
                           input_path + 'const_' + str(ctype) + '.' + str(idx) + '.csv',
                            need_test = True)
    rec.result_N = 1000
    
    rec.get_data()  # get rating, attribute, recipe data
    
    # train with data
    start = time.time()
    rec.train()  
    t1 = time.time() - start
    
    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(rec.filter(predict_test))
       
    # get top-n for anti-test-set
    start = time.time()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.time() - start
    
    # calculate ndcg
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 100)
    r3 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 1000)
     
    return r1, r2, r3, t1, t2

In [27]:
for i in range (1, 4):
    r1, r2, r3, t1, t2 = run_post_single(i, 1)
    print('Const_'+str(i)+'.'+str(1)+" done")

    result[rs2+str(i)][val_r1] = r1
    result[rs2+str(i)][val_r2] = r2
    result[rs2+str(i)][val_r3] = r3
    result[rs2+str(i)][val_t1] = t1
    result[rs2+str(i)][val_t2] = t2

Const_1.1 done
Const_2.1 done
Const_3.1 done


# Inter Rec Mixed

In [29]:
def run_inter_single(ctype, idx):    
    # PostRec applies constraint after the rating of each item is predicted
    rec = InterRec(input_path + rate_file, input_path + attr_file, 
                           input_path + 'const_' + str(ctype) + '.' + str(idx) + '.csv', 
                            svd_constraint.CnstSVD(), need_test = True)
    rec.result_N = 1000
    
    rec.get_data()  # get rating, attribute, recipe data
    
    # train with data
    start = time.time()
    rec.train()  
    t1 = time.time() - start
    
    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(rec.filter(predict_test))
    
    # get top-n for anti-test-set
    start = time.time()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.time() - start

    # calculate ndcg
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 100)
    r3 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 1000)
       
    return r1, r2, r3, t1, t2

In [30]:
for i in range (1, 4):
    r1, r2, r3, t1, t2 = run_inter_single(i, 1)
    print('Const_'+str(i)+'.'+str(1)+" done")

    result[rs3+str(i)][val_r1] = r1
    result[rs3+str(i)][val_r2] = r2
    result[rs3+str(i)][val_r3] = r3
    result[rs3+str(i)][val_t1] = t1
    result[rs3+str(i)][val_t2] = t2

Const_1.1 done
Const_2.1 done
Const_3.1 done


# Post-Rec Mixed

In [31]:
def run_post(idx):    
    # PostRec applies constraint after the rating of each item is predicted
    rec = PostRec(input_path + rate_file, input_path + attr_file, 
                           input_path + 'const' + '.' + str(idx) + '.csv',
                            need_test = True)
    rec.result_N = 1000
    
    rec.get_data()  # get rating, attribute, recipe data
    
    # train with data
    start = time.time()
    rec.train()  
    t1 = time.time() - start
    
    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(rec.filter(predict_test))
       
    # get top-n for anti-test-set
    start = time.time()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.time() - start
    
    # calculate ndcg
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 100)
    r3 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 1000)
        
    return r1, r2, r3, t1, t2

In [33]:
r1, r2, r3, t1, t2 = run_post(i)
print('Const_'+str(i)+'.'+str(1)+" done")

result[rs4][val_r1] = r1
result[rs4][val_r2] = r2
result[rs4][val_r3] = r3
result[rs4][val_t1] = t1
result[rs4][val_t2] = t2

Const_3.1 done


# Inter-Rec Mixed

In [34]:
def run_inter(idx):    
    # InterRec applies constraint during the rating of each item is predicted
    rec = InterRec(input_path + rate_file, input_path + attr_file, 
                           input_path + 'const' + '.' + str(idx) + '.csv', 
                            svd_constraint.CnstSVD(), need_test = True)
    
    rec.get_data()  # get rating, attribute, recipe data
    
    # train with data
    start = time.time()
    rec.train()  
    t1 = time.time() - start
    
    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(rec.filter(predict_test))
    
    # get top-n for anti-test-set
    start = time.time()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.time() - start

    # calculate ndcg
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 100)
    r3 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 1000)
        
    return r1, r2, r3, t1, t2

In [36]:
r1, r2, r3, t1, t2 = run_inter(i)
print('Const_'+str(i)+'.'+str(1)+" done")

result[rs5][val_r1] = r1
result[rs5][val_r2] = r2
result[rs5][val_r3] = r3
result[rs5][val_t1] = t1
result[rs5][val_t2] = t2

Const_3.1 done


# result

In [37]:
result

{'Post-Single-1': {'RMSE_filter': 4.591328343606354,
  'nDCG(100)': 0.15491540164486076,
  'nDCG(1000)': 0.2093510543762797,
  'train time(s)': 0.2563142776489258,
  'exec time(s)': 253.03286123275757},
 'Post-Single-2': {'RMSE_filter': 1.022436225700596,
  'nDCG(100)': 0.023654106468480585,
  'nDCG(1000)': 0.08639112040429298,
  'train time(s)': 0.25830936431884766,
  'exec time(s)': 126.68394017219543},
 'Post-Single-3': {'RMSE_filter': 0.846855053114996,
  'nDCG(100)': 0.04265655649402933,
  'nDCG(1000)': 0.09527679847872073,
  'train time(s)': 0.25531744956970215,
  'exec time(s)': 251.2007508277893},
 'Inter-Single-1': {'RMSE_filter': 3.9030095864579653,
  'nDCG(100)': 0.18369048146188013,
  'nDCG(1000)': 0.2299706653792887,
  'train time(s)': 258.4037415981293,
  'exec time(s)': 9.900744915008545},
 'Inter-Single-2': {'RMSE_filter': 0.9359296052574421,
  'nDCG(100)': 0.03197563989412377,
  'nDCG(1000)': 0.08869732735956162,
  'train time(s)': 267.8210301399231,
  'exec time(s)': 

In [39]:
result_df = pd.DataFrame.from_dict(result, orient='index', columns=['RMSE_filter', 'nDCG(100)', 'nDCG(1000)', 'train time(s)', 'exec time(s)'])
result_df.to_csv(output_path + 'evaluation_adv.csv')
result_df

Unnamed: 0,RMSE_filter,nDCG(100),nDCG(1000),train time(s),exec time(s)
Post-Single-1,4.591328,0.154915,0.209351,0.256314,253.032861
Post-Single-2,1.022436,0.023654,0.086391,0.258309,126.68394
Post-Single-3,0.846855,0.042657,0.095277,0.255317,251.200751
Inter-Single-1,3.90301,0.18369,0.229971,258.403742,9.900745
Inter-Single-2,0.93593,0.031976,0.088697,267.82103,15.809842
Inter-Single-3,0.833578,0.044739,0.098245,386.190674,18.778377
Post-Mixed,3.176995,0.058899,0.110157,0.258368,223.69678
Inter-Mixed,2.743762,0.032686,0.032686,288.586441,13.235717
