In [1]:
# import modules
import copy
import time
from sys import path
from collections import defaultdict

In [2]:
import pandas as pd
from surprise import SVD
from surprise import Reader
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split

In [3]:
# Make the project sources importable. The membership guard keeps the cell
# idempotent: re-running it no longer appends a duplicate sys.path entry.
src_dir = '../code/src'
if src_dir not in path:
    path.append(src_dir)

In [4]:
import svd_constraint
from post_rec import PostRec
from inter_rec import InterRec
from evaluate import Evaluation as ev

In [5]:
# constants
# -- file locations (relative to the notebook) and data file names
input_path = "../data/"
output_path = '../result/'
rate_file, attr_file = 'reduced_rating_data.csv', 'recipe_data.csv'

# -- user/item max
user_max, food_max = 1000, 5000

# -- const count
const_count = 3

In [6]:
# Dictionary keys
# -- labels for the entries of the result dictionary
#    (rs2 / rs3 are prefixes that get a number appended later)
rs1, rs2, rs3 = 'w/o Constraint', 'Post-Single-', 'Inter-Single-'
rs4, rs5 = 'Post-Mixed', 'Inter-Mixed'
# -- labels for the measured values
val_r1, val_r2 = 'RMSE', 'nDCG'
val_t1, val_t2 = 'train time(s)', 'exec time(s)'

In [7]:
# Template holding one zeroed slot per measured value.
val_dict = dict.fromkeys([val_r1, val_r2, val_t1, val_t2], 0)

# Result labels in display order: baseline, the numbered single-constraint
# variants (1..3) for Post and Inter, then the two mixed variants.
keys = [
    rs1,
    *(f"{rs2}{i}" for i in range(1, 4)),
    *(f"{rs3}{i}" for i in range(1, 4)),
    rs4,
    rs5,
]

# Every label gets its own independent copy of the template.
result = {}
for key in keys:
    result[key] = copy.deepcopy(val_dict)

In [18]:
# read previous result
# evaluation.csv is written by the final cell of this notebook, so on a
# fresh checkout it does not exist yet. In that case keep the zero-initialised
# `result` instead of aborting the whole run.
try:
    result_df = pd.read_csv(output_path + 'evaluation.csv', index_col = 0).transpose()
except FileNotFoundError:
    pass
else:
    # After the transpose each column is one result label, so to_dict()
    # yields {label: {value name: value}} -- the same layout as `result`.
    result.update(result_df.to_dict())
result

{'w/o Constraint': {'RMSE': 0.7682821465950518,
  'nDCG': 0.0051929891878815,
  'train time(s)': 0.2593071460723877,
  'exec time(s)': 14.500491380691528},
 'Post-Single-1': {'RMSE': 4.592464472038014,
  'nDCG': 0.0912816459995951,
  'train time(s)': 0.2559752464294433,
  'exec time(s)': 48.29167032241821},
 'Post-Single-2': {'RMSE': 0.9879921130609528,
  'nDCG': 0.0137376545335493,
  'train time(s)': 0.255358616511027,
  'exec time(s)': 17.162640412648518},
 'Post-Single-3': {'RMSE': 0.8400977749904025,
  'nDCG': 0.017505186056049,
  'train time(s)': 0.2569793065388997,
  'exec time(s)': 31.868903636932373},
 'Inter-Single-1': {'RMSE': 3.897510436583488,
  'nDCG': 0.0744697874431628,
  'train time(s)': 253.95080041885376,
  'exec time(s)': 9.562866846720375},
 'Inter-Single-2': {'RMSE': 0.926631068766342,
  'nDCG': 0.0130819371634332,
  'train time(s)': 265.93812640508014,
  'exec time(s)': 15.940479675928753},
 'Inter-Single-3': {'RMSE': 0.8272394638499362,
  'nDCG': 0.01701421051457

# Evaluate Post-Rec w/ Mixed Type Constraint

In [9]:
def run_post(idx, save_result = True):
    """Run one PostRec evaluation for the constraint file ``const.<idx>.csv``.

    PostRec applies the constraint after the rating of each item is predicted.

    Args:
        idx: number inserted into the constraint file name
            (``input_path + 'const.<idx>.csv'``) and into the output file name.
        save_result: when true, the top-n frame is written to
            ``output_path + 'PostRec.<idx>.csv'``.

    Returns:
        Tuple ``(r1, r2, t1, t2)``:
        r1 -- value of ``ev.calculate_rmse`` on the test-set predictions,
        r2 -- value of ``ev.calculate_ndcg`` (called with 10) on the top-n frame,
        t1 -- elapsed seconds of ``rec.train()``,
        t2 -- elapsed seconds of ``rec.test()`` plus ``rec.get_top_n()``.
    """
    rec = PostRec(input_path + rate_file, input_path + attr_file,
                  input_path + 'const' + '.' + str(idx) + '.csv',
                  need_test = True)

    rec.get_data()  # get rating, attribute, recipe data

    # train with data
    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted and would skew the measurement.
    start = time.perf_counter()
    rec.train()
    t1 = time.perf_counter() - start

    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(predict_test)

    # get top-n for anti-test-set
    start = time.perf_counter()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.perf_counter() - start

    # calculate ndcg
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 10)

    if save_result:
        top_n_df.to_csv(output_path + 'PostRec' + '.' + str(idx) + '.csv')

    return r1, r2, t1, t2

In [10]:
# Run PostRec once per constraint file and store the per-value averages
# under the Post-Mixed label.
metric_keys = (val_r1, val_r2, val_t1, val_t2)
totals = [0] * len(metric_keys)
for const_idx in range(1, const_count + 1):
    scores = run_post(const_idx, True)
    # running sums, in the same order run_post returns its values
    totals = [acc + score for acc, score in zip(totals, scores)]
    print(f"Const.{const_idx} done")

for metric_key, total in zip(metric_keys, totals):
    result[rs4][metric_key] = total / const_count

Const.1 done
Const.2 done
Const.3 done


### Sample Dataframe

In [11]:
# Load the top-n table saved as PostRec.1.csv and show its first rows.
post_sample = pd.read_csv(output_path + 'PostRec.1.csv', index_col=0)
post_sample.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,i1,i2,nl
0,25.0,1715.0,425.0,2657.0,565.0,2851.0,1454.0,2912.0,4336.0,3189.0,1505.0,,
2,3198.0,3781.0,4912.0,1089.0,1224.0,1481.0,942.0,281.0,1871.0,3424.0,7233.0,,
3,3431.0,4665.0,1856.0,870.0,4245.0,160.0,437.0,4670.0,4302.0,4243.0,3668.0,,
4,2221.0,252.0,3670.0,2495.0,2627.0,1253.0,909.0,3616.0,1548.0,2527.0,4574.0,,
5,1926.0,1984.0,4493.0,205.0,3949.0,681.0,4793.0,3417.0,4841.0,3389.0,332.0,,


# Evaluate Inter-Rec w/ Mixed Type Constraint

In [12]:
def run_inter(idx, save_result = True):
    """Run one InterRec evaluation for the constraint file ``const.<idx>.csv``.

    InterRec applies the constraint while the rating of each item is being
    predicted; it is constructed with a ``svd_constraint.CnstSVD()`` instance.

    Args:
        idx: number inserted into the constraint file name
            (``input_path + 'const.<idx>.csv'``) and into the output file name.
        save_result: when true, the top-n frame is written to
            ``output_path + 'InterRec.<idx>.csv'``.

    Returns:
        Tuple ``(r1, r2, t1, t2)``:
        r1 -- value of ``ev.calculate_rmse`` on the test-set predictions,
        r2 -- value of ``ev.calculate_ndcg`` (called with 10) on the top-n frame,
        t1 -- elapsed seconds of ``rec.train()``,
        t2 -- elapsed seconds of ``rec.test()`` plus ``rec.get_top_n()``.
    """
    rec = InterRec(input_path + rate_file, input_path + attr_file,
                   input_path + 'const' + '.' + str(idx) + '.csv',
                   svd_constraint.CnstSVD(), need_test = True)

    rec.get_data()  # get rating, attribute, recipe data

    # train with data
    # perf_counter() is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted and would skew the measurement.
    start = time.perf_counter()
    rec.train()
    t1 = time.perf_counter() - start

    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(predict_test)

    # get top-n for anti-test-set
    start = time.perf_counter()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.perf_counter() - start

    # calculate nDCG
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, 10)

    if save_result:
        top_n_df.to_csv(output_path + 'InterRec' + '.' + str(idx) + '.csv')

    return r1, r2, t1, t2

In [13]:
# Run InterRec once per constraint file and store the per-value averages
# under the Inter-Mixed label.
metric_keys = (val_r1, val_r2, val_t1, val_t2)
totals = [0] * len(metric_keys)
for const_idx in range(1, const_count + 1):
    scores = run_inter(const_idx, True)
    # running sums, in the same order run_inter returns its values
    totals = [acc + score for acc, score in zip(totals, scores)]
    print(f"Const.{const_idx} done")

for metric_key, total in zip(metric_keys, totals):
    result[rs5][metric_key] = total / const_count

Const.1 done
Const.2 done
Const.3 done


In [14]:
# Load the top-n table saved as InterRec.1.csv and show its first rows.
inter_sample = pd.read_csv(output_path + 'InterRec.1.csv', index_col=0)
inter_sample.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,i1,i2,nl
309,193,3885,2568,4888,2077,1961,3707,1653,2931,678,,,"[245.1, 7.0, 7.0, 28.0, 83.0, 6.0, 1.0]"
564,648,677,812,2155,4960,4718,4091,484,3300,532,,332.0,"[35.9, 1.0, 5.0, 6.0, 5.0, 3.0, 1.0]"
560,1252,4534,301,1129,2943,1495,647,1715,1854,2691,,5180.0,"[9.2, 0.0, 2.0, 1.0, 2.0, 0.0, 0.0]"
25,633,2657,72,3146,2086,4686,4118,4270,4509,474,5006.0,,
873,3138,5,1221,1557,239,1752,4583,75,4096,569,6276.0,6270.0,"[61.8, 0.0, 42.0, 7.0, 0.0, 0.0, 5.0]"


# Result

In [20]:
# Build the summary table (one row per result label) and persist it so the
# "read previous result" cell can reload it on a later run.
# Column labels come from the key constants rather than repeated literals,
# so they cannot drift from the keys stored inside `result`.
result_df = pd.DataFrame.from_dict(
    result, orient='index', columns=[val_r1, val_r2, val_t1, val_t2]
)
result_df.to_csv(output_path + 'evaluation.csv')
result_df

Unnamed: 0,RMSE,nDCG,train time(s),exec time(s)
w/o Constraint,0.768282,0.005193,0.259307,14.500491
Post-Single-1,4.592464,0.091282,0.255975,48.29167
Post-Single-2,0.987992,0.013738,0.255359,17.16264
Post-Single-3,0.840098,0.017505,0.256979,31.868904
Inter-Single-1,3.89751,0.07447,253.9508,9.562867
Inter-Single-2,0.926631,0.013082,265.938126,15.94048
Inter-Single-3,0.827239,0.017014,380.395853,18.645201
Post-Mixed,3.183835,0.025761,0.264625,60.507408
Inter-Mixed,2.755712,0.02526,290.665039,13.510586
