In [1]:
# import modules
import copy
import time
from sys import path
from collections import defaultdict

In [2]:
import pandas as pd
from surprise import SVD, Reader, Dataset, accuracy
from surprise.model_selection import train_test_split

In [3]:
# Add the project source directory to the import search path
# (presumably where the local modules imported in the next cell live).
# The membership check keeps sys.path free of duplicates when this cell is re-run.
src_dir = '../code/src'
if src_dir not in path:
    path.append(src_dir)

In [4]:
import svd_constraint
from inter_rec import InterRec
from evaluate import Evaluation as ev

In [5]:
# --- configuration constants ---
# Directories: where the input data is read from / where results are written.
input_path, output_path = "../data/", '../result/'
# File names of the rating data and of the recipe attribute data.
rate_file, attr_file = 'reduced_rating_data.csv', 'recipe_data.csv'

# user / item maxima (not referenced by the cells visible in this notebook)
user_max, food_max = 1000, 5000

# number of constraint files evaluated per algorithm (const.1.csv .. const.<const_count>.csv)
const_count = 3

In [6]:
# Keys of the nested result dictionary.
# Outer level: one label per evaluated algorithm variant.
rs1, rs2, rs3, rs4 = 'CnstSVD', 'CnstSVD-all', 'CnstSVD-hard', 'CnstSVD-harder'
# Inner level: quality metrics ...
val_r1, val_r2 = 'RMSE', 'nDCG'
# ... and timing measurements, both in seconds.
val_t1, val_t2 = 'train time(s)', 'exec time(s)'

In [7]:
# Template entry: every metric starts at zero.
val_dict = dict.fromkeys([val_r1, val_r2, val_t1, val_t2], 0)

# Algorithm variants that get an entry in the result table.
keys = [rs1, rs2, rs3, rs4]

# Each variant receives its own independent copy of the template,
# so filling in one entry never touches another.
result = {}
for key in keys:
    result[key] = copy.deepcopy(val_dict)

In [8]:
# read previous result
# The CSV is written by the last cell of this notebook, so it does not exist
# on a first run. In that case keep the zero-initialised entries instead of
# aborting, so the notebook survives "Restart Kernel -> Run All".
try:
    result_df = pd.read_csv(output_path + 'evaluation_SVD.csv', index_col = 0).transpose()
except FileNotFoundError:
    print('No previous result found; starting from zero-initialised values')
else:
    # After transposing, columns are algorithm names, so to_dict() yields
    # {algorithm: {metric: value}}, the same shape as `result`.
    result.update(result_df.to_dict())
result

{'CnstSVD': {'RMSE': 2.7557092509505217,
  'nDCG': 0.0507049115302035,
  'train time(s)': 289.63712700208026,
  'exec time(s)': 13.816434780756632},
 'CnstSVD-all': {'RMSE': 2.755484698077936,
  'nDCG': 0.0478820478116224,
  'train time(s)': 289.65573898951214,
  'exec time(s)': 13.525635798772177},
 'CnstSVD-hard': {'RMSE': 2.7617482917114877,
  'nDCG': 0.0190228721422972,
  'train time(s)': 290.13506960868835,
  'exec time(s)': 13.59165088335673},
 'CnstSVD-harder': {'RMSE': 2.7557959903092843,
  'nDCG': 0.0454090316923035,
  'train time(s)': 287.95565215746564,
  'exec time(s)': 13.50331203142802}}

# Evaluate CnstSVD

In [10]:
def run_inter(idx, algo, k):    
    # InterRec applies constraint after the rating of each item is predicted
    rec = InterRec(input_path + rate_file, input_path + attr_file, 
                           input_path + 'const' + '.' + str(idx) + '.csv', 
                            algo, need_test = True)
    rec.set_result_n(k)
    rec.get_data()  # get rating, attribute, recipe data
    
    # train with data
    start = time.time()
    rec.train()  
    t1 = time.time() - start
    
    # predict rating for test-set
    predict_test = rec.test_rmse()
    r1 = ev.calculate_rmse(predict_test)
    
    # get top-n for anti-test-set
    start = time.time()
    rec.test()
    top_n_df = rec.get_top_n()
    t2 = time.time() - start

    # calculate nDCG
    r2 = ev.calculate_ndcg(rec.get_rel(), top_n_df, k)
        
    return r1, r2, t1, t2

In [9]:
# Accumulate RMSE, nDCG, train time and exec time of CnstSVD over all
# constraint files, then store the per-file averages under rs1.
r1_sum = r2_sum = t1_sum = t2_sum = 0
for j in range(1, const_count + 1):
    r1, r2, t1, t2 = run_inter(j, svd_constraint.CnstSVD(), 50)
    r1_sum += r1
    r2_sum += r2
    t1_sum += t1
    t2_sum += t2
    print('Const.{} done'.format(j))

result[rs1].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
    val_t1: t1_sum / const_count,
    val_t2: t2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# Evaluate CnstSVD_all

In [10]:
# Accumulate RMSE, nDCG, train time and exec time of CnstSVD_all over all
# constraint files, then store the per-file averages under rs2.
r1_sum = r2_sum = t1_sum = t2_sum = 0
for j in range(1, const_count + 1):
    r1, r2, t1, t2 = run_inter(j, svd_constraint.CnstSVD_all(), 50)
    r1_sum += r1
    r2_sum += r2
    t1_sum += t1
    t2_sum += t2
    print('Const.{} done'.format(j))

result[rs2].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
    val_t1: t1_sum / const_count,
    val_t2: t2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# Evaluate CnstSVD_hard

In [11]:
# Accumulate RMSE, nDCG, train time and exec time of CnstSVD_hard over all
# constraint files, then store the per-file averages under rs3.
r1_sum = r2_sum = t1_sum = t2_sum = 0
for j in range(1, const_count + 1):
    r1, r2, t1, t2 = run_inter(j, svd_constraint.CnstSVD_hard(), 50)
    r1_sum += r1
    r2_sum += r2
    t1_sum += t1
    t2_sum += t2
    print('Const.{} done'.format(j))

result[rs3].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
    val_t1: t1_sum / const_count,
    val_t2: t2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# Evaluate CnstSVD_harder

In [12]:
# Accumulate RMSE, nDCG, train time and exec time of CnstSVD_harder over all
# constraint files, then store the per-file averages under rs4.
r1_sum = r2_sum = t1_sum = t2_sum = 0
for j in range(1, const_count + 1):
    r1, r2, t1, t2 = run_inter(j, svd_constraint.CnstSVD_harder(), 50)
    r1_sum += r1
    r2_sum += r2
    t1_sum += t1
    t2_sum += t2
    print('Const.{} done'.format(j))

result[rs4].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
    val_t1: t1_sum / const_count,
    val_t2: t2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# Result

In [12]:
# Build the summary table (one row per algorithm variant), write it to the
# result file, and display it.
# The column labels come from the shared key constants rather than repeated
# string literals, so they cannot drift from the keys stored in `result`.
result_df = pd.DataFrame.from_dict(result, orient='index', columns=[val_r1, val_r2, val_t1, val_t2])
result_df.to_csv(output_path + 'evaluation_SVD.csv')
result_df

Unnamed: 0,RMSE,nDCG,train time(s),exec time(s)
CnstSVD,2.755709,0.050705,289.637127,13.816435
CnstSVD-all,2.755485,0.047882,289.655739,13.525636
CnstSVD-hard,2.755557,0.041018,286.625216,13.718161
CnstSVD-harder,2.755796,0.045409,287.955652,13.503312
