In [1]:
# import modules
import copy
import time
from sys import path
from collections import defaultdict

In [2]:
import pandas as pd
from surprise import SVD, Reader, Dataset, accuracy
from surprise.model_selection import train_test_split
from surprise.prediction_algorithms.predictions import Prediction

In [3]:
# Add the project's source directory to the import search path so the local
# modules imported in the next cell can be found (presumably svd_constraint,
# post_rec, inter_rec and evaluate live there -- confirm). The path is relative,
# so it depends on the directory the kernel is started from.
path.append('../code/src')

In [4]:
import svd_constraint
from post_rec import PostRec
from inter_rec import InterRec
from evaluate import Evaluation as ev

In [5]:
# --- constants ---
# Directories for the input data and the written results, and the names of
# the rating / recipe-attribute files inside the input directory.
input_path = '../data/'
output_path = '../result/'
rate_file = 'reduced_rating_data.csv'
attr_file = 'recipe_data.csv'

# user / item max
user_max, food_max = 1000, 5000

# Number of constraint files per configuration; the evaluation cells loop over
# indices 1..const_count and divide the summed RMSE by this value.
const_count = 3

In [6]:
# Keys of the `result` dictionary.
# Single-constraint configurations: the constraint type number is appended to
# these prefixes.
rs2, rs3 = 'Post-Single-', 'Inter-Single-'
# Mixed-constraint configurations: used as complete keys.
rs4, rs5 = 'Post-Mixed', 'Inter-Mixed'
# Names of the two metrics stored for every configuration.
val_r1, val_r2 = 'RMSE', 'RMSE_filter'

In [7]:
# Template entry: both metrics start at 0 until the matching evaluation cell runs.
val_dict = {val_r1: 0, val_r2: 0}

# One key per single-constraint type (1..3) for each recommender, followed by
# the two mixed-constraint keys.
keys = (
    [rs2 + str(i) for i in range(1, 4)]
    + [rs3 + str(i) for i in range(1, 4)]
    + [rs4, rs5]
)

# Every configuration gets its own independent copy of the template.
result = {key: copy.deepcopy(val_dict) for key in keys}

In [None]:
# Read the previous result, if one has been saved, so that configurations which
# are not re-run in this session keep their stored values.
# The CSV has one row per configuration; after the transpose, to_dict() yields
# {configuration: {metric: value}}, the same shape as `result`.
try:
    result_df = pd.read_csv(output_path + 'evaluation_rmse.csv', index_col = 0).transpose()
except FileNotFoundError:
    # First run (nothing saved yet): keep the zero-initialised `result` instead
    # of aborting the notebook.
    print('No previous result found at ' + output_path + 'evaluation_rmse.csv; starting from zeros')
else:
    result.update(result_df.to_dict())
result

# Post-Rec Single

In [10]:
def run_post_single(ctype, idx):
    """Train and evaluate PostRec on one single-type constraint file.

    PostRec applies the constraint after the rating of each item is predicted.

    Args:
        ctype: constraint type number; together with ``idx`` it selects the
            file 'const_<ctype>.<idx>.csv' in ``input_path``.
        idx: index of the constraint file within that type.

    Returns:
        A pair ``(r1, r2)``: ``ev.calculate_rmse`` over all test-set
        predictions, and over the subset returned by ``rec.filter_set``.
    """
    constraint_file = input_path + 'const_' + str(ctype) + '.' + str(idx) + '.csv'
    rec = PostRec(
        input_path + rate_file,
        input_path + attr_file,
        constraint_file,
        need_test=True,
    )

    # Load rating, attribute and recipe data, then fit the model.
    rec.get_data()
    rec.train()

    # Predict ratings for the test set and derive the filtered subset.
    predict_test = rec.test_rmse()
    filtered = rec.filter_set(predict_test)

    return ev.calculate_rmse(predict_test), ev.calculate_rmse(filtered)

In [11]:
# For each constraint type (1..3), average both RMSE values over its
# `const_count` constraint files and store them under the Post-Single key.
for i in range(1, 4):
    r1_sum = r2_sum = 0
    for j in range(1, const_count + 1):
        r1, r2 = run_post_single(i, j)
        r1_sum += r1
        r2_sum += r2
        print('Const_' + str(i) + '.' + str(j) + " done")

    result[rs2 + str(i)].update({
        val_r1: r1_sum / const_count,
        val_r2: r2_sum / const_count,
    })

Const_1.1 done
Const_1.2 done
Const_1.3 done
Const_2.1 done
Const_2.2 done
Const_2.3 done
Const_3.1 done
Const_3.2 done
Const_3.3 done


# Inter-Rec Single

In [13]:
def run_inter_single(ctype, idx):
    """Train and evaluate InterRec on one single-type constraint file.

    InterRec applies the constraint while the rating of each item is predicted
    (rather than afterwards, as PostRec does); it is constructed with a
    ``svd_constraint.CnstSVD()`` instance.

    Args:
        ctype: constraint type number; together with ``idx`` it selects the
            file 'const_<ctype>.<idx>.csv' in ``input_path``.
        idx: index of the constraint file within that type.

    Returns:
        A pair ``(r1, r2)``: ``ev.calculate_rmse`` over all test-set
        predictions, and over the subset returned by ``rec.filter_set``.
    """
    constraint_file = input_path + 'const_' + str(ctype) + '.' + str(idx) + '.csv'
    rec = InterRec(
        input_path + rate_file,
        input_path + attr_file,
        constraint_file,
        svd_constraint.CnstSVD(),
        need_test=True,
    )

    # Load rating, attribute and recipe data, then fit the model.
    rec.get_data()
    rec.train()

    # Predict ratings for the test set and derive the filtered subset.
    predict_test = rec.test_rmse()
    filtered = rec.filter_set(predict_test)

    return ev.calculate_rmse(predict_test), ev.calculate_rmse(filtered)

In [14]:
# For each constraint type (1..3), average both RMSE values over its
# `const_count` constraint files and store them under the Inter-Single key.
for i in range(1, 4):
    r1_sum = r2_sum = 0
    for j in range(1, const_count + 1):
        r1, r2 = run_inter_single(i, j)
        r1_sum += r1
        r2_sum += r2
        print('Const_' + str(i) + '.' + str(j) + " done")

    result[rs3 + str(i)].update({
        val_r1: r1_sum / const_count,
        val_r2: r2_sum / const_count,
    })

Const_1.1 done
Const_1.2 done
Const_1.3 done
Const_2.1 done
Const_2.2 done
Const_2.3 done
Const_3.1 done
Const_3.2 done
Const_3.3 done


# Post-Rec Mixed

In [16]:
def run_post(idx):
    """Train and evaluate PostRec on one mixed constraint file.

    PostRec applies the constraint after the rating of each item is predicted.

    Args:
        idx: index of the constraint file; selects 'const.<idx>.csv' in
            ``input_path``.

    Returns:
        A pair ``(r1, r2)``: ``ev.calculate_rmse`` over all test-set
        predictions, and over the subset returned by ``rec.filter_set``.
    """
    constraint_file = input_path + 'const' + '.' + str(idx) + '.csv'
    rec = PostRec(
        input_path + rate_file,
        input_path + attr_file,
        constraint_file,
        need_test=True,
    )

    # Load rating, attribute and recipe data, then fit the model.
    rec.get_data()
    rec.train()

    # Predict ratings for the test set and derive the filtered subset.
    predict_test = rec.test_rmse()
    filtered = rec.filter_set(predict_test)

    return ev.calculate_rmse(predict_test), ev.calculate_rmse(filtered)

In [17]:
# Average both RMSE values over the `const_count` mixed constraint files and
# store them under the Post-Mixed key.
r1_sum = r2_sum = 0
for j in range(1, const_count + 1):
    r1, r2 = run_post(j)
    r1_sum += r1
    r2_sum += r2
    print('Const' + '.' + str(j) + " done")

result[rs4].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# Inter-Rec Mixed

In [18]:
def run_inter(idx):
    """Train and evaluate InterRec on one mixed constraint file.

    InterRec applies the constraint while the rating of each item is
    predicted; it is constructed with a ``svd_constraint.CnstSVD()`` instance.

    Args:
        idx: index of the constraint file; selects 'const.<idx>.csv' in
            ``input_path``.

    Returns:
        A pair ``(r1, r2)``: ``ev.calculate_rmse`` over all test-set
        predictions, and over the subset returned by ``rec.filter_set``.
    """
    constraint_file = input_path + 'const' + '.' + str(idx) + '.csv'
    rec = InterRec(
        input_path + rate_file,
        input_path + attr_file,
        constraint_file,
        svd_constraint.CnstSVD(),
        need_test=True,
    )

    # Load rating, attribute and recipe data, then fit the model.
    rec.get_data()
    rec.train()

    # Predict ratings for the test set and derive the filtered subset.
    predict_test = rec.test_rmse()
    filtered = rec.filter_set(predict_test)

    return ev.calculate_rmse(predict_test), ev.calculate_rmse(filtered)

In [21]:
# Average both RMSE values over the `const_count` mixed constraint files and
# store them under the Inter-Mixed key.
r1_sum = r2_sum = 0
for j in range(1, const_count + 1):
    r1, r2 = run_inter(j)
    r1_sum += r1
    r2_sum += r2
    print('Const' + '.' + str(j) + " done")

result[rs5].update({
    val_r1: r1_sum / const_count,
    val_r2: r2_sum / const_count,
})

Const.1 done
Const.2 done
Const.3 done


# result

In [15]:
# Display the accumulated results.
# NOTE(review): the saved output below shows 0 for Post-Mixed / Inter-Mixed while
# the final table has real values, so this cell was last run before the mixed
# evaluation cells -- re-run the notebook top to bottom to refresh it.
result

{'Post-Single-1': {'RMSE': 4.592453002304395,
  'RMSE_filter': 0.5287663497411557},
 'Post-Single-2': {'RMSE': 0.9879374977136214,
  'RMSE_filter': 0.7622853376587843},
 'Post-Single-3': {'RMSE': 0.8401123859236184,
  'RMSE_filter': 0.8383658016993903},
 'Inter-Single-1': {'RMSE': 3.897424252227903,
  'RMSE_filter': 0.5229552105203664},
 'Inter-Single-2': {'RMSE': 0.9258899533820216,
  'RMSE_filter': 0.7617867227228471},
 'Inter-Single-3': {'RMSE': 0.8279185841737985,
  'RMSE_filter': 0.8261491282428216},
 'Post-Mixed': {'RMSE': 0, 'RMSE_filter': 0},
 'Inter-Mixed': {'RMSE': 0, 'RMSE_filter': 0}}

In [22]:
# Tabulate the results (one row per configuration, one column per metric),
# write them to the CSV that the "read previous result" cell loads, and
# display the table.
result_df = pd.DataFrame.from_dict(
    result,
    orient='index',
    columns=['RMSE', 'RMSE_filter'],
)
result_df.to_csv(output_path + 'evaluation_rmse.csv')
result_df

Unnamed: 0,RMSE,RMSE_filter
Post-Single-1,4.592453,0.528766
Post-Single-2,0.987937,0.762285
Post-Single-3,0.840112,0.838366
Inter-Single-1,3.897424,0.522955
Inter-Single-2,0.92589,0.761787
Inter-Single-3,0.827919,0.826149
Post-Mixed,3.184157,0.768818
Inter-Mixed,2.755669,0.765353
