## Import packages

In [29]:
import pandas as pd
import numpy as np
from CFModel import CFModel


## Define constants


In [30]:
RATINGS_CSV_FILE = './input/rating_result.csv'
TEST_CSV_FILE = './test_result.csv'
USERS_CSV_FILE = './input/user_result.csv'
RECIPE_CSV_FILE = './input/recipe_result.csv'
MODEL_WEIGHTS_FILE = 'recipe_weights.h5'
K_FACTORS = 120
TEST_USER = 200

## Load Recipe data

In [31]:
ratings = pd.read_csv(RATINGS_CSV_FILE, sep=',', encoding='utf-8', usecols=['userid', 'recipeid', 'rating'])
max_userid = ratings['userid'].drop_duplicates().max() + 1
max_recipeid = ratings['recipeid'].drop_duplicates().max() + 1
print(len(ratings), 'ratings loaded.')
print(max_userid)
print(max_recipeid)

(1048575, 'ratings loaded.')
12936
5023


In [32]:
test_ratings = pd.read_csv(TEST_CSV_FILE, sep=',', encoding='utf-8', usecols=['userid', 'recipeid', 'rating'])
print(len(test_ratings), 'ratings loaded.')

(8276, 'ratings loaded.')


In [33]:

users = pd.read_csv(USERS_CSV_FILE, sep=',', encoding='utf-8', usecols=['userid', 'nickname'])
print(len(users), 'descriptions of', max_userid, 'users loaded.')
max_user = len(users)


(75061, 'descriptions of', 12936, 'users loaded.')


In [34]:

recipes = pd.read_csv(RECIPE_CSV_FILE, sep=',', encoding='utf-8', usecols=['recipeid'])#, 'title', 'categories'])
print(len(recipes), 'descriptions of', max_recipeid, 'recipes loaded.')


(5101, 'descriptions of', 5023, 'recipes loaded.')


## Make recommendations for a given user

In [35]:
trained_model = CFModel(max_userid, max_recipeid, K_FACTORS)

In [36]:
trained_model.load_weights(MODEL_WEIGHTS_FILE)

In [37]:
users[users['userid'] == TEST_USER]

Unnamed: 0,userid,nickname
200,200,박지수


In [38]:
def predict_rating(userid, recipeid):
    return trained_model.rate(userid, recipeid)

In [39]:
user_ratings = ratings[ratings['userid'] == TEST_USER][['userid', 'recipeid', 'rating']]
user_ratings['prediction'] = user_ratings.apply(lambda x: predict_rating(TEST_USER, x['recipeid']), axis=1)
test_rating = user_ratings.sort_values(by='prediction', 
                         ascending=False).merge(recipes, 
                                                on='recipeid',
                                                how='inner',
                                                suffixes=['_u', '_m']
                                               )
print(test_rating)

    userid  recipeid  rating  prediction
0      200        85       3    4.906492
1      200       453       5    4.872457
2      200        35       3    4.858866
3      200       983       5    4.803903
4      200       875       5    4.794510
5      200       997       5    4.786585
6      200       587       5    4.782755
7      200       993       5    4.746210
8      200       859       5    4.739516
9      200       693       5    4.739224
10     200      1073       3    4.735550
11     200       819       4    4.710590
12     200       963       3    4.677085
13     200       287       5    4.669042
14     200       945       4    4.658239
15     200      1032       5    4.653538
16     200       779       5    4.627000
17     200       463       4    4.626307
18     200      1007       5    4.609961
19     200       831       5    4.607461
20     200      1024       5    4.602902
21     200      1080       5    4.582709
22     200       683       3    4.580012
23     200      

In [40]:
recommendations = ratings[ratings['recipeid'].isin(user_ratings['recipeid']) == False][['recipeid']].drop_duplicates()
recommendations['prediction'] = recommendations.apply(lambda x: predict_rating(TEST_USER, x['recipeid']), axis=1)
recommendations.sort_values(by='prediction',
                          ascending=False).merge(recipes,
                                                 on='recipeid',
                                                 how='inner',
                                                 suffixes=['_u', '_m']).head(10)

Unnamed: 0,recipeid,prediction
0,583,5.206446
1,563,5.11718
2,293,5.112055
3,217,5.08215
4,975,5.063018
5,4085,5.06186
6,4215,5.061845
7,195,5.055509
8,4695,5.052623
9,4745,5.041393


In [41]:
import evaluate

In [42]:
def dcg(relevances, rank=10):
    """Discounted cumulative gain at rank (DCG)"""
    relevances = np.asarray(relevances)[:rank]
    n_relevances = len(relevances)
    if n_relevances == 0:
        return 0.

    discounts = np.log2(np.arange(n_relevances) + 2)
    return np.sum(relevances / discounts)

def ndcg(relevances, rank=10):
    """Normalized discounted cumulative gain (NDGC)"""
    best_dcg = dcg(sorted(relevances, reverse=True), rank)
    if best_dcg == 0:
        return 0.

    return dcg(relevances, rank) / best_dcg

In [43]:
print(test_rating)
tprediction = list(test_rating['prediction'])
trating = list(test_rating['rating'])
tprediction = map(round, tprediction) 
tprediction = map(int, tprediction) 
print(tprediction)
print(trating)
print(evaluate.ndcg_score(trating,tprediction, 10))
print(ndcg(tprediction, 1))

    userid  recipeid  rating  prediction
0      200        85       3    4.906492
1      200       453       5    4.872457
2      200        35       3    4.858866
3      200       983       5    4.803903
4      200       875       5    4.794510
5      200       997       5    4.786585
6      200       587       5    4.782755
7      200       993       5    4.746210
8      200       859       5    4.739516
9      200       693       5    4.739224
10     200      1073       3    4.735550
11     200       819       4    4.710590
12     200       963       3    4.677085
13     200       287       5    4.669042
14     200       945       4    4.658239
15     200      1032       5    4.653538
16     200       779       5    4.627000
17     200       463       4    4.626307
18     200      1007       5    4.609961
19     200       831       5    4.607461
20     200      1024       5    4.602902
21     200      1080       5    4.582709
22     200       683       3    4.580012
23     200      

In [44]:
import measures
import random

In [45]:
def test_measures(reference, hypothesis):
    """
    Runs all rank-ordering evaluation measures on given pair of lists.
    """

    print("\t DCG:\t\t\t{0}".format(measures.find_dcg(hypothesis)))
    print("\t NDCG:\t\t\t{0}".format(measures.find_ndcg(reference, hypothesis)))
    print("\t Precision:\t\t{0}".format(measures.find_precision(reference, hypothesis)))
    print("\t Precision at k:\t{0}".format(measures.find_precision_k(reference, hypothesis, len(reference))))
    print("\t Average precision:\t{0}".format(measures.find_average_precision(reference, hypothesis)))
    print("\t RankDCG:\t\t{0}".format(measures.find_rankdcg(reference, hypothesis), "\n"))
print(trating)
print(tprediction)

tprediction = map(round, tprediction)
tprediction = map(int, tprediction)
test_measures(trating, tprediction)

[3, 5, 3, 5, 5, 5, 5, 5, 5, 5, 3, 4, 3, 5, 4, 5, 5, 4, 5, 5, 5, 5, 3, 5, 4, 5, 3, 5, 5, 5, 3, 3, 3, 4, 5, 5, 4, 5, 3, 1, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 3, 5, 1, 1, 1]
[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
	 DCG:			105.509186074
	 NDCG:			1.08258182649
	 Precision:		0.367647058824
	 Precision at k:	0.367647058824
	 Average precision:	0.54124026562
	 RankDCG:		0.0648648648649


In [46]:
sum_dcg = 0
sum_ndcg = 0
sum_pre = 0
sum_preK = 0
sum_avgpre = 0
sum_rankdcg = 0
sum_predict = 0
i = 0
loop_boolean = True
for i in range(0, 100):
    
    TEST_USER = random.randrange(1, i+1000)
    user_ratings = ratings[ratings['userid'] == TEST_USER][['userid', 'recipeid', 'rating']]
    

    while len(user_ratings) < 100:
        TEST_USER = random.randrange(1, i+1000)
        user_ratings = ratings[ratings['userid'] == TEST_USER][['userid', 'recipeid', 'rating']]
     
    user_ratings['prediction'] = user_ratings.apply(lambda x: predict_rating(TEST_USER, x['recipeid']), axis=1)
    test_rating = user_ratings.sort_values(by='rating', 
                             ascending=False).merge(recipes, 
                                                    on='recipeid', 
                                                    how='inner', 
                                                    suffixes=['_u', '_m'])
    print("i :" + str(i) + ", id:" + str(TEST_USER) + ", len: " + str(len(test_rating)))
    tprediction = list(test_rating['prediction'])
    tprediction = map(round, tprediction)
    tprediction = map(int, tprediction)     
    trating = list(test_rating['rating'])

    tprediction_k = tprediction[:10]
    trating_k = trating[:10]
    sum_dcg = measures.find_dcg(tprediction_k)
    sum_ndcg += measures.find_ndcg(trating_k, tprediction_k)
    sum_pre += measures.find_precision(trating_k, tprediction_k)
    sum_preK += measures.find_precision_k(trating_k, tprediction_k, len(tprediction_k))
    sum_avgpre += measures.find_average_precision(trating_k, tprediction_k)
    sum_rankdcg += measures.find_rankdcg(trating, tprediction)

    
    for predict in trating[:10]:
        sum_predict += predict
       

i :0, id:470, len: 113
i :1, id:159, len: 220
i :2, id:351, len: 106
i :3, id:381, len: 150
i :4, id:527, len: 339
i :5, id:799, len: 195
i :6, id:314, len: 295
i :7, id:124, len: 127
i :8, id:938, len: 173
i :9, id:106, len: 2027
i :10, id:737, len: 110
i :11, id:502, len: 261
i :12, id:517, len: 232
i :13, id:502, len: 261
i :14, id:362, len: 100
i :15, id:940, len: 141
i :16, id:167, len: 218
i :17, id:935, len: 349
i :18, id:367, len: 1094
i :19, id:764, len: 279
i :20, id:519, len: 311
i :21, id:793, len: 120
i :22, id:648, len: 142
i :23, id:158, len: 163
i :24, id:1003, len: 188
i :25, id:460, len: 294
i :26, id:495, len: 102
i :27, id:290, len: 240
i :28, id:751, len: 185
i :29, id:408, len: 399
i :30, id:794, len: 169
i :31, id:231, len: 559
i :32, id:393, len: 279
i :33, id:368, len: 759
i :34, id:31, len: 848
i :35, id:437, len: 215
i :36, id:94, len: 175
i :37, id:88, len: 125
i :38, id:233, len: 128
i :39, id:502, len: 261
i :40, id:483, len: 1176
i :41, id:108, len: 156
i

In [47]:
avg_dcg = sum_dcg / 100
avg_ndcg = sum_ndcg / (100)
avg_pre = sum_pre / 100
avg_preK = sum_preK / 100
avg_avgpre = sum_avgpre / 100
avg_rankdcg = sum_rankdcg / (100)
avg_predict = sum_predict / 1000.0

print("\t DCG:\t\t\t{0}".format(avg_dcg))
print("\t NDCG:\t\t\t{0}".format(avg_ndcg))
print("\t Precision:\t\t{0}".format(avg_pre))
print("\t Precision_at_K:\t{0}".format(avg_preK))
print("\t Avrage_precision:\t{0}".format(avg_avgpre))
print("\t RankDCG:\t\t{0}".format(avg_rankdcg))
print("\t avg_predict:\t\t{0}".format(avg_predict))

	 DCG:			0.311611125822
	 NDCG:			0.880091010115
	 Precision:		0.419
	 Precision_at_K:	0.419
	 Avrage_precision:	0.422478571429
	 RankDCG:		0.192134287606
	 avg_predict:		5.0
