In [7]:
# Mount Google Drive into the Colab runtime, then change the working directory
# to this notebook's folder on the mounted drive.
# NOTE(review): the %cd target is a user-specific Drive path; adjust it when
# running from another account or machine.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/Othercomputers/My Computer (1)/CS608_Recommender_Systems/Project/coffee-joint-rec-sys/notebooks/training/

Mounted at /content/drive
/content/drive/Othercomputers/My Computer (1)/CS608_Recommender_Systems/Project/coffee-joint-rec-sys/notebooks/training


In [1]:
!pip install --quiet cornac==1.14.2

[K     |████████████████████████████████| 12.4 MB 7.5 MB/s 
[?25h

In [68]:
import pandas as pd
from cornac.eval_methods import BaseMethod
from cornac.models import MostPop
from cornac.metrics import Recall, NCRR, NDCG
from cornac import Experiment


In [63]:
# Root of the data directory, relative to this notebook's working directory.
PATH = "../../data/"

In [64]:
def _load_split(file_name):
    """Read one processed split and keep only the (userid, shop, rating) columns."""
    return pd.read_csv(PATH + 'processed/' + file_name)[['userid', 'shop', 'rating']]


def _build_index_map(train_ids, *other_ids):
    """Return {index: raw_id} covering every id seen in any split.

    Ids present in the training split get indices 0..n-1 in sorted order.
    Ids that only occur in the other splits are appended after them, so they
    receive a real integer index instead of becoming NaN when mapped.
    """
    known = sorted(set(train_ids))
    unseen = set()
    for ids in other_ids:
        unseen.update(ids.dropna())
    return dict(enumerate(known + sorted(unseen - set(known))))


def _encode(frame, user_lookup, shop_lookup):
    """Replace raw user/shop ids with integer indices; return a list of records."""
    encoded = frame.assign(
        userid=frame['userid'].map(user_lookup),
        shop=frame['shop'].map(shop_lookup),
    )
    # Every id must have an index; a NaN here would otherwise be passed on
    # downstream as if it were an item/user id.
    assert encoded[['userid', 'shop']].notna().all().all(), \
        'found user/shop ids that could not be mapped to an index'
    return list(encoded.to_records(index=False))


train_df = _load_split('train_lol.csv')
val_df = _load_split('val_lol.csv')
test_df = _load_split('test_lol.csv')

# index -> raw id (training ids first, ids unseen in training afterwards)
idx_user_map = _build_index_map(train_df['userid'], val_df['userid'], test_df['userid'])
idx_shop_map = _build_index_map(train_df['shop'], val_df['shop'], test_df['shop'])
# raw id -> index (inverse lookups used for encoding)
user_idx_map = {raw_id: idx for idx, raw_id in idx_user_map.items()}
shop_idx_map = {raw_id: idx for idx, raw_id in idx_shop_map.items()}

# Lists of (user_idx, shop_idx, rating) records, one list per split.
train = _encode(train_df, user_idx_map, shop_idx_map)
val = _encode(val_df, user_idx_map, shop_idx_map)
test = _encode(test_df, user_idx_map, shop_idx_map)

In [65]:
# Preview the first ten encoded training records.
train[:10]

[(0, 350, 3.),
 (0, 434, 4.),
 (0, 699, 4.),
 (0, 431, 5.),
 (1, 596, 3.),
 (2, 638, 4.),
 (2, 268, 5.),
 (2, 727, 5.),
 (2, 626, 4.),
 (2, 399, 5.)]

In [74]:
# Build the evaluation method from the pre-made splits. The validation split is
# passed as the test data, and items/users unknown to training are kept
# (exclude_unknowns=False).
eval_method = BaseMethod.from_splits(train_data=train, test_data=val,
                                     exclude_unknowns=False, verbose=True)

rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 497
Number of items = 749
Number of ratings = 2822
Max rating = 5.0
Min rating = 1.0
Global mean = 3.8
---
Test data:
Number of users = 497
Number of items = 299
Number of ratings = 497
Number of unknown users = 0
Number of unknown items = 64
---
Total users = 497
Total items = 813


In [84]:
# Baseline model: cornac's MostPop recommender with default settings.
most_pop = MostPop()

# Top-5 ranking metrics, reported in this order.
eval_metrics = [metric_cls(k=5) for metric_cls in (NCRR, NDCG, Recall)]

# Train and evaluate the model on the splits held by `eval_method`.
exp = Experiment(
    eval_method=eval_method,
    models=[most_pop],
    metrics=eval_metrics,
)
exp.run()


[MostPop] Training started!

[MostPop] Evaluation started!


Ranking:   0%|          | 0/497 [00:00<?, ?it/s]


TEST:
...
        | NCRR@5 | NDCG@5 | Recall@5 | Train (s) | Test (s)
------- + ------ + ------ + -------- + --------- + --------
MostPop | 0.0354 | 0.0405 |   0.0563 |    0.0019 |   0.2974



In [87]:
# Averaged metric values for the first (here: only) model in the experiment.
exp.result[0].metric_avg_results

OrderedDict([('NCRR@5', 0.035412474849094565),
             ('NDCG@5', 0.040479962720648216),
             ('Recall@5', 0.056338028169014086),
             ('Train (s)', 0.0019333362579345703),
             ('Test (s)', 0.29741454124450684)])

In [93]:
# Item ranking and item scores from the fitted model for the user at internal index 1.
recommendations, scores = most_pop.rank(user_idx=1)

In [95]:
# cornac mapping
user_id2idx = most_pop.train_set.uid_map
item_idx2id = dict((v,k) for k,v in most_pop.train_set.iid_map.items())

In [96]:
# apply cornac mapping
recommendations = [item_idx2id[i] for i in recommendations]
# apply own mapping
recommendations = [idx_shop_map[i] for i in recommendations]
recommendations[:10]

['maxwell-food-centre-singapore-3',
 'nylon-coffee-roasters-singapore',
 'chye-seng-huat-hardware-singapore',
 'group-therapy-coffee-singapore',
 'tiong-bahru-bakery-singapore-4',
 'lau-pa-sat-singapore-2',
 'wild-honey-singapore-3',
 'old-airport-road-food-centre-singapore',
 'toms-palette-singapore',
 'oriole-coffee-bar-singapore-2']

In [99]:
# Write the ranked shop names to disk as one space-separated line.
# NOTE(review): `filename` already ends in '.txt', so the file on disk is named
# '<model name>.txt_recommendations.txt'. The path is left unchanged here so
# anything that already reads it keeps working — confirm whether that is intended.
filename = '{}.txt'.format(most_pop.name)
output_path = '../../results/outputs/' + '{}_recommendations.txt'.format(filename)

# The context manager closes the file even if the write raises.
with open(output_path, 'w') as textfile:
    textfile.write(" ".join(map(str, recommendations)) + '\n')