In [1]:
# Mount Google Drive in this Colab runtime, then change the working directory
# to the training-notebook folder so that the relative paths used further down
# (e.g. '../../data/') resolve against the project checkout on Drive.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/Othercomputers/My Computer (1)/CS608_Recommender_Systems/Project/coffee-joint-rec-sys/notebooks/training/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/Othercomputers/My Computer (1)/CS608_Recommender_Systems/Project/coffee-joint-rec-sys/notebooks/training


In [2]:
!pip install --quiet cornac==1.14.2

In [3]:
import pandas as pd
from cornac.eval_methods import BaseMethod
from cornac.models import MostPop
from cornac.metrics import Recall, NCRR, NDCG
from cornac import Experiment


In [4]:
# Data directory, relative to the notebook's working directory; the split CSVs
# are read from its 'processed/' subfolder below.
PATH = '../../data/'

In [5]:
def process_data(train, val, test):
  """Re-index users/shops to integers and convert each split to UIR tuples for cornac.

  The user and shop vocabularies are built from the *train* split only: the
  distinct values are sorted and enumerated from 0. The same mappings are then
  applied to all three splits. Ids in val/test that do not occur in train have
  no entry in the mappings, so pandas' ``.map`` leaves them as NaN in the
  returned records.

  The input DataFrames are NOT modified; mapped copies are created internally,
  so the function can safely be re-run on the same frames.

  Args: 
    train (pd.DataFrame):  train set with 'userid', 'shop' (and rating) columns
    val   (pd.DataFrame):  val set with the same columns
    test  (pd.DataFrame):  test set with the same columns

  Returns:
    train (list):  each record is a tuple of UIR
    val   (list):  each record is a tuple of UIR
    test  (list):  each record is a tuple of UIR
    idx_user_map (dict): index to user mapping
    idx_shop_map (dict): index to shop mapping
    user_idx_map (dict): user to index mapping
    shop_idx_map (dict): shop to index mapping
  """

  # Vocabularies come from the training split only.
  idx_user_map = dict(enumerate(sorted(set(train['userid']))))
  idx_shop_map = dict(enumerate(sorted(set(train['shop']))))
  user_idx_map = {user: idx for idx, user in idx_user_map.items()}
  shop_idx_map = {shop: idx for idx, shop in idx_shop_map.items()}

  def _to_uir_records(frame):
    # `.assign` returns a new frame (column order preserved), so the caller's
    # DataFrame is left untouched.
    encoded = frame.assign(
        userid=frame['userid'].map(user_idx_map),
        shop=frame['shop'].map(shop_idx_map),
    )
    return list(encoded.to_records(index=False))

  return (
      _to_uir_records(train),
      _to_uir_records(val),
      _to_uir_records(test),
      idx_user_map,
      idx_shop_map,
      user_idx_map,
      shop_idx_map,
  )


### Leave One Last

In [6]:
# Load the leave-one-last (LOL) train/val/test splits, keeping only the
# user-item-rating columns.
train, val, test = (
    pd.read_csv(PATH + 'processed/' + split_file)[['userid', 'shop', 'rating']]
    for split_file in ('train_lol.csv', 'val_lol.csv', 'test_lol.csv')
)

# Re-index users/shops to integers and turn every split into a list of UIR tuples.
train, val, test, idx_user_map, idx_shop_map, user_idx_map, shop_idx_map = process_data(train, val, test)

In [7]:
# Peek at the first ten (user index, shop index, rating) training records.
train[:10]

[(0, 350, 3.),
 (0, 434, 4.),
 (0, 699, 4.),
 (0, 431, 5.),
 (1, 596, 3.),
 (2, 638, 4.),
 (2, 268, 5.),
 (2, 727, 5.),
 (2, 626, 4.),
 (2, 399, 5.)]

In [8]:
# Build the cornac evaluation method from the pre-made splits.
# The validation split is passed as cornac's `test_data`, so the metrics
# reported below are validation scores; the held-out `test` split is not
# used in this cell.
# NOTE(review): rating_threshold=3.5 presumably marks ratings >= 3.5 as positive
# feedback for the ranking metrics, and exclude_unknowns=True presumably drops
# evaluation rows whose user/item is absent from train — confirm against cornac docs.
eval_method = BaseMethod.from_splits(
    train_data=train, 
    test_data=val, 
    rating_threshold=3.5,
    exclude_unknowns=True, 
    verbose=True,
)

rating_threshold = 3.5
exclude_unknowns = True
---
Training data:
Number of users = 497
Number of items = 749
Number of ratings = 2822
Max rating = 5.0
Min rating = 1.0
Global mean = 3.8
---
Test data:
Number of users = 433
Number of items = 235
Number of ratings = 433
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 497
Total items = 749


In [9]:
# Popularity baseline for the leave-one-last split.
most_pop = MostPop(name='MostPop_lol')

# Ranking metrics, all with a cut-off of k=5.
eval_metrics = [metric_cls(k=5) for metric_cls in (NCRR, NDCG, Recall)]

# Train the model and evaluate it with the evaluation method built above.
exp = Experiment(
    eval_method=eval_method,
    models=[most_pop],
    metrics=eval_metrics,
)
exp.run()


[MostPop_lol] Training started!

[MostPop_lol] Evaluation started!


Ranking:   0%|          | 0/433 [00:00<?, ?it/s]


TEST:
...
            | NCRR@5 | NDCG@5 | Recall@5 | Train (s) | Test (s)
----------- + ------ + ------ + -------- + --------- + --------
MostPop_lol | 0.0490 | 0.0544 |   0.0717 |    0.0016 |   0.2812



In [10]:
# Averaged metric values (plus train/test timings) for the first — and only —
# model in the experiment.
exp.result[0].metric_avg_results

OrderedDict([('NCRR@5', 0.04896851248642779),
             ('NDCG@5', 0.0544426908929043),
             ('Recall@5', 0.07166123778501629),
             ('Train (s)', 0.001598358154296875),
             ('Test (s)', 0.28119826316833496)])

In [11]:
# Get the ranked item list and the matching scores from the trained model.
# NOTE(review): the argument 1 is presumably a cornac user index; since MostPop
# is a popularity baseline the ranking is presumably identical for every user — confirm.
recommendations, scores = most_pop.rank(1)

In [12]:
# cornac keeps its own id -> internal-index maps on the training set
user_id2idx = most_pop.train_set.uid_map
# invert the item map so an internal index can be turned back into the id we fed in
item_idx2id = {
    internal_idx: raw_id
    for raw_id, internal_idx in most_pop.train_set.iid_map.items()
}

In [13]:
# Translate every ranked item back to a shop name in two hops:
# cornac internal index -> our integer shop index -> original shop identifier.
recommendations = [
    idx_shop_map[item_idx2id[cornac_idx]] for cornac_idx in recommendations
]
recommendations[:10]

['maxwell-food-centre-singapore-3',
 'nylon-coffee-roasters-singapore',
 'chye-seng-huat-hardware-singapore',
 'group-therapy-coffee-singapore',
 'tiong-bahru-bakery-singapore-4',
 'lau-pa-sat-singapore-2',
 'wild-honey-singapore-3',
 'old-airport-road-food-centre-singapore',
 'toms-palette-singapore',
 'oriole-coffee-bar-singapore-2']

In [14]:
# Persist the ranked shop list as one space-separated line.
# NOTE(review): `filename` already ends in '.txt', so the file written is
# '<model name>.txt_recommendations.txt'. The path is kept unchanged here because
# other code may read it — confirm before renaming.
filename = '{}.txt'.format(most_pop.name)
# The context manager closes the handle even if the write raises.
with open('../../results/outputs/' + '{}_recommendations.txt'.format(filename), 'w') as textfile:
    textfile.write(" ".join(map(str, recommendations)) + '\n')

### Temporal Global Split

In [15]:
# Load the temporal-global-split (TGS) train/val/test files, keeping only the
# user-item-rating columns.
train, val, test = (
    pd.read_csv(PATH + 'processed/' + split_file)[['userid', 'shop', 'rating']]
    for split_file in ('train_tgs.csv', 'val_tgs.csv', 'test_tgs.csv')
)

# Re-index users/shops to integers and turn every split into a list of UIR tuples.
(train, val, test,
 idx_user_map, idx_shop_map,
 user_idx_map, shop_idx_map) = process_data(train, val, test)

In [16]:
# Peek at the first ten (user index, shop index, rating) training records.
train[:10]

[(92, 613, 5.),
 (92, 374, 3.),
 (92, 406, 4.),
 (92, 142, 5.),
 (92, 542, 4.),
 (92, 316, 4.),
 (227, 240, 5.),
 (227, 403, 5.),
 (227, 160, 4.),
 (92, 270, 2.)]

In [17]:
# Build the cornac evaluation method from the temporal-global-split data.
# The validation split is passed as cornac's `test_data`, so the metrics
# reported below are validation scores; the held-out `test` split is not
# used in this cell.
# NOTE(review): rating_threshold=3.5 presumably marks ratings >= 3.5 as positive
# feedback for the ranking metrics, and exclude_unknowns=True presumably drops
# evaluation rows whose user/item is absent from train — confirm against cornac docs.
eval_method = BaseMethod.from_splits(
    train_data=train, 
    test_data=val, 
    rating_threshold=3.5,
    exclude_unknowns=True, 
    verbose=True,
)

rating_threshold = 3.5
exclude_unknowns = True
---
Training data:
Number of users = 426
Number of items = 699
Number of ratings = 3052
Max rating = 5.0
Min rating = 1.0
Global mean = 3.7
---
Test data:
Number of users = 72
Number of items = 114
Number of ratings = 159
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 426
Total items = 699


In [18]:
# Popularity baseline for the temporal global split.
most_pop = MostPop(name='MostPop_tgs')

# Ranking metrics, all with a cut-off of k=5.
eval_metrics = [metric_cls(k=5) for metric_cls in (NCRR, NDCG, Recall)]

# Train the model and evaluate it with the evaluation method built above.
exp = Experiment(
    eval_method=eval_method,
    models=[most_pop],
    metrics=eval_metrics,
)
exp.run()


[MostPop_tgs] Training started!

[MostPop_tgs] Evaluation started!


Ranking:   0%|          | 0/72 [00:00<?, ?it/s]


TEST:
...
            | NCRR@5 | NDCG@5 | Recall@5 | Train (s) | Test (s)
----------- + ------ + ------ + -------- + --------- + --------
MostPop_tgs | 0.0269 | 0.0330 |   0.0491 |    0.0016 |   0.1030



In [19]:
# Averaged metric values (plus train/test timings) for the first — and only —
# model in the experiment.
exp.result[0].metric_avg_results

OrderedDict([('NCRR@5', 0.026889446472019463),
             ('NDCG@5', 0.03300749588466491),
             ('Recall@5', 0.049107142857142856),
             ('Train (s)', 0.0015790462493896484),
             ('Test (s)', 0.10295534133911133)])

In [20]:
# Get the ranked item list and the matching scores from the trained model.
# NOTE(review): the argument 1 is presumably a cornac user index; since MostPop
# is a popularity baseline the ranking is presumably identical for every user — confirm.
recommendations, scores = most_pop.rank(1)

In [21]:
# cornac keeps its own id -> internal-index maps on the training set
user_id2idx = most_pop.train_set.uid_map
# invert the item map so an internal index can be turned back into the id we fed in
item_idx2id = {
    internal_idx: raw_id
    for raw_id, internal_idx in most_pop.train_set.iid_map.items()
}

In [22]:
# Translate every ranked item back to a shop name in two hops:
# cornac internal index -> our integer shop index -> original shop identifier.
recommendations = [
    idx_shop_map[item_idx2id[cornac_idx]] for cornac_idx in recommendations
]
recommendations[:10]

['maxwell-food-centre-singapore-3',
 'chye-seng-huat-hardware-singapore',
 'nylon-coffee-roasters-singapore',
 'group-therapy-coffee-singapore',
 'tiong-bahru-bakery-singapore-4',
 'old-airport-road-food-centre-singapore',
 'ps-cafe-singapore-8',
 'lau-pa-sat-singapore-2',
 'toms-palette-singapore',
 'two-men-bagel-house-singapore']

In [23]:
# Persist the ranked shop list as one space-separated line.
# NOTE(review): `filename` already ends in '.txt', so the file written is
# '<model name>.txt_recommendations.txt'. The path is kept unchanged here because
# other code may read it — confirm before renaming.
filename = '{}.txt'.format(most_pop.name)
# The context manager closes the handle even if the write raises.
with open('../../results/outputs/' + '{}_recommendations.txt'.format(filename), 'w') as textfile:
    textfile.write(" ".join(map(str, recommendations)) + '\n')