In [1]:
import pandas as pd
import random
from caboose_nbr.tifuknn import TIFUKNN
from caboose_nbr.evaluate_recommendation import evaluate

In [2]:
train_baskets = pd.read_csv('data/instacart_30k/train_baskets.csv.gz')
test_baskets = pd.read_csv('data/instacart_30k/test_baskets.csv')
valid_baskets = pd.read_csv('data/instacart_30k/valid_baskets.csv')

In [3]:
all_users = train_baskets['user_id'].tolist()
sample_users = random.sample(all_users,1000)

In [4]:
train_baskets = train_baskets[train_baskets['user_id'].isin(sample_users)]
test_baskets = test_baskets[test_baskets['user_id'].isin(sample_users)]
valid_baskets = valid_baskets[valid_baskets['user_id'].isin(sample_users)]

In [5]:
train_baskets.shape

(398149, 10)

In [6]:
train_baskets[['user_id','item_id']].drop_duplicates().shape

(122651, 2)

In [7]:
tifu_sklearn_cosine = TIFUKNN(train_baskets, test_baskets, valid_baskets, mode = 'sklearn', distance_metric = 'cosine')
tifu_sklearn_cosine.train()
tifu_sklearn_cosine_preds = tifu_sklearn_cosine.predict()

number of test users: 969
filtered items: 19498
initial data processing
item count: 8066
compute basket reps
10000  baskets passed
20000  baskets passed
compute user reps 969
(969, 8066)
start of knn
knn finished


In [8]:
tifu_caboose = TIFUKNN(train_baskets, test_baskets, valid_baskets,'caboose')
tifu_caboose.train()
tifu_caboose_preds = tifu_caboose.predict()

number of test users: 969
filtered items: 19498
initial data processing
item count: 8066
compute basket reps
10000  baskets passed
20000  baskets passed
compute user reps 969
(969, 8066)
start of knn
knn finished


In [9]:
interactions_to_forget = train_baskets[['user_id','item_id']].drop_duplicates().sample(5000)

In [11]:
tifu_sklearn_cosine.forget_interactions(interactions_to_forget.values.tolist())
tifu_sklearn_cosine_preds_after_forget = tifu_sklearn_cosine.predict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.train_baskets.drop('user_item', axis=1, inplace=True)


compute basket reps
10000  baskets passed
20000  baskets passed
compute user reps 969
(969, 8066)
start of knn
knn finished


In [12]:
tifu_caboose.forget_interactions(interactions_to_forget.values.tolist())
tifu_caboose_preds_after_forget = tifu_caboose.predict()

In [13]:
user_test_baskets_df = test_baskets.groupby('user_id')['item_id'].apply(list).reset_index()
user_test_baskets_dict = dict(zip( user_test_baskets_df['user_id'],user_test_baskets_df['item_id']))

In [14]:
_,_ = evaluate(user_test_baskets_dict,tifu_sklearn_cosine_preds)

10
recall: 0.2901291897953595
ndcg: 0.43514074225738847
20
recall: 0.4117316015106464
ndcg: 0.34966738037466993


In [15]:
_,_ = evaluate(user_test_baskets_dict,tifu_caboose_preds)

10
recall: 0.2901291897953595
ndcg: 0.43514074225738847
20
recall: 0.4117316015106464
ndcg: 0.34966738037466993


In [16]:
_,_ = evaluate(user_test_baskets_dict,tifu_sklearn_cosine_preds_after_forget)

10
recall: 0.2830429291930377
ndcg: 0.4277396440053112
20
recall: 0.4022899544231257
ndcg: 0.34394587077488015


In [17]:
_,_ = evaluate(user_test_baskets_dict,tifu_caboose_preds_after_forget)

10
recall: 0.2830601290554388
ndcg: 0.4277020640277672
20
recall: 0.4023063352444601
ndcg: 0.3439206055504645
