In [3]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
import numpy as np
import pickle
import scipy.sparse as sp
import pandas as pd

In [2]:
# Path to the pickled training interactions; loaded with pickle below and
# filtered on its "labels" column.
train_file = "../datasets/AOW-private/TRAIN_AOW_100negative"

In [3]:
# Output path for the single fitted model bundled with its id mappings.
model_file = "models/lightfm_model_cleaned_AOW_data_bpr_loss.pkl"
# Output path for the list of models fitted over the learning-rate/epoch grid.
models_file = "models/lightfm_models_with_grid_cleaned_AOW_data_bpr_loss.pkl"

In [3]:
# Load the pickled training frame. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(train_file, "rb") as train_fh:
    df_train_neg = pickle.load(train_fh)

In [6]:
# Keep only the rows whose label equals 1.
df_train = df_train_neg.loc[df_train_neg["labels"].eq(1)]

In [7]:
# Unfitted LightFM model with BPR loss. An identical model is constructed again
# and fitted in the training cell further down, which rebinds `model`.
model = LightFM(learning_rate=0.05, loss='bpr')

In [8]:
# Preview the first five rows of the filtered training frame.
df_train.head()

Unnamed: 0,id_user,id_item,labels
0,1,241,1
1,1,1066,1
2,1,954,1
3,1,161,1
4,1,1503,1


In [9]:
# Number of distinct raw user ids in the training interactions.
users_count = len(df_train["id_user"].unique())

In [10]:
users_count

163566

In [11]:
# Number of distinct raw item ids in the training interactions.
items_count = len(df_train["id_item"].unique())

In [12]:
items_count

1828

In [36]:
# Largest raw user id. The recorded value (361571) exceeds the number of distinct
# users (163566), so raw ids cannot be used directly as matrix row indices.
df_train["id_user"].max()

361571

In [40]:
# Smallest and largest raw item id in the training interactions, one per line.
print(df_train["id_item"].min(), df_train["id_item"].max(), sep="\n")

1
1841


In [48]:
# Map each raw user id to a contiguous 0-based row index, in the order the ids
# are returned by unique(). enumerate() derives the indices from the ids
# themselves, so the mapping cannot be silently truncated by a stale
# `users_count` (zip() stops at the shorter of its inputs).
user_id_mappings = {uid: idx for idx, uid in enumerate(df_train["id_user"].unique())}

In [49]:
# Map each raw item id to a contiguous 0-based column index, in the order the
# ids are returned by unique(). enumerate() is used instead of
# zip(..., range(items_count)) because `items_count` is rebound later in this
# notebook from the test data; re-running this cell afterwards would otherwise
# silently drop items from the mapping.
item_id_mappings = {iid: idx for idx, iid in enumerate(df_train["id_item"].unique())}

In [50]:
def build_interaction_matrix(rows, cols, data, user_map=None, item_map=None):
    """Build a sparse user-item interaction matrix from (user, item, label) rows.

    Parameters
    ----------
    rows, cols : int
        Shape of the resulting matrix (number of users, number of items).
    data : pandas.DataFrame
        Any object whose ``.values`` is a 2-D array with exactly three
        columns: raw user id, raw item id, label.
    user_map, item_map : dict, optional
        Raw id -> 0-based index lookups. When omitted, the notebook-level
        ``user_id_mappings`` / ``item_id_mappings`` are used.

    Returns
    -------
    scipy.sparse.coo_matrix
        ``int32`` matrix of shape ``(rows, cols)`` with entries in row-major
        order. If a (user, item) pair occurs more than once the last label
        wins; zero labels are not stored.

    Raises
    ------
    KeyError
        If an id in ``data`` is missing from the corresponding mapping.
    ValueError
        If ``data.values`` is non-empty and does not have three columns.
    """
    if user_map is None:
        user_map = user_id_mappings
    if item_map is None:
        item_map = item_id_mappings

    triples = np.asarray(data.values)
    n_obs = len(triples)
    if n_obs == 0:
        return sp.coo_matrix((rows, cols), dtype=np.int32)
    if triples.ndim != 2 or triples.shape[1] != 3:
        raise ValueError("data must provide exactly three columns: user, item, label")

    # Translate raw ids to matrix coordinates in one pass per column.
    row_idx = np.fromiter((user_map[uid] for uid in triples[:, 0]),
                          dtype=np.int64, count=n_obs)
    col_idx = np.fromiter((item_map[iid] for iid in triples[:, 1]),
                          dtype=np.int64, count=n_obs)
    labels = triples[:, 2].astype(np.int32)

    # Element-wise assignment overwrites earlier values for the same cell, whereas
    # COO construction would sum duplicates. np.unique keeps the FIRST occurrence
    # of each key, so search the reversed array to select the LAST assignment.
    flat_key = row_idx * np.int64(cols) + col_idx
    _, first_in_reversed = np.unique(flat_key[::-1], return_index=True)
    keep = n_obs - 1 - first_in_reversed
    row_idx, col_idx, labels = row_idx[keep], col_idx[keep], labels[keep]

    # Assigning 0 to a sparse cell leaves it empty, so drop explicit zeros.
    stored = labels != 0
    return sp.coo_matrix(
        (labels[stored], (row_idx[stored], col_idx[stored])),
        shape=(rows, cols),
        dtype=np.int32,
    )


In [51]:
# Sparse users x items matrix of the training interactions.
train_matrix = build_interaction_matrix(
    rows=users_count,
    cols=items_count,
    data=df_train,
)

In [65]:
# Rebuild the BPR model and fit it on the training interactions for 20 epochs.
# NOTE(review): no random_state is passed, so results are presumably not
# reproducible across runs -- confirm and consider fixing a seed.
model = LightFM(learning_rate=0.05, loss='bpr')
# fit() returns the model itself, which is why this cell displays its repr.
model.fit(train_matrix, epochs=20)

<lightfm.lightfm.LightFM at 0x7fb9ab3344a8>

In [66]:
# Mean precision@10, computed on the same matrix the model was fitted on
# (train_matrix), so this measures fit rather than generalization.
train_precision = precision_at_k(model, train_matrix, k=10).mean()

In [67]:
train_precision

0.4658511

In [68]:
# Mean precision@5 on the training matrix. This rebinds `train_precision`,
# replacing the k=10 value computed above.
train_precision = precision_at_k(model, train_matrix, k=5).mean()

In [69]:
train_precision

0.54669553

In [70]:
# Mean AUC score, again computed on the training matrix itself.
train_auc = auc_score(model, train_matrix).mean()

In [71]:
train_auc

0.8839857

In [72]:
# Bundle the fitted model with the raw-id -> index lookups needed to use it.
model_dict = {
    'user_id_mappings': user_id_mappings,
    'item_id_mappings': item_id_mappings,
    # Legacy misspelled key, kept so existing readers of the pickle keep working.
    'item_id_mapings': item_id_mappings,
    'model': model
}

In [74]:
# Persist the bundle. The context manager guarantees the file is flushed and
# closed even if pickling fails part-way.
with open(model_file, "wb") as model_fh:
    pickle.dump(model_dict, model_fh)

In [76]:
# Hyper-parameter grid: every learning rate is combined with every epoch count.
lrs = [0.01, 0.05, 0.1]
epochs = [10, 20]


In [77]:
# Fit one BPR model per (learning rate, epoch count) pair; the learning rate
# varies slowest, so entries are grouped by learning rate.
models = [
    {
        'lr': lr,
        'epoch': epoch,
        'model': LightFM(learning_rate=lr, loss='bpr').fit(train_matrix, epochs=epoch),
    }
    for lr in lrs
    for epoch in epochs
]

In [79]:
# Persist the grid of fitted models. The context manager guarantees the file
# is flushed and closed even if pickling fails part-way.
with open(models_file, "wb") as models_fh:
    pickle.dump(models, models_fh)

In [4]:
# Reload the pickled grid of fitted models (a list of dicts, despite the
# singular name). The context manager closes the file handle deterministically.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open(models_file, 'rb') as models_fh:
    saved_model = pickle.load(models_fh)

In [5]:
saved_model

[{'epoch': 10,
  'lr': 0.01,
  'model': <lightfm.lightfm.LightFM at 0x7f497c026ac8>},
 {'epoch': 20,
  'lr': 0.01,
  'model': <lightfm.lightfm.LightFM at 0x7f497c026eb8>},
 {'epoch': 10,
  'lr': 0.05,
  'model': <lightfm.lightfm.LightFM at 0x7f497c032390>},
 {'epoch': 20,
  'lr': 0.05,
  'model': <lightfm.lightfm.LightFM at 0x7f497c032828>},
 {'epoch': 10,
  'lr': 0.1,
  'model': <lightfm.lightfm.LightFM at 0x7f497c032cc0>},
 {'epoch': 20,
  'lr': 0.1,
  'model': <lightfm.lightfm.LightFM at 0x7f497c036198>}]

### Testing the models

In [4]:
# Path to the test interactions; read below as a tab-separated file without a header row.
test_file = "../datasets/AOW-private/test.txt"

In [5]:
# Read the tab-separated test interactions; the file has no header row, so the
# column names are supplied explicitly.
test_df = pd.read_csv(
    test_file,
    sep="\t",
    header=None,
    names=["user", "item", "label"],
)

In [9]:
# Number of distinct users in the test set. The recorded value (163566) equals
# the number of distinct users in the training interactions.
test_df['user'].unique().size

163566

In [12]:
# Number of distinct items in the test set.
# NOTE(review): this rebinds `items_count`, which earlier held the training item
# count used to size the interaction matrix. Re-running earlier cells after this
# one would pick up the test value -- consider a distinct name.
items_count = test_df["item"].unique().size

In [13]:
items_count

1710

In [14]:
# Smallest and largest raw item id in the test set, one per line.
print(test_df["item"].min(), test_df["item"].max(), sep="\n")

1
1839
