In [1]:
#!pip install recommenders[gpu] -f https://download.pytorch.org/whl/cu111/torch_stable.html

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [3]:
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
#from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split,python_stratified_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k,
                                                     recall_at_k, get_top_k_items)

# Record the interpreter and key library versions next to the results.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.9.13 (tags/v3.9.13:6de2ca5, May 17 2022, 16:36:42) [MSC v.1929 64 bit (AMD64)]
Pandas version: 1.5.3
Tensorflow version: 2.16.1


In [4]:
# --- Evaluation ---
# Cut-off used by the ranking metrics (MAP, NDCG, precision, recall).
TOP_K = 10

# --- Reproducibility ---
# Passed to both NCFDataset and NCF.
SEED = 42

# --- NCF training ---
EPOCHS = 50
BATCH_SIZE = 256

## 1. Load dataset

In [5]:
# Load the transaction data. No sheet name is passed, so pandas reads the
# workbook's first sheet.
df = pd.read_excel('./Data/Rec_sys_data.xlsx')

In [6]:
# Keep only the four columns the recommender needs. The explicit .copy() makes
# the result an independent frame, so the column assignment in the following
# cell does not raise SettingWithCopyWarning.
df = df[['CustomerID', 'StockCode', 'Quantity', 'DeliveryDate']].copy()

In [7]:
# Convert StockCode to numeric in a single vectorised call; codes that are not
# numeric become NaN (errors="coerce") and are removed by the df.dropna() that
# follows. The previous trailing .dropna() had no effect: assigning the result
# back to the column realigns it on the index, which restores the NaN rows.
df["StockCode"] = pd.to_numeric(df["StockCode"], errors="coerce")

In [8]:
# Drop every row that has a missing value in any of the four columns (this
# includes StockCode values that were coerced to NaN), then report the
# resulting shape and display the frame.
df = df.dropna()
print(df.shape)
df

(246706, 4)


Unnamed: 0,CustomerID,StockCode,Quantity,DeliveryDate
1,17850,71053.0,6,2010-12-02 08:26:00
2,17850,21730.0,6,2010-12-03 08:26:00
4,17850,22752.0,2,2010-12-04 08:26:00
7,17850,22633.0,6,2010-12-04 08:28:00
8,17850,22632.0,6,2010-12-03 08:28:00
...,...,...,...,...
272399,15249,23399.0,12,2011-10-08 11:37:00
272400,15249,22727.0,4,2011-10-08 11:37:00
272401,15249,23434.0,12,2011-10-08 11:37:00
272402,15249,23340.0,12,2011-10-07 11:37:00


In [9]:
# Rename the retail columns to userID / itemID / rating / timestamp, the names
# used downstream (the splitter, NCFDataset and the ranking metrics are all
# called without column-name overrides), and cast both id columns to integers.
df = (
    df.rename(columns={
        "CustomerID": "userID",
        "StockCode": "itemID",
        "Quantity": "rating",
        "DeliveryDate": "timestamp",
    })
    .astype({"userID": int, "itemID": int})
)

## 2. Split the data using the Python chronological splitter provided in utilities

In [10]:
# Chronological train/test split with a 0.75 train ratio.
# NOTE(review): no column names are passed, so this relies on the splitter's
# defaults matching userID / itemID / timestamp; presumably the split is done
# per user. Confirm against the recommenders documentation.
train, test = python_chrono_split(df, 0.75)

In [11]:
# Persist both splits as CSV (without the index column); NCFDataset below is
# constructed from these file paths rather than from in-memory frames.
train_file = "./train.csv"
test_file = "./test.csv"
train.to_csv(train_file, index=False)
test.to_csv(test_file, index=False)

In [12]:
# Inspect column dtypes and non-null counts of the training split.
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 185041 entries, 37126 to 137334
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   userID     185041 non-null  int32         
 1   itemID     185041 non-null  int32         
 2   rating     185041 non-null  int64         
 3   timestamp  185041 non-null  datetime64[ns]
dtypes: datetime64[ns](1), int32(2), int64(1)
memory usage: 5.6 MB


## 3. Train the NCF model on the training data, and get the top-k recommendations for our testing data
NCF accepts implicit feedback and generates the propensity of items to be recommended to users on a scale of 0 to 1. A recommended item list can then be generated based on the scores. Note that this quickstart notebook uses a small number of epochs to reduce training time. As a consequence, the model performance will be slightly deteriorated.

In [13]:
# Build the NCF dataset from the two CSV files. Per the log output below, this
# indexes both files and also writes a full leave-one-out test file to
# ./test_full.csv.
data = NCFDataset(train_file=train_file, test_file=test_file, seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing ./train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ./test.csv ...
INFO:recommenders.models.ncf.dataset:Creating full leave-one-out test file ./test_full.csv ...
100%|██████████| 3498/3498 [01:03<00:00, 55.29it/s]
INFO:recommenders.models.ncf.dataset:Indexing ./test_full.csv ...


In [14]:
# Configure NCF in its "NeuMF" variant; user and item counts come from the
# indexed training data.
model = NCF(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,  # the training log below reports the loss every 10 epochs
    seed=SEED,
)



In [15]:
# Fit the model on the NCF dataset and measure the wall-clock training time.
with Timer() as train_time:
    model.fit(data)

print(f"Took {train_time} seconds for training.")

INFO:recommenders.models.ncf.ncf_singlenode:Epoch 10 [16.07s]: train_loss = 0.309034 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 20 [16.39s]: train_loss = 0.293773 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 30 [16.04s]: train_loss = 0.287381 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 40 [17.20s]: train_loss = 0.282960 
INFO:recommenders.models.ncf.ncf_singlenode:Epoch 50 [16.97s]: train_loss = 0.279234 


Took 826.3232 seconds for training.


In [16]:
with Timer() as test_time:
    # Score every (user, item) combination built from the users and items that
    # occur in the training split, one user at a time.
    train_items = list(train.itemID.unique())
    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * len(train_items)
        users.extend(repeated_user)
        items.extend(train_items)
        preds.extend(list(model.predict(repeated_user, train_items, is_list=True)))

    all_predictions = pd.DataFrame(
        data={"userID": users, "itemID": items, "prediction": preds}
    )

    # Pairs already present in train carry a rating after the outer merge; keep
    # only the pairs without one, i.e. items the user has not interacted with.
    merged = pd.merge(train, all_predictions, on=["userID", "itemID"], how="outer")
    all_predictions = merged[merged["rating"].isna()].drop(columns="rating")

print(f"Took {test_time} seconds for prediction.")

Took 99.6731 seconds for prediction.


## 4. Evaluate how well NCF performs
The ranking metrics are used for evaluation.

In [17]:
# Ranking metrics at TOP_K, comparing the held-out test interactions with the
# scored unseen items.
eval_map = map_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction="prediction", k=TOP_K)

print(
    f"MAP:\t{eval_map:f}",
    f"NDCG:\t{eval_ndcg:f}",
    f"Precision@K:\t{eval_precision:f}",
    f"Recall@K:\t{eval_recall:f}",
    sep="\n",
)

MAP:	0.024868
NDCG:	0.069798
Precision@K:	0.050229
Recall@K:	0.058984


In [18]:
# Open the workbook once and parse the three sheets from the same handle,
# instead of re-opening and re-reading the .xlsx file for every sheet. The
# context manager closes the file when the block ends.
with pd.ExcelFile('./Data/Rec_sys_data.xlsx') as workbook:
    df_order = workbook.parse('order')
    df_customer = workbook.parse('customer')
    df_product = workbook.parse('product')

In [19]:
# Spot-check: the five highest-scored items for customer 12346 among the pairs
# that were not present in the training split.
all_predictions[all_predictions['userID']==12346].nlargest(5,'prediction')

Unnamed: 0,userID,itemID,timestamp,prediction
185094,12346,23172,NaT,0.984732
185095,12346,23174,NaT,0.983973
185093,12346,23173,NaT,0.980423
185150,12346,23245,NaT,0.979409
185893,12346,23175,NaT,0.975739


In [20]:
# Keep only the three columns needed from here on; this drops the timestamp
# column left over from the outer merge with train (all NaT in the output above).
all_predictions = all_predictions[['userID','itemID','prediction']]

In [21]:
# Restore the business-facing column names for display. all_predictions holds
# only userID, itemID and prediction at this point (the rating column was
# dropped when the seen pairs were filtered out), so the former
# "rating" -> 'Quantity' entry never matched a column and has been removed.
all_predictions = all_predictions.rename(columns={
    "userID": 'CustomerID',
    "itemID": 'StockCode',
    'prediction': 'probability',
})

In [22]:
def recommend_product(customer_id, top_n=5, orders=None, products=None, predictions=None):
    """Print a customer's largest purchases next to their top recommendations.

    Four sections are written to stdout: the ``top_n`` order lines with the
    largest ``Quantity``, the product names for those stock codes, the
    ``top_n`` predictions with the highest ``probability``, and the product
    names for the recommended stock codes.

    Args:
        customer_id: Value compared against the ``CustomerID`` column of the
            order and prediction frames.
        top_n: Number of rows shown in the purchase and recommendation tables.
            Defaults to 5, the value that was previously hard-coded.
        orders: Frame with ``CustomerID``, ``StockCode`` and ``Quantity``
            columns. Defaults to the notebook-level ``df_order``.
        products: Frame with ``StockCode`` and ``Product Name`` columns.
            Defaults to the notebook-level ``df_product``.
        predictions: Frame with ``CustomerID``, ``StockCode`` and
            ``probability`` columns. Defaults to the notebook-level
            ``all_predictions``.

    Returns:
        None. The function only prints.
    """
    # Fall back to the notebook globals so existing one-argument calls keep working.
    orders = df_order if orders is None else orders
    products = df_product if products is None else products
    predictions = all_predictions if predictions is None else predictions

    print(f" \n---------- Top {top_n} Bought StockCodes -----------\n")

    # Computed once and reused for both the printed table and the name lookup.
    top_bought = (
        orders.loc[orders['CustomerID'] == customer_id, ['CustomerID', 'StockCode', 'Quantity']]
        .nlargest(top_n, 'Quantity')
    )
    print(top_bought)

    print('\n-------Product Name of bought StockCodes ------\n')

    # isin() keeps the row order of `products`, so names are not listed in rank order.
    # NOTE(review): isin() matches exact values, so StockCode needs the same dtype
    # in all three frames (the prediction frame holds integers) - confirm for the
    # order and product sheets.
    print(products[products.StockCode.isin(top_bought.StockCode)]['Product Name'])

    print(f"\n --------- Top {top_n} Recommendations ------------ \n")

    recommend = predictions[predictions['CustomerID'] == customer_id].nlargest(top_n, 'probability')
    print(recommend)

    print('\n-------Product Name of Recommendations ------\n')

    print(products[products.StockCode.isin(recommend.StockCode)]['Product Name'])


## 5. Recommendations

In [29]:
# Show purchases and recommendations for customer 13138.
recommend_product(13138)

 
---------- Top 5 Bought StockCodes -----------

      CustomerID StockCode  Quantity
3170       13138     22915        24
3178       13138     21422        16
3168       13138     22147        12
3169       13138     22585        12
3172       13138     20971        12

-------Product Name of bought StockCodes ------

355           IN-16/1186 Surfing Rubber Duckies Per Dozen
429     MightySkins Skin Decal Wrap Compatible with Sm...
913     CafePress - Spider-Man Teacher - Women's Dark ...
957     Eye Buy Express Kids Childrens Reading Glasses...
1206                                Women's Breeze Walker
Name: Product Name, dtype: object

 --------- Top 5 Recommendations ------------ 

         CustomerID  StockCode  probability
1515069       13138      22570     0.980341
1515014       13138      22749     0.965419
1515005       13138      22274     0.960423
1515173       13138      22271     0.951713
1515004       13138      20972     0.944929

-------Product Name of Recommendations -

In [24]:
# Show purchases and recommendations for customer 15127.
recommend_product(15127)


 
---------- Top 5 Bought StockCodes -----------

        CustomerID StockCode  Quantity
272296       15127     23263        48
272287       15127     23354        24
272288       15127     22813        24
272289       15127     23096        24
272285       15127     21181        12

-------Product Name of bought StockCodes ------

13                billyboards Porcelain School Chalkboard
374     MightySkins Protective Vinyl Skin Decal for Po...
923     Zoan Synchrony Duo Sport Electric Snow Helmet ...
952     MightySkins Skin Decal Wrap Compatible with Sm...
1576    EMPIRE KLIX Klutch Designer Wallet Case for LG G2
Name: Product Name, dtype: object

 --------- Top 5 Recommendations ------------ 

         CustomerID  StockCode  probability
4695096       15127      23355     0.954730
4695146       15127      23356     0.922041
4694949       15127      22158     0.915638
4693806       15127      84347     0.869073
4694415       15127      22141     0.848037

-------Product Name of Recom