# Recommender System

The idea is to create a recommender system that is able to find possible products that the client might would like to buy. 




In [57]:
#antiquity
aux = df.copy()
aux['InvoiceDate'] = pd.to_datetime(aux['InvoiceDate'])
customers = aux[['CustomerID', 'InvoiceDate']].groupby(['CustomerID']).min().reset_index()
customers['InvoiceDate'] = customers['InvoiceDate'].apply(lambda x: (pd.datetime.now() - x).days)
customers.rename(columns={'InvoiceDate':'antiquity'}, inplace=True)

#total times in purchase
aux = df.copy()
aux = aux[['CustomerID','StockCode','InvoiceNo']].groupby(['CustomerID', 'StockCode']).count().reset_index()
customers = customers.merge(aux, how='outer', left_on='CustomerID', right_on='CustomerID')
customers.rename(columns={'InvoiceNo':'purchases'}, inplace=True)

#total sales, total items purchased, times cancelled
aux = df.copy()
aux['sales'] = aux['Quantity']*aux['UnitPrice']

aux = aux[['CustomerID','StockCode','Quantity','cancelled', 'sales']].groupby(['CustomerID', 'StockCode']).sum().reset_index()
customers = customers.merge(aux, how='outer', left_on=['CustomerID','StockCode'], right_on=['CustomerID','StockCode'])
customers['sales_level'] = pd.cut(customers['sales'], 
                            bins=[customers['sales'].min()-1,
                                  0,
                                  customers['sales'].mean()/3,
                                  customers['sales'].mean()*2/3,
                                  customers['sales'].mean(),
                                  customers['sales'].max()],
                            labels=[1,2,3,4,5])
customers['quantity_level'] = pd.cut(customers['Quantity'], 
                            bins=[customers['Quantity'].min()-1,
                                  0,
                                  customers['Quantity'].mean()/2,
                                  customers['Quantity'].mean(),
                                  customers['Quantity'].max()],
                            labels=[1,2,3,4])
customers['StockCode'] = customers['StockCode'].astype('category').cat.codes
customers['CustomerID'] = customers['CustomerID'].astype('category').cat.codes
customers['quantity_level'] = customers['quantity_level'].cat.codes
customers['sales_level'] = customers['sales_level'].cat.codes
def calculate_rating(x):
    if(x<=0):
        return 0
    elif(x<3):
        return 1
    elif(x<10):
        return 2
    elif(x<20):
        return 3
    else:
        return 4

customers['Rating'] = customers['Quantity'].apply(calculate_rating)

customers

Unnamed: 0,CustomerID,antiquity,StockCode,purchases,Quantity,cancelled,sales,sales_level,quantity_level,Rating
0,0,3333,2001,2,0,1,0.00,0,0,0
1,1,3375,25,1,24,0,6.00,1,3,4
2,1,3375,87,1,36,0,10.80,2,3,4
3,1,3375,130,1,6,0,17.70,2,1,2
4,1,3375,167,4,40,0,34.00,4,3,4
...,...,...,...,...,...,...,...,...,...,...
267610,4371,3209,3087,1,4,0,15.00,2,1,2
267611,4371,3209,3190,2,96,0,139.20,4,3,4
267612,4371,3209,3191,3,120,0,176.40,4,3,4
267613,4371,3209,3193,2,48,0,79.20,4,3,4


In [58]:
#Scale CustomerID column
from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler()
customers['CustomerID'] = pd.DataFrame(scaler.fit_transform(customers[['CustomerID']]), columns=['CustomerID'])
customers

Unnamed: 0,CustomerID,antiquity,StockCode,purchases,Quantity,cancelled,sales,sales_level,quantity_level,Rating
0,0.000000,3333,2001,2,0,1,0.00,0,0,0
1,0.000229,3375,25,1,24,0,6.00,1,3,4
2,0.000229,3375,87,1,36,0,10.80,2,3,4
3,0.000229,3375,130,1,6,0,17.70,2,1,2
4,0.000229,3375,167,4,40,0,34.00,4,3,4
...,...,...,...,...,...,...,...,...,...,...
267610,1.000000,3209,3087,1,4,0,15.00,2,1,2
267611,1.000000,3209,3190,2,96,0,139.20,4,3,4
267612,1.000000,3209,3191,3,120,0,176.40,4,3,4
267613,1.000000,3209,3193,2,48,0,79.20,4,3,4


In [59]:
customers['Rating'].unique()

array([0, 4, 2, 3, 1])

In [60]:
# Check which are the best parameters using GridSearchCV 
from surprise import KNNWithMeans
from surprise import Dataset
from surprise.model_selection import GridSearchCV
from surprise import Reader

reader = Reader(rating_scale=(0,len(customers['Rating'].unique())))
data = Dataset.load_from_df(customers[['CustomerID','StockCode','Rating']], reader)
sim_options = {
    "name": ["msd", "cosine"],
    "min_support": [3, 4, 5],
    "user_based": [True],
}

param_grid = {"sim_options": sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

print(gs.best_score["rmse"])
print(gs.best_params["rmse"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
0.7181876490839948
{'sim_options': {'name': 'msd', 'min_support': 3, 'user_based': True}}


In [61]:
#Create the predictor with the best parameters
from surprise import KNNWithMeans
sim_options = {
    "name": "msd",
    "min_support": 3,
    "user_based": True,  
}
knn = KNNWithMeans(sim_options=sim_options)
trainingSet = data.build_full_trainset()
knn.fit(trainingSet)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x118a6bc50>

In [62]:
#Example: predict score for customer=1 and stockcode=273
round(knn.predict(1,273).est)

4.0

In [63]:
c = customers[customers['CustomerID']==1]
c[c['StockCode']==273]

Unnamed: 0,CustomerID,antiquity,StockCode,purchases,Quantity,cancelled,sales,sales_level,quantity_level,Rating
267557,1.0,3209,273,1,30,0,37.5,4,3,4


In [64]:
#find stockcodes that the user 1 did not buy yet
customer = list(customers[customers['CustomerID']==1]['StockCode'].unique())
stockcodes = customers['StockCode'].unique()
stockcodes = list(set(stockcodes) -set(customer))

#find the suggested ratings for each of those products and keep the maximum 
customer_id = 1
suggested_rating = -1
suggested_stockcode = -1
for s in stockcodes:
    rating = knn.predict(customer_id, s).est
    if (rating>suggested_rating):
        suggested_rating = rating
        suggested_stockcode = s
print("The suggested product is {} with rating {}.".format(suggested_stockcode, suggested_rating))

The suggested product is 45 with rating 5.
