# Recommender System

The idea is to create a recommender system that is able to find possible products that the client might would like to buy. 




In [1]:
import pandas as pd
from sklearn import preprocessing
from surprise import KNNWithMeans
from surprise import Dataset
from surprise.model_selection import GridSearchCV
from surprise import Reader

In [2]:
df = pd.read_csv("cleaned_data.csv", encoding="ISO-8859-1")
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,cancelled
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom,0
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,0
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom,0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,0
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom,0


In [3]:
##keep columns from interest 
df = df[['CustomerID','StockCode','Quantity']].groupby(['CustomerID', 'StockCode']).sum().reset_index()
df['StockCode'] = df['StockCode'].astype('category').cat.codes
df['CustomerID'] = df['CustomerID'].astype('category').cat.codes

#Calculate rating in function of the quantity
def calculate_rating(x):
    if(x<=0):
        return 0
    elif(x<3):
        return 1
    elif(x<10):
        return 2
    elif(x<20):
        return 3
    else:
        return 4

df['Rating'] = df['Quantity'].apply(calculate_rating)
df

Unnamed: 0,CustomerID,StockCode,Quantity,Rating
0,0,2001,0,0
1,1,25,24,4
2,1,87,36,4
3,1,130,6,2
4,1,167,40,4
...,...,...,...,...
267610,4371,3087,4,2
267611,4371,3190,96,4
267612,4371,3191,120,4
267613,4371,3193,48,4


In [4]:
#Transform customerID
scaler = preprocessing.MinMaxScaler()
df['CustomerID'] = pd.DataFrame(scaler.fit_transform(df[['CustomerID']]), columns=['CustomerID'])

In [5]:
#See differnt Rating values
df['Rating'].unique()

array([0, 4, 2, 3, 1])

In [6]:
#Print shape
df.shape

(267615, 4)

In [7]:
#take a sample
df_sample = df.sample(n=int(df.shape[0]/4)) 

In [8]:
# Check which are the best parameters using GridSearchCV 


reader = Reader(rating_scale=(0,len(df['Rating'].unique())))
data = Dataset.load_from_df(df_sample[['CustomerID','StockCode','Rating']], reader)
sim_options = {
    "name": ["msd", "cosine"],
    "min_support": [3, 4, 5],
    "user_based": [True],
}

param_grid = {"sim_options": sim_options}

gs = GridSearchCV(KNNWithMeans, param_grid, measures=["rmse", "mae"], cv=3)
gs.fit(data)

print("Score:",gs.best_score["rmse"])
print("Best parameters:",gs.best_params["rmse"])


Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...


  sim = construction_func[name](*args)


Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Score: 0.9577169731652884
Best parameters: {'sim_options': {'name': 'cosine', 'min_support': 5, 'user_based': True}}


In [9]:
#Create the predictor with the best parameters
data = Dataset.load_from_df(df[['CustomerID','StockCode','Rating']], reader)
sim_options = {
    "name": "msd",
    "min_support": 5,
    "user_based": True,  
}
knn = KNNWithMeans(sim_options=sim_options)
trainingSet = data.build_full_trainset()
knn.fit(trainingSet)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x10dcf5940>

In [10]:
#Example: predict score for customer=1 and stockcode=273
round(knn.predict(1,273).est)

4

In [11]:
c = df[df['CustomerID']==1]
c[c['StockCode']==273]

Unnamed: 0,CustomerID,StockCode,Quantity,Rating
267557,1.0,273,30,4


In [12]:
#find stockcodes that the user 1 did not buy yet
customer = list(df[df['CustomerID']==1]['StockCode'].unique())
stockcodes = df['StockCode'].unique()
stockcodes = list(set(stockcodes) -set(customer))

#find the suggested ratings for each of those products and keep the maximum 
customer_id = 1
suggested_rating = -1
suggested_stockcode = -1
for s in stockcodes:
    rating = knn.predict(customer_id, s).est
    if (rating>suggested_rating):
        suggested_rating = rating
        suggested_stockcode = s
print("The suggested product is {} with rating {}.".format(suggested_stockcode, suggested_rating))

The suggested product is 28 with rating 5.
