<h1 style='text-align: center;'> Individual Machine Learning Project </h1> 
<h3 style='text-align: center;'> By Sri Das </h3>

<h2 style='text-align: center;'> Building Recommendation System </h2>

### Importing required packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Importing data and exploration

In [2]:
# Load the raw transaction export into a DataFrame
data = pd.read_csv(filepath_or_buffer='recom.csv')

In [3]:
# Preview the first five rows of the raw data
data.head()

Unnamed: 0.1,Unnamed: 0,Main_ID,Transaction_ID,Date,Price,Code_Product,Amount,ItemKey
0,0,90fada91,264f7a69,2022-10-07 20:53:49.153,125.0,5002.0,1.0,5002.0
1,1,9006f9ac,45c7d853,2022-09-17 15:54:57.187,19.0,35012.0,1.0,
2,2,32270891,61ad76dd,2022-11-28 13:51:55.667,141.0,5005.0,1.0,5005.0
3,3,97e03e47,41ee09f6,2022-09-12 16:20:22.110,4.5,35078.5,1.0,
4,4,41949228,244fe6d8,2022-10-14 18:53:43.933,129.5,49291.5,5.0,


In [4]:
# Dropping the first column, 'Unnamed: 0'.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op rather than
# a KeyError for the already-removed column.
data = data.drop(columns='Unnamed: 0', errors='ignore')

In [5]:
# Summary statistics of the numeric columns
data.describe()

Unnamed: 0,Price,Code_Product,Amount,ItemKey
count,50000.0,50000.0,50000.0,28597.0
mean,62.56067,32379.29354,1.23264,20775.740952
std,68.269624,21697.500334,0.749353,16481.882853
min,0.5,5000.5,1.0,5000.5
25%,24.5,10013.0,1.0,5011.5
50%,45.5,40009.5,1.0,10023.0
75%,83.0,49291.5,1.0,40028.5
max,2525.5,350027.5,54.0,57035.5


In [6]:
# (rows, columns) of the data set
data.shape

(50000, 7)

In [7]:
# Column data types
data.dtypes

Main_ID            object
Transaction_ID     object
Date               object
Price             float64
Code_Product      float64
Amount            float64
ItemKey           float64
dtype: object

In [8]:
# Number of missing values per column
data.isna().sum()

Main_ID               0
Transaction_ID        0
Date                  0
Price                 0
Code_Product          0
Amount                0
ItemKey           21403
dtype: int64

In [9]:
# Rows where 'ItemKey' does not equal 'Code_Product'.
# NaN never compares equal, so rows with a missing 'ItemKey' are included.
data[data['Code_Product'].ne(data['ItemKey'])]

Unnamed: 0,Main_ID,Transaction_ID,Date,Price,Code_Product,Amount,ItemKey
1,9006f9ac,45c7d853,2022-09-17 15:54:57.187,19.0,35012.0,1.0,
3,97e03e47,41ee09f6,2022-09-12 16:20:22.110,4.5,35078.5,1.0,
4,41949228,244fe6d8,2022-10-14 18:53:43.933,129.5,49291.5,5.0,
5,0e816717,654c3866,2022-10-21 20:53:45.037,72.0,49291.5,1.0,
7,d93c0430,4e39a356,2022-10-21 10:50:28.717,33.5,49292.0,1.0,
...,...,...,...,...,...,...,...
49989,bb3c0a55,7cc8cfe2,2022-11-29 12:53:09.393,96.0,49130.0,1.0,
49990,d2879869,cb98451f,2022-10-22 20:20:11.290,12.5,35075.5,1.0,
49992,00ab8ebd,6272da05,2022-11-28 15:00:31.897,8.0,35078.5,2.0,
49996,a8bc484a,c9946c16,2022-11-18 19:49:01.973,34.0,49292.0,1.0,


Columns 'Code_Product' and 'ItemKey' appear to hold identical values, but 'ItemKey' has 21403 missing values, so we drop it from the data set.

In [10]:
# Drop the 'ItemKey' column. Rebinding with errors='ignore' (instead of
# inplace=True) makes the cell idempotent: re-running it no longer raises
# KeyError once the column is gone.
data = data.drop(columns='ItemKey', errors='ignore')

In [11]:
# Preview the data after dropping the unneeded columns
data.head()

Unnamed: 0,Main_ID,Transaction_ID,Date,Price,Code_Product,Amount
0,90fada91,264f7a69,2022-10-07 20:53:49.153,125.0,5002.0,1.0
1,9006f9ac,45c7d853,2022-09-17 15:54:57.187,19.0,35012.0,1.0
2,32270891,61ad76dd,2022-11-28 13:51:55.667,141.0,5005.0,1.0
3,97e03e47,41ee09f6,2022-09-12 16:20:22.110,4.5,35078.5,1.0
4,41949228,244fe6d8,2022-10-14 18:53:43.933,129.5,49291.5,5.0


The data has the following columns: 'Main_ID', a unique ID for each customer; 'Transaction_ID', a unique identifier for each transaction; 'Date', the date of sale; 'Price', the price of the item; 'Code_Product', a unique code for each product; and 'Amount', the number of units purchased per transaction.

Since the data contains no item features (such as product descriptions or categories) and no explicit feedback (such as ratings), content-based filtering cannot be used, and we will proceed with collaborative filtering recommendation methods.

In [12]:
# Keep only the columns needed for recommendations:
# customer ID, purchased quantity and product code
relevant_columns = ['Main_ID', 'Amount', 'Code_Product']
data_rel = data.loc[:, relevant_columns]

In [13]:
# Preview the reduced data set
data_rel.head()

Unnamed: 0,Main_ID,Amount,Code_Product
0,90fada91,1.0,5002.0
1,9006f9ac,1.0,35012.0
2,32270891,1.0,5005.0
3,97e03e47,1.0,35078.5
4,41949228,5.0,49291.5


In [14]:
# Column types, non-null counts and memory usage of the reduced data set
data_rel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Main_ID       50000 non-null  object 
 1   Amount        50000 non-null  float64
 2   Code_Product  50000 non-null  float64
dtypes: float64(2), object(1)
memory usage: 1.1+ MB


### Cosine similarity

Before finding the similarity between items, let's check the quantity of each product purchased by each customer by grouping the data.

In [15]:
# Total quantity bought per (customer, product) pair
amount_per_pair = data_rel.groupby(['Main_ID', 'Code_Product'])['Amount'].sum()
cust_prod = amount_per_pair.reset_index()
cust_prod.head()

Unnamed: 0,Main_ID,Code_Product,Amount
0,00024de6,35097.0,2.0
1,00084856,49291.5,1.0
2,00084856,49292.0,1.0
3,0008e848,40002.0,1.0
4,00096930,49291.5,2.0


In [16]:
# Number of distinct (customer, product) pairs and columns
cust_prod.shape

(45901, 3)

In [17]:
# Sorted unique customer IDs, unique product codes (in order of first
# appearance) and the summed quantity of every (customer, product) pair
unique_customers = cust_prod['Main_ID'].unique()
cust = list(np.sort(unique_customers))
products = list(cust_prod['Code_Product'].unique())
amt = list(cust_prod['Amount'])
summary_template = 'Unique customers:{}, unique products:{}'
print(summary_template.format(len(cust), len(products)))

Unique customers:28514, unique products:333


In our dataset we have 28514 unique customers and 333 unique products. In the next step we will build the sparse matrix

In [18]:
# One-column frame listing the sorted unique customer IDs
df_uniq_cust = pd.DataFrame({'customer': cust})
df_uniq_cust.head()

Unnamed: 0,customer
0,00024de6
1,00084856
2,0008e848
3,00096930
4,000c66b7


In [19]:
from scipy import sparse
from pandas.api.types import CategoricalDtype

In [20]:
# Row position of each record: the customer's position in `cust`
cust_dtype = CategoricalDtype(categories=cust)
rows = cust_prod['Main_ID'].astype(cust_dtype).cat.codes

In [21]:
# Column position of each record: the product's position in `products`
prod_dtype = CategoricalDtype(categories=products)
cols = cust_prod['Code_Product'].astype(prod_dtype).cat.codes

In [22]:
# Customer x product matrix holding the purchased quantities
matrix_shape = (len(cust), len(products))
purch_sparse = sparse.csr_matrix((amt, (rows, cols)), shape=matrix_shape)

In [23]:
# Show the shape, dtype and number of stored elements of the sparse matrix
purch_sparse

<28514x333 sparse matrix of type '<class 'numpy.float64'>'
	with 45901 stored elements in Compressed Sparse Row format>

We have a matrix with 28514 rows (unique customers) and 333 columns (unique items), filled with the amount (quantity) of each product purchased by each customer. Let us check the sparsity of this matrix.

In [24]:
# Sparsity = percentage of customer-product cells with no purchase
n_rows, n_cols = purch_sparse.shape
mat_size = n_rows * n_cols
nonzero_rows, _ = purch_sparse.nonzero()
purch_amt = len(nonzero_rows)
sparsity = 100 * (1 - (purch_amt / mat_size))
sparsity

99.51658539369839

About 99.52% of the matrix that captures the customer-product interactions is empty. Out of 28514 × 333 (9,495,162) possible interactions, only 45,901 are filled. This implies that most customers have purchased only a small range of products. Given the sparse nature of the data, we will proceed with the cosine similarity technique.

In [25]:
# Binary purchase indicator: every (customer, product) pair present in
# cust_prod is flagged with purch_yes = 1
data_bin = cust_prod.assign(purch_yes=1)
data_bin.head()

Unnamed: 0,Main_ID,Code_Product,Amount,purch_yes
0,00024de6,35097.0,2.0,1
1,00084856,49291.5,1.0,1
2,00084856,49292.0,1.0,1
3,0008e848,40002.0,1.0,1
4,00096930,49291.5,2.0,1


In [26]:
# Keep only the binary flag; the quantity is not needed from here on
purch_data = data_bin.drop(columns='Amount')
purch_data.head()

Unnamed: 0,Main_ID,Code_Product,purch_yes
0,00024de6,35097.0,1
1,00084856,49291.5,1
2,00084856,49292.0,1
3,0008e848,40002.0,1
4,00096930,49291.5,1


In [27]:
# (rows, columns) of the binary purchase table
purch_data.shape

(45901, 3)

Given the type of the data, to distinguish product codes from customer IDs, the letter 'P' is prefixed to each product code.

In [28]:
# Prefix product codes with 'P'. The labels are built from the untouched
# numeric codes in `data_bin` (same row index as `purch_data`), so re-running
# this cell does not stack prefixes ('PP...') the way re-reading
# purch_data['Code_Product'] would.
purch_data['Code_Product'] = 'P' + data_bin['Code_Product'].astype(str)

In [29]:
# Customer x product table of purchase flags (NaN where nothing was bought)
df_mat = purch_data.pivot_table(
    values='purch_yes',
    index='Main_ID',
    columns='Code_Product',
)
df_mat.head()

Code_Product,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,P10015.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
Main_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00024de6,,,,,,,,,,,...,,,,,,,,,,
00084856,,,,,,,,,,,...,,,,,,,,,,
0008e848,,,,,,,,,,,...,,,,,,,,,,
00096930,,,,,,,,,,,...,,,,,,,,,,
000c66b7,,,,,,,,,,,...,,,,,,,,,,


In [30]:
# (customers, products) in the pivot table
df_mat.shape

(28514, 333)

In [31]:
# Missing cells mean "not purchased": replace NaN with 0
df_mat = df_mat.fillna(value=0)

In [32]:
# Turn the 'Main_ID' index back into a regular column, then remove the
# leftover 'Code_Product' name from the column axis
df_reset = df_mat.reset_index()
df_reset = df_reset.rename_axis(None, axis=1)
df_reset.head()

Unnamed: 0,Main_ID,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
0,00024de6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00084856,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0008e848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,00096930,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,000c66b7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# `df` is a second name for the same object as `df_reset` (no copy is made)
df = df_reset

In [34]:
# Product-only frame: the customer ID column is removed so that only the
# 0/1 purchase flags remain
dfsalesprod = df.drop(columns='Main_ID')
dfsalesprod.head()

Unnamed: 0,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,P10015.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Scale every product column to unit Euclidean length, so that the dot
# product of two columns equals their cosine similarity
col_norms = np.sqrt(np.square(dfsalesprod).sum(axis=0))
dfsalesprodnorm = dfsalesprod.div(col_norms, axis='columns')
dfsalesprodnorm.head()

Unnamed: 0,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,P10015.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
# Product x product cosine similarity: dot products of the unit-length columns
prod_sim = dfsalesprodnorm.T.dot(dfsalesprodnorm)
prod_sim.head()

Unnamed: 0,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,P10015.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
P10000.5,1.0,0.079057,0.0,0.03371,0.017344,0.0,0.029463,0.0,0.023456,0.026747,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P10001.0,0.079057,1.0,0.0,0.0,0.024376,0.0,0.024845,0.0,0.012363,0.009398,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P10001.5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P10002.0,0.03371,0.0,0.0,1.0,0.015591,0.0,0.0,0.174078,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
P10003.0,0.017344,0.024376,0.0,0.015591,1.0,0.019544,0.038154,0.029854,0.085432,0.096903,...,0.0,0.0,0.036564,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# (products, products) similarity matrix
prod_sim.shape

(333, 333)

Alternatively, we could use the cosine similarity function from scikit-learn to calculate the same matrix.

In [38]:
from scipy.sparse import coo_matrix, csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder

In [39]:
# Encode customer IDs and product labels as consecutive integer codes
cust_label_encoder, prod_label_encoder = LabelEncoder(), LabelEncoder()
cust_id = cust_label_encoder.fit_transform(purch_data['Main_ID'])
prod_id = prod_label_encoder.fit_transform(purch_data['Code_Product'])

# Product x customer sparse matrix with a 1 for every recorded purchase
purchase_flags = [1] * len(cust_id)
salesprodcust_matrix = csr_matrix((purchase_flags, (prod_id, cust_id)))

salesprodcust_matrix

<333x28514 sparse matrix of type '<class 'numpy.int64'>'
	with 45901 stored elements in Compressed Sparse Row format>

In [40]:
# Pairwise cosine similarity between the product rows, as a dense array
similarity = cosine_similarity(salesprodcust_matrix, dense_output=True)

In [41]:
# (products, products)
similarity.shape

(333, 333)

In [42]:
# Wrap the similarity array in a DataFrame for display (integer product codes)
similarityDF = pd.DataFrame(data=similarity)
similarityDF.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,323,324,325,326,327,328,329,330,331,332
0,1.0,0.079057,0.0,0.03371,0.017344,0.0,0.029463,0.0,0.023456,0.026747,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.079057,1.0,0.0,0.0,0.024376,0.0,0.024845,0.0,0.012363,0.009398,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.03371,0.0,0.0,1.0,0.015591,0.0,0.0,0.174078,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.017344,0.024376,0.0,0.015591,1.0,0.019544,0.038154,0.029854,0.085432,0.096903,...,0.0,0.0,0.036564,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Computing the top 10 product recommendations

In [43]:
# Customer x product view of the purchase matrix, stored in CSR format
custsalesprod_matrix = salesprodcust_matrix.T.tocsr()

In [44]:
# (customers, products)
custsalesprod_matrix.shape

(28514, 333)

Combining the purchase history with the similarity matrix gives, for each customer, a score for every product: the sum of that product's similarities to the products the customer has already purchased.

In [45]:
# Score of product j for customer i = sum of the similarities between j
# and every product that customer i has purchased
custsalesprodscores = custsalesprod_matrix @ similarity

Since we do not want to recommend an already purchased product, its score is set to -1. The scores are then sorted to get the top 10 recommendations for each customer. The results are stored in `reco` (the recommended product codes) and `scores` (the per-product scores for each customer).

In [46]:
# Number of products to recommend per customer (capped at the catalogue size)
TOP_N = 10

# Work on a copy so the raw score matrix computed above is left untouched.
masked_scores = np.array(custsalesprodscores, dtype=float)
n_customers, n_products = masked_scores.shape
top_n = min(TOP_N, n_products)

# Already-purchased products must never be recommended: overwrite their score
# with -1. The CSR structure gives, for every stored purchase, its row
# (customer) via indptr and its column (product) via indices.
purchased_rows = np.repeat(np.arange(n_customers),
                           np.diff(custsalesprod_matrix.indptr))
purchased_cols = custsalesprod_matrix.indices
masked_scores[purchased_rows, purchased_cols] = -1

# Sort every customer's scores at once; the last `top_n` positions of the
# ascending sort, reversed, are the best product codes in descending order.
top_prod_codes = np.argsort(masked_scores, axis=1)[:, -top_n:][:, ::-1]

# One row per customer (integer customer code as index), one column per rank.
reco = pd.DataFrame(top_prod_codes,
                    index=np.arange(n_customers),
                    columns=['Top%s' % (rank + 1) for rank in range(top_n)])

# One score vector per customer, in customer-code order.
scores = list(masked_scores)


In [47]:
# Top 10 recommendations per customer.
# Customers (index) and products (values) are shown as integer codes here.

reco.head()

Unnamed: 0,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,Top9,Top10
0,189,153,267,166,168,161,122,160,201,121
1,297,254,97,8,262,258,249,285,294,310
2,156,23,16,199,297,263,283,200,308,184
3,297,283,254,97,262,8,258,285,294,249
4,297,254,9,4,298,283,97,308,282,8


In [48]:
# Relabeling the index and columns using the label encoders.
# NOTE: this converts `reco` from integer codes to labels in place, so it must
# be run exactly once after the recommendations have been computed.

reco.index = cust_label_encoder.inverse_transform(reco.index)

# Replace each column as a whole rather than writing through .iloc: putting
# string labels into an int64 column via .iloc is an incompatible-dtype set,
# which pandas 2.1+ deprecates.
for col in reco.columns:
    reco[col] = prod_label_encoder.inverse_transform(reco[col])

In [49]:
# Recommendations after relabeling: customer IDs as index, product labels as values
reco.head()

Unnamed: 0,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,Top9,Top10
00024de6,P35096.5,P35049.0,P48554.5,P35078.5,P35079.5,P35076.0,P35001.0,P35075.5,P40017.5,P35000.5
00084856,P5000.5,P45004.0,P25003.0,P10013.0,P48513.0,P48504.5,P45001.0,P49356.0,P49556.0,P5011.5
0008e848,P35071.5,P15000.5,P10023.5,P40003.0,P5000.5,P48522.5,P49292.0,P40009.5,P5009.0,P35093.0
00096930,P5000.5,P49292.0,P45004.0,P25003.0,P48513.0,P10013.0,P48504.5,P49356.0,P49556.0,P45001.0
000c66b7,P5000.5,P45004.0,P10015.0,P10003.0,P5001.0,P49292.0,P25003.0,P5009.0,P49291.5,P10013.0


In [50]:
# Score table: one row per customer, one column per product code
# (all products, not just the top 10)
prod_scores = pd.DataFrame(data=scores)

In [51]:
# Preview the score table (rows and columns still carry integer codes)
prod_scores.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,323,324,325,326,327,328,329,330,331,332
0,0.0,0.0,0.0,0.0,0.011863,0.0,0.012091,0.0,0.021058,0.004574,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.041416,0.029042,0.006844,0.009308,0.131641,0.036765,0.083782,0.017823,0.182504,0.128639,...,0.0,0.0,0.0,0.025506,0.012806,0.005101,0.010821,0.0,0.015566,0.0
2,0.009068,0.0,0.0,0.0,0.008388,0.020438,0.012825,0.0,0.019144,0.006468,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.020884,0.011007,0.0,0.004693,0.063588,0.007845,0.041841,0.008987,0.088993,0.065791,...,0.0,0.0,0.0,0.0,0.004317,0.0,0.0,0.0,0.015566,0.0
4,0.052539,0.023877,0.015101,0.0,0.154379,0.033029,0.081964,0.019496,0.139584,0.159414,...,0.0,0.0,0.0,0.0,0.009365,0.011256,0.027524,0.0,0.0,0.0


In [52]:
# Give the score table the same customer IDs as `reco` on the rows and the
# original product labels on the columns
product_labels = prod_label_encoder.inverse_transform(prod_scores.columns)
prod_scores.index = reco.index
prod_scores.columns = product_labels

In [53]:
# Preview the score table after relabeling
prod_scores.head()

Unnamed: 0,P10000.5,P10001.0,P10001.5,P10002.0,P10003.0,P10003.5,P10004.0,P10006.0,P10013.0,P10015.0,...,P57035.5,P60020.5,P60047.5,P60048.0,P60048.5,P60049.0,P60049.5,P60050.5,P60051.0,P60052.0
00024de6,0.0,0.0,0.0,0.0,0.011863,0.0,0.012091,0.0,0.021058,0.004574,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00084856,0.041416,0.029042,0.006844,0.009308,0.131641,0.036765,0.083782,0.017823,0.182504,0.128639,...,0.0,0.0,0.0,0.025506,0.012806,0.005101,0.010821,0.0,0.015566,0.0
0008e848,0.009068,0.0,0.0,0.0,0.008388,0.020438,0.012825,0.0,0.019144,0.006468,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00096930,0.020884,0.011007,0.0,0.004693,0.063588,0.007845,0.041841,0.008987,0.088993,0.065791,...,0.0,0.0,0.0,0.0,0.004317,0.0,0.0,0.0,0.015566,0.0
000c66b7,0.052539,0.023877,0.015101,0.0,0.154379,0.033029,0.081964,0.019496,0.139584,0.159414,...,0.0,0.0,0.0,0.0,0.009365,0.011256,0.027524,0.0,0.0,0.0


In [54]:
# Function to obtain the Top 10 product recommendations and respective score for a customer
def top10_reco(cust_id, recommendations, scores):
    """Return one customer's recommended products together with their scores.

    Parameters
    ----------
    cust_id : label
        Customer identifier; must be present in the index of both
        `recommendations` and `scores`.
    recommendations : pandas.DataFrame
        One row per customer; every cell holds a recommended product label.
    scores : pandas.DataFrame
        One row per customer and one column per product label.

    Returns
    -------
    pandas.DataFrame
        The customer's row from `recommendations` plus a 'Score' row holding
        the score of each recommended product.

    Raises
    ------
    KeyError
        If `cust_id` is not found in `recommendations` or `scores`.
    """
    # Use the frame that was passed in; the previous version silently read
    # the notebook-level `reco` variable and ignored this argument.
    prod10 = recommendations[recommendations.index == cust_id].copy()
    if prod10.empty:
        raise KeyError(cust_id)
    score_cust = scores[scores.index == cust_id]

    # Look up the score of every recommended product, in rank order.
    score = [score_cust.loc[cust_id, prod10.loc[cust_id, rank]]
             for rank in prod10.columns]

    prod10.loc['Score'] = score
    print('Top 10 product recommendations and Score for the customer {} are :'.format(cust_id))
    return prod10

In [55]:
# Example: recommendations and scores for a single customer
top10_reco('ffff4aa9', reco, prod_scores)

Top 10 product recommendations and Score for the customer ffff4aa9 are :


Unnamed: 0,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,Top9,Top10
ffff4aa9,P5000.5,P10003.0,P45004.0,P25003.0,P10013.0,P10015.0,P10023.5,P5001.0,P48513.0,P49567.5
Score,0.121146,0.104095,0.095154,0.094152,0.093942,0.088531,0.088426,0.083604,0.082358,0.075776


### Exporting the recommendations to Excel

In [57]:
# Write the recommendation table to an Excel workbook, one row per customer
reco.to_excel(
    excel_writer="Top10ProductRecos.xlsx",
    index=True,
    index_label='Customer',
)