## We analyze an online retail data set and recommend products to User B based on User A's buying pattern, then list the top 10 similar products using the cosine similarity matrix from scikit-learn.

### 1. Load the libraries and data set

In [None]:
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Read the raw CSV first, then parse the timestamp column in a single vectorised call.
# `pd.datetime` is no longer available in current pandas and the `date_parser`
# argument of `read_csv` is deprecated, so the conversion is done explicitly with
# `pd.to_datetime` and the same '%m/%d/%Y %H:%M' format the notebook always used.
df = pd.read_csv('/kaggle/input/onlineretail/OnlineRetail.csv', encoding='unicode_escape')
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'], format='%m/%d/%Y %H:%M')

In [None]:
# Preview the first rows to confirm the file loaded and to see which columns are available.
df.head()

### 2. Prepare the Data

In [None]:
# (rows, columns) of the raw data, as a baseline before any filtering.
df.shape

Let's get rid of rows whose Quantity is zero or negative (we keep only rows with Quantity > 0).

In [None]:
# Keep only the rows with a strictly positive Quantity, then show the reduced size.
has_positive_quantity = df['Quantity'].gt(0)
df = df.loc[has_positive_quantity]
df.shape

Summary statistics of the CustomerID field

In [None]:
# Summary statistics for CustomerID; its count shows how many rows actually carry an ID.
df['CustomerID'].describe()

Get rid of rows with null values in CustomerID. First, count how many there are:

In [None]:
# Number of rows where CustomerID is missing.
df['CustomerID'].isna().sum()

So there are rows with a null CustomerID. Here are a few of them:

In [None]:
# Show a few of the rows that have no CustomerID.
df.loc[df['CustomerID'].isna()].head()

In [None]:
# Size before dropping the rows without a CustomerID, for comparison with df1 below.
df.shape

We drop the rows with a null CustomerID and keep only the rows with a valid ID

In [None]:
# df1 holds only the transactions that can be attributed to a customer.
has_customer_id = df['CustomerID'].notna()
df1 = df.loc[has_customer_id]
df1.shape

In [None]:
# Preview the cleaned data (positive quantities, non-null CustomerID).
df1.head()

### 3. Now we create a Customer-Item Matrix. 

In [None]:
# One row per customer, one column per product; each cell is the total quantity
# that customer bought of that product (missing where there was no purchase).
customer_item_matrix = df1.pivot_table(
    values='Quantity',
    index='CustomerID',
    columns='StockCode',
    aggfunc='sum',
)

Note: we use three columns (CustomerID, StockCode and Quantity) to create the pivot table: customers become the rows, products the columns, and each cell holds the summed quantity.

In [None]:
# Label-based slice on the CustomerID index: show the first rows from ID 12481 onwards.
customer_item_matrix.loc[12481:].head()

In [None]:
# (number of customers, number of products) in the matrix.
customer_item_matrix.shape

In [None]:
# Sanity check: the number of distinct customers is expected to match the matrix's row count.
df1['CustomerID'].nunique()

In [None]:
# Sanity check: the number of distinct products is expected to match the matrix's column count.
df1['StockCode'].nunique()

In [None]:
# Sum of one customer's row; when run in order the matrix still holds summed
# quantities here, so this is that customer's total quantity purchased.
customer_item_matrix.loc[12348.0].sum()

One Hot Encoding: we convert any quantity above zero to 1 and everything else (zero or missing) to 0, so the matrix only records whether a customer bought an item.

In [None]:
# Turn quantities into 0/1 purchase flags with a single vectorised comparison.
# Missing cells (no purchase) compare as False and therefore become 0, exactly as
# the previous element-wise lambda did, but without the deprecated `applymap`
# and without calling a Python function once per cell.
customer_item_matrix = (customer_item_matrix > 0).astype(int)

In [None]:
# Same slice as before, now showing the 0/1 purchase flags.
customer_item_matrix.loc[12481:].head()

### 4. User based Collaborative Filtering using Sklearn module

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

#### User based Collaborative Filtering
- User to User Similarity Matrix

In [None]:
# Pairwise cosine similarity between customer rows; the result is square, with one
# row and one column per customer, still carrying default integer labels.
user_similarity_values = cosine_similarity(customer_item_matrix)
user_to_user_sim_matrix = pd.DataFrame(user_similarity_values)

In [None]:
# Preview the raw similarity matrix; rows and columns are still positional numbers here.
user_to_user_sim_matrix.head()

#### We set up the user-to-user matrix's column names and use the CustomerID as its index

In [None]:
# Label the columns with the customer IDs, in the same order as the rows of customer_item_matrix.
user_to_user_sim_matrix.columns = customer_item_matrix.index

In [None]:
# Add the customer IDs as a regular column so it can be promoted to the index in the next cell.
user_to_user_sim_matrix['CustomerID'] = customer_item_matrix.index

In [None]:
# Promote the CustomerID column to the row index, so both axes are labelled by customer.
user_to_user_sim_matrix = user_to_user_sim_matrix.set_index('CustomerID')

In [None]:
# Preview the labelled customer-by-customer similarity matrix.
user_to_user_sim_matrix.head()

#### Now we make Recommendations

In [None]:
# Rank every customer by similarity to customer 12350 (User A), most similar first.
# Customer 12350 is expected to appear at the top, since a purchase vector is
# maximally similar to itself.
user_to_user_sim_matrix.loc[12350.0].sort_values(ascending = False)

Items bought by User A

In [None]:
# Stock codes bought by customer 12350 (User A): the columns of their row whose
# purchase flag is positive. A boolean mask is used because `Series.nonzero()`
# has been removed from pandas and raises AttributeError on current versions.
purchases_of_A = customer_item_matrix.loc[12350.0]
items_bought_by_A = set(purchases_of_A[purchases_of_A > 0].index)

In [None]:
# Display the set of stock codes bought by User A.
items_bought_by_A

In [None]:
# Stock codes bought by customer 17935 (User B): the columns of their row whose
# purchase flag is positive. A boolean mask is used because `Series.nonzero()`
# has been removed from pandas and raises AttributeError on current versions.
purchases_of_B = customer_item_matrix.loc[17935.0]
items_bought_by_B = set(purchases_of_B[purchases_of_B > 0].index)

In [None]:
# Display the set of stock codes bought by User B.
items_bought_by_B

In [None]:
# Recommend to User B everything User A bought that User B has not bought yet.
items_to_recommend_User_B = items_bought_by_A.difference(items_bought_by_B)

In [None]:
# Display the stock codes that will be recommended to User B.
items_to_recommend_User_B

In [None]:
# Look up the descriptions of the recommended stock codes.
# The row mask is built from df1 itself: the earlier version built it from `df`,
# a different (larger) frame, and only worked through implicit index alignment.
df1.loc[
    df1['StockCode'].isin(items_to_recommend_User_B),
    ['StockCode','Description']
].drop_duplicates().set_index('StockCode')

##### Voilà! We just picked a random customer (User B) and found some items to recommend to them based on what User A bought

### 5. Another type: Item-Based Collaborative Filtering

In [None]:
# Transpose so that each row is a product, then compute pairwise cosine similarity
# between products based on which customers bought them.
item_customer_matrix = customer_item_matrix.T
item_item_sim_matrix = pd.DataFrame(cosine_similarity(item_customer_matrix))

In [None]:
# Label both axes of the item similarity matrix with the stock codes.
# The columns of the customer-item matrix are the index of its transpose.
stock_codes = customer_item_matrix.columns
item_item_sim_matrix.columns = stock_codes
item_item_sim_matrix['StockCode'] = stock_codes
item_item_sim_matrix = item_item_sim_matrix.set_index('StockCode')

In [None]:
# Preview the labelled product-by-product similarity matrix.
item_item_sim_matrix.head()

#### Now we make Recommendations

In [None]:
# Stock codes of the 10 products most similar to item '23166', best match first.
# The query item itself is expected to be among them, since it is maximally
# similar to itself.
similarity_to_query_item = item_item_sim_matrix.loc['23166']
ranked_similarity = similarity_to_query_item.sort_values(ascending=False)
top_10_similar_items = ranked_similarity.head(10).index.tolist()

In [None]:
# Display the ten stock codes, ordered from most to least similar.
top_10_similar_items

In [None]:
# Show one description per recommended item, in similarity order.
# The lookup uses df1, the same data the similarity matrix was built from, and
# de-duplicates on StockCode alone: a product can appear under several
# Description spellings, which previously produced more than ten rows here.
df1.loc[
    df1['StockCode'].isin(top_10_similar_items),
    ['StockCode', 'Description']
].drop_duplicates(subset='StockCode').set_index('StockCode').loc[top_10_similar_items]

##### Voilà! We just got the top 10 similar items. Reading the descriptions, we can see intuitively that they are similar

### You can use this Kernel for commercial or educational purposes, and you can add to it. Kindly leave a thumbs up and a comment