## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from apyori import apriori

## Reading Data

In [2]:
# Switch to the project folder that holds the workbook.
# Backslashes are escaped explicitly: sequences such as "\A", "\M" and "\C" are
# not valid string escapes and trigger a warning on current Python versions.
# NOTE(review): this is a machine-specific absolute path — consider a configurable
# data directory so the notebook runs on other machines.
os.chdir("F:\\AI&DS\\ML\\CustomerSegmentProject\\")

retail = pd.read_excel("Online Retail.xlsx")

# Work on a copy so the frame as loaded stays available in `retail`.
retail_df = retail.copy()

retail_df.info()

retail_df.describe().round(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    541909 non-null  object        
 1   StockCode    541909 non-null  object        
 2   Description  540455 non-null  object        
 3   Quantity     541909 non-null  int64         
 4   InvoiceDate  541909 non-null  datetime64[ns]
 5   UnitPrice    541909 non-null  float64       
 6   CustomerID   406829 non-null  float64       
 7   Country      541909 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 33.1+ MB


Unnamed: 0,Quantity,UnitPrice,CustomerID
count,541909.0,541909.0,406829.0
mean,9.55,4.61,15287.69
std,218.08,96.76,1713.6
min,-80995.0,-11062.06,12346.0
25%,1.0,1.25,13953.0
50%,3.0,2.08,15152.0
75%,10.0,4.13,16791.0
max,80995.0,38970.0,18287.0


## Data Cleaning

In [3]:
retail_df.shape

retail_df.isna().sum()

# Discard every row that has at least one missing value. Reassigning (instead
# of inplace=True) keeps the cell safe to re-run.
retail_df = retail_df.dropna()

retail_df.shape

# Flag rows whose invoice number contains the letter 'C'
# (presumably cancelled orders in this dataset — confirm against the data dictionary).
# Casting to str first means numeric invoice numbers yield False rather than NaN.
invoice_has_c = retail_df["InvoiceNo"].astype(str).str.contains('C')

# Number of flagged rows. sum() counts the True entries, whereas count() would
# only report how many mask entries are non-null.
invoice_has_c.sum()

# Keep the unflagged rows; .copy() detaches the result from the source frame so
# later column assignments operate on an independent frame.
retail_df = retail_df[~invoice_has_c].copy()

retail_df.shape

retail_df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


## Adding 'Total Price' column

In [4]:
# Line total for each row: quantity multiplied by unit price.
retail_df = retail_df.assign(
    TotalPrice=retail_df['Quantity'].mul(retail_df['UnitPrice'])
)

retail_df.shape

(397924, 9)

## Pivoting For Invoices

In [5]:
# Invoice x product matrix: one row per InvoiceNo, one column per Description.
# Each cell holds the number of rows for that (invoice, product) pair;
# combinations that never occur are filled with NaN.
retail_df_pivot = pd.pivot_table(
    retail_df,
    index='InvoiceNo',
    columns='Description',
    aggfunc='size',
    fill_value=np.nan,
)

retail_df_pivot.shape

(18536, 3877)

## Formatting the Model's Input

In [6]:
# Build one basket per invoice: the list of product descriptions whose cell in
# the pivot table is populated (not NaN).
#
# The matrix dimensions are taken from the pivot table itself rather than being
# hard-coded, so the cell keeps working when the cleaned data changes size.
# Baskets are no longer padded with literal "nan" strings: the padding added a
# fake "nan" item to every transaction and made each list as long as the full
# product catalogue.
purchased = retail_df_pivot.notna().to_numpy()
product_names = retail_df_pivot.columns.astype(str).to_numpy()

# Boolean-mask the product names with each invoice's row of the matrix.
transactions = [product_names[row_mask].tolist() for row_mask in purchased]

## Training Apriori Model

In [11]:
# Mine association rules from the baskets, limited to item sets of at most two
# products and filtered by minimum support, confidence and lift.
# NOTE(review): apyori documents min_support, min_confidence, min_lift and
# max_length as keyword arguments; min_length appears to be ignored — confirm.
rules = apriori(
    transactions=transactions,
    min_support=0.01,
    min_confidence=0.8,
    min_lift=5,
    min_length=2,
    max_length=2,
)

## Converting the results to a list

In [12]:
# Consume `rules` once into a list so the records can be indexed and reused.
res = [record for record in rules]
print(res[0])

RelationRecord(items=frozenset({'PINK REGENCY TEACUP AND SAUCER', 'GREEN REGENCY TEACUP AND SAUCER'}), support=0.024816573154941735, ordered_statistics=[OrderedStatistic(items_base=frozenset({'PINK REGENCY TEACUP AND SAUCER'}), items_add=frozenset({'GREEN REGENCY TEACUP AND SAUCER'}), confidence=0.8273381294964028, lift=22.19325552582536)])


## Formatting the results as a DataFrame

In [13]:
def inspect(results):
    """Flatten association-rule records into rows suitable for a DataFrame.

    Each record is indexed positionally: ``record[1]`` is the support of the
    item set and ``record[2]`` is a sequence of rule statistics, each laid out
    as ``(items_base, items_add, confidence, lift)``.

    Every rule statistic of every record produces one row. Reading only the
    first statistic would silently drop the reverse rule when both directions
    (A -> B and B -> A) pass the thresholds.

    Args:
        results: Iterable of relation records as described above.

    Returns:
        List of ``(lhs, rhs, support, confidence, lift)`` tuples, in record
        order and, within a record, in rule-statistic order.
    """
    def _label(items):
        # A single-item side is returned as the item itself; a larger side is
        # joined into one deterministic, comma-separated string. An empty
        # side yields "" instead of raising IndexError.
        members = sorted(items, key=str)
        if len(members) == 1:
            return members[0]
        return ", ".join(str(member) for member in members)

    rows = []
    for record in results:
        support = record[1]
        for stat in record[2]:
            rows.append((_label(stat[0]), _label(stat[1]), support, stat[2], stat[3]))
    return rows
# Tabulate the flattened rules, one row per rule.
rule_columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift']
resultsinDataFrame = pd.DataFrame(inspect(res), columns=rule_columns)
resultsinDataFrame.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.024817,0.827338,22.193256
1,POPPY'S PLAYHOUSE BEDROOM,POPPY'S PLAYHOUSE KITCHEN,0.013703,0.803797,43.061242
2,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE BEDROOM,0.01106,0.813492,47.718003
3,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE KITCHEN,0.011599,0.853175,45.706487
4,REGENCY TEA PLATE PINK,REGENCY TEA PLATE GREEN,0.010898,0.901786,61.909259
5,REGENCY TEA PLATE GREEN,REGENCY TEA PLATE ROSES,0.012354,0.848148,47.930714
6,REGENCY TEA PLATE PINK,REGENCY TEA PLATE ROSES,0.010628,0.879464,49.700457
7,SET/6 RED SPOTTY PAPER CUPS,SET/6 RED SPOTTY PAPER PLATES,0.012732,0.82807,47.228027
8,WOODEN TREE CHRISTMAS SCANDINAVIAN,WOODEN STAR CHRISTMAS SCANDINAVIAN,0.012192,0.818841,35.053185


### Top 10 product pairs by support

In [15]:
# Rules ranked by support, highest first (at most ten rows).
results_by_supp = resultsinDataFrame.nlargest(10, 'Support')
results_by_supp.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.024817,0.827338,22.193256
1,POPPY'S PLAYHOUSE BEDROOM,POPPY'S PLAYHOUSE KITCHEN,0.013703,0.803797,43.061242
7,SET/6 RED SPOTTY PAPER CUPS,SET/6 RED SPOTTY PAPER PLATES,0.012732,0.82807,47.228027
5,REGENCY TEA PLATE GREEN,REGENCY TEA PLATE ROSES,0.012354,0.848148,47.930714
8,WOODEN TREE CHRISTMAS SCANDINAVIAN,WOODEN STAR CHRISTMAS SCANDINAVIAN,0.012192,0.818841,35.053185
3,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE KITCHEN,0.011599,0.853175,45.706487
2,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE BEDROOM,0.01106,0.813492,47.718003
4,REGENCY TEA PLATE PINK,REGENCY TEA PLATE GREEN,0.010898,0.901786,61.909259
6,REGENCY TEA PLATE PINK,REGENCY TEA PLATE ROSES,0.010628,0.879464,49.700457


### Top 10 product pairs by confidence

In [16]:
# Rules ranked by confidence, highest first (at most ten rows).
results_by_conf = resultsinDataFrame.nlargest(10, 'Confidence')
results_by_conf.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
4,REGENCY TEA PLATE PINK,REGENCY TEA PLATE GREEN,0.010898,0.901786,61.909259
6,REGENCY TEA PLATE PINK,REGENCY TEA PLATE ROSES,0.010628,0.879464,49.700457
3,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE KITCHEN,0.011599,0.853175,45.706487
5,REGENCY TEA PLATE GREEN,REGENCY TEA PLATE ROSES,0.012354,0.848148,47.930714
7,SET/6 RED SPOTTY PAPER CUPS,SET/6 RED SPOTTY PAPER PLATES,0.012732,0.82807,47.228027
0,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.024817,0.827338,22.193256
8,WOODEN TREE CHRISTMAS SCANDINAVIAN,WOODEN STAR CHRISTMAS SCANDINAVIAN,0.012192,0.818841,35.053185
2,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE BEDROOM,0.01106,0.813492,47.718003
1,POPPY'S PLAYHOUSE BEDROOM,POPPY'S PLAYHOUSE KITCHEN,0.013703,0.803797,43.061242


### Top 10 product pairs by lift

In [17]:
# Rules ranked by lift, highest first (at most ten rows).
results_by_lift = resultsinDataFrame.nlargest(10, 'Lift')
results_by_lift.head(10)

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
4,REGENCY TEA PLATE PINK,REGENCY TEA PLATE GREEN,0.010898,0.901786,61.909259
6,REGENCY TEA PLATE PINK,REGENCY TEA PLATE ROSES,0.010628,0.879464,49.700457
5,REGENCY TEA PLATE GREEN,REGENCY TEA PLATE ROSES,0.012354,0.848148,47.930714
2,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE BEDROOM,0.01106,0.813492,47.718003
7,SET/6 RED SPOTTY PAPER CUPS,SET/6 RED SPOTTY PAPER PLATES,0.012732,0.82807,47.228027
3,POPPY'S PLAYHOUSE LIVINGROOM,POPPY'S PLAYHOUSE KITCHEN,0.011599,0.853175,45.706487
1,POPPY'S PLAYHOUSE BEDROOM,POPPY'S PLAYHOUSE KITCHEN,0.013703,0.803797,43.061242
8,WOODEN TREE CHRISTMAS SCANDINAVIAN,WOODEN STAR CHRISTMAS SCANDINAVIAN,0.012192,0.818841,35.053185
0,PINK REGENCY TEACUP AND SAUCER,GREEN REGENCY TEACUP AND SAUCER,0.024817,0.827338,22.193256
