# Implementing market basket analysis

In [1]:
#Loading necessary packages
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
#Reading the retail transactions from the local Excel workbook
myretaildata = pd.read_excel(io='Online Retail.xlsx')
#Show (rows, columns) of the freshly loaded frame
myretaildata.shape

(1128, 8)

# Data Preparation

In [3]:
#Data Cleaning
#Each step builds a new frame instead of mutating in place, so the cell can be
#re-run safely and never writes through to the frame returned by read_excel.
myretaildata = myretaildata.dropna(axis=0, subset=['InvoiceNo']) #drops rows that have no invoice number
myretaildata = myretaildata.assign(
    Description=myretaildata['Description'].str.strip(), #removes spaces from beginning and end
    InvoiceNo=myretaildata['InvoiceNo'].astype('str'),   #invoice number as string so the .str filter below works
)
myretaildata = myretaildata[~myretaildata['InvoiceNo'].str.contains('C')] #remove the credit transactions (invoice numbers containing 'C')
myretaildata.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [5]:
#(rows, columns) of myretaildata at this point in the notebook
myretaildata.shape

(1118, 8)

In [6]:
#Number of rows per Country value, most frequent first
myretaildata['Country'].value_counts()

United Kingdom    1067
France              20
Germany             15
Australia           14
Netherlands          2
Name: Country, dtype: int64

In [7]:
#Separating transactions for United Kingdom
#Builds an invoice x product matrix: one row per InvoiceNo, one column per
#Description, each cell holding the summed Quantity for that pair.
mybasket = (
    myretaildata[myretaildata['Country'] == "United Kingdom"]
    .groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
    .unstack()               #pivot the Description level into columns
    .reset_index()
    .fillna(0)               #invoice/product pairs that never occur become 0
    .set_index('InvoiceNo')
)

In [8]:
#Viewing the first rows of the transaction basket (InvoiceNo index, one column per Description)
mybasket.head()

Description,10 COLOUR SPACEBOY PEN,12 DAISY PEGS IN WOOD BOX,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE SKULLS,3 PIECE SPACEBOY COOKIE CUTTER SET,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,...,WOODEN FRAME ANTIQUE WHITE,WOODEN HEART CHRISTMAS SCANDINAVIAN,WOODEN OWLS LIGHT GARLAND,WOODEN PICTURE FRAME WHITE FINISH,WRAP COWBOYS,YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YOU'RE CONFUSING ME METAL SIGN,ZINC METAL HEART DECORATION,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536365,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536366,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536367,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0
536369,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
#converting all positive values to 1 and everything else to 0
def my_encode_units(x):
    """Return 1 when ``x`` is a positive quantity, otherwise 0.

    Zero, negative and NaN inputs all map to 0. Every value greater than
    zero maps to 1, including fractional quantities between 0 and 1, so the
    function always returns an int and never falls through to ``None``.
    """
    # A single comparison covers every case: NaN > 0 is False, so NaN -> 0.
    return 1 if x > 0 else 0

#Apply my_encode_units to every cell of mybasket, giving a 0/1 purchase indicator per invoice and product
#NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour of DataFrame.map - confirm against the installed version
my_basket_sets = mybasket.applymap(my_encode_units)

In [10]:
#First rows of the encoded basket matrix
my_basket_sets.head()

Description,10 COLOUR SPACEBOY PEN,12 DAISY PEGS IN WOOD BOX,12 MESSAGE CARDS WITH ENVELOPES,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE SKULLS,3 PIECE SPACEBOY COOKIE CUTTER SET,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,...,WOODEN FRAME ANTIQUE WHITE,WOODEN HEART CHRISTMAS SCANDINAVIAN,WOODEN OWLS LIGHT GARLAND,WOODEN PICTURE FRAME WHITE FINISH,WRAP COWBOYS,YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YOU'RE CONFUSING ME METAL SIGN,ZINC METAL HEART DECORATION,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536365,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536366,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536367,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536368,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
536369,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Training Model

In [11]:
#Generating frequent itemsets
my_frequent_itemsets = apriori(
    my_basket_sets,
    min_support=0.075,    #minimum support threshold passed to apriori
    use_colnames=True,    #label itemsets with the basket's column names
)

In [12]:
#Deriving association rules from the frequent itemsets, thresholded on lift
my_rules = association_rules(
    my_frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

In [13]:
#(number of rules, number of columns) in my_rules
my_rules.shape

(1936, 9)

In [14]:
#Showing the five rules with the highest lift
my_rules.sort_values(by="lift", ascending=False).head(5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(GLASS STAR FROSTED T-LIGHT HOLDER),(CREAM CUPID HEARTS COAT HANGER),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
1245,"(WHITE METAL LANTERN, GLASS STAR FROSTED T-LIG...","(RED WOOLLY HOTTIE WHITE HEART., KNITTED UNION...",0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
1262,"(KNITTED UNION FLAG HOT WATER BOTTLE, GLASS ST...","(RED WOOLLY HOTTIE WHITE HEART., WHITE METAL L...",0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
1261,"(KNITTED UNION FLAG HOT WATER BOTTLE, WHITE ME...","(RED WOOLLY HOTTIE WHITE HEART., GLASS STAR FR...",0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf
1258,"(RED WOOLLY HOTTIE WHITE HEART., KNITTED UNION...",(WHITE METAL LANTERN),0.076923,0.076923,0.076923,1.0,13.0,0.071006,inf


# Making recommendations

In [15]:
#Keeping only rules with lift of at least 3 and confidence of at least 0.8
my_rules.loc[
    my_rules['lift'].ge(3) & my_rules['confidence'].ge(0.8)
]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(GLASS STAR FROSTED T-LIGHT HOLDER),(CREAM CUPID HEARTS COAT HANGER),0.076923,0.076923,0.076923,1.000000,13.000000,0.071006,inf
1,(CREAM CUPID HEARTS COAT HANGER),(GLASS STAR FROSTED T-LIGHT HOLDER),0.076923,0.076923,0.076923,1.000000,13.000000,0.071006,inf
2,(KNITTED UNION FLAG HOT WATER BOTTLE),(CREAM CUPID HEARTS COAT HANGER),0.092308,0.076923,0.076923,0.833333,10.833333,0.069822,5.538462
3,(CREAM CUPID HEARTS COAT HANGER),(KNITTED UNION FLAG HOT WATER BOTTLE),0.076923,0.092308,0.076923,1.000000,10.833333,0.069822,inf
5,(CREAM CUPID HEARTS COAT HANGER),(RED WOOLLY HOTTIE WHITE HEART.),0.076923,0.138462,0.076923,1.000000,7.222222,0.066272,inf
...,...,...,...,...,...,...,...,...,...
1929,(KNITTED UNION FLAG HOT WATER BOTTLE),"(CREAM CUPID HEARTS COAT HANGER, WHITE HANGING...",0.092308,0.076923,0.076923,0.833333,10.833333,0.069822,5.538462
1930,(CREAM CUPID HEARTS COAT HANGER),"(KNITTED UNION FLAG HOT WATER BOTTLE, WHITE HA...",0.076923,0.076923,0.076923,1.000000,13.000000,0.071006,inf
1932,(WHITE METAL LANTERN),"(KNITTED UNION FLAG HOT WATER BOTTLE, CREAM CU...",0.076923,0.076923,0.076923,1.000000,13.000000,0.071006,inf
1933,(GLASS STAR FROSTED T-LIGHT HOLDER),"(KNITTED UNION FLAG HOT WATER BOTTLE, CREAM CU...",0.076923,0.076923,0.076923,1.000000,13.000000,0.071006,inf
