In [1]:
# Loading libraries
from datetime import datetime, timedelta,date
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pydotplus

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the online retailer transactions from a local CSV export.
# NOTE(review): this is an absolute path on one user's machine, so the cell only
# runs where that exact location exists — consider a configurable data directory.
file_path = "C://Users//lenovo//Desktop//7LYTIX//online_retail_II.csv"
# The file is read with an explicit ISO-8859-1 encoding rather than the default.
onlineRetailer_df = pd.read_csv(file_path, encoding= 'ISO-8859-1')
# Preview the first rows of the original data
onlineRetailer_df.head()

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,12/1/2009 7:45,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,12/1/2009 7:45,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,12/1/2009 7:45,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,12/1/2009 7:45,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,12/1/2009 7:45,1.25,13085.0,United Kingdom


In [3]:
# Count the null entries in each column
onlineRetailer_df.isna().sum()

Invoice             0
StockCode           0
Description      4372
Quantity            0
InvoiceDate         0
Price               0
Customer ID    236682
Country             0
dtype: int64

In [4]:
# Keep only fully populated rows: any record with a null in any column is discarded
onlineRetailer_df = onlineRetailer_df.dropna()

In [5]:
# Trim leading and trailing whitespace from every product description
onlineRetailer_df['Description'] = (
    onlineRetailer_df['Description'].str.strip()
)
# Discard rows that do not have an invoice number
onlineRetailer_df = onlineRetailer_df.dropna(subset=['Invoice'])

In [6]:
# Build the basket table for orders whose country is "USA": one row per invoice,
# one column per product description, each cell holding the summed quantity
# (0 where the product does not appear on that invoice).
# NOTE(review): every invoice gets a row, including any whose summed quantities
# are all non-positive — confirm whether such invoices should be excluded first.
basket = (
    onlineRetailer_df
    .loc[onlineRetailer_df['Country'] == "USA"]
    .groupby(['Invoice', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('Invoice')
)

In [7]:
# Preview the basket table (invoices as rows, product descriptions as columns)
basket

Description,3 RAFFIA RIBBONS 50'S CHRISTMAS,3 TRADITIONAl BISCUIT CUTTERS SET,36 DOILIES DOLLY GIRL,36 DOILIES SPACEBOY DESIGN,5 HOOK HANGER MAGIC TOADSTOOL,5 HOOK HANGER RED MAGIC TOADSTOOL,6 GIFT TAGS 50'S CHRISTMAS,6 GIFT TAGS VINTAGE CHRISTMAS,60 CAKE CASES DOLLY GIRL DESIGN,60 TEATIME FAIRY CAKE CASES,...,VINTAGE UNION JACK BUNTING,VINTAGE UNION JACK SHOPPING BAG,WOOD STAMP SET BEST WISHES,WOODLAND DESIGN COTTON TOTE BAG,WRAP CHRISTMAS VILLAGE,WRAP COWBOYS,WRAP DOLLY GIRL,WRAP ENGLISH ROSE,WRAP I LOVE LONDON,WRAP WEDDING DAY
Invoice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
489444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
494110,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
494113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
494234,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
494891,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0
495815,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
507793,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,48.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
508177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
511685,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
513122,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
def encodeingBasket(unit):
    """Encode one basket cell as a purchase flag.

    Args:
        unit: Numeric quantity of a single product on a single invoice.

    Returns:
        1 if ``unit`` is strictly greater than 0, otherwise 0. Zero,
        negative quantities and NaN all map to 0.
    """
    # A single comparison covers every numeric input. Two separate range
    # checks (<= 0 and >= 1) would fall through and return None for values
    # strictly between 0 and 1, and for NaN.
    return 1 if unit > 0 else 0

# Run encodeingBasket over every cell, replacing each summed quantity with its return value
basket = basket.applymap(encodeingBasket)

In [9]:
# Generate the frequent itemsets that have a support of at least 7% (min_support=0.07).
# use_colnames=True labels the itemsets with the basket's column names
# (product descriptions) rather than column positions.
frequent_products = apriori(basket, min_support=0.07, use_colnames=True)

In [10]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose lift meets the minimum threshold of 1
rules = association_rules(frequent_products, metric="lift", min_threshold=1)
# Show the first few rules
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(60 CAKE CASES DOLLY GIRL DESIGN),(MINI PAINT SET VINTAGE),0.086957,0.086957,0.086957,1.0,11.5,0.079395,inf
1,(MINI PAINT SET VINTAGE),(60 CAKE CASES DOLLY GIRL DESIGN),0.086957,0.086957,0.086957,1.0,11.5,0.079395,inf
2,(60 CAKE CASES DOLLY GIRL DESIGN),(SET OF 36 TEATIME PAPER DOILIES),0.086957,0.347826,0.086957,1.0,2.875,0.056711,inf
3,(SET OF 36 TEATIME PAPER DOILIES),(60 CAKE CASES DOLLY GIRL DESIGN),0.347826,0.086957,0.086957,0.25,2.875,0.056711,1.217391
4,(60 CAKE CASES DOLLY GIRL DESIGN),(SET OF 6 RIBBONS VINTAGE CHRISTMAS),0.086957,0.130435,0.086957,1.0,7.666667,0.075614,inf


In [11]:
# Preview the rules that have a lift of at least 6 and a confidence of at least 80%
rules.loc[rules['lift'].ge(6) & rules['confidence'].ge(0.8)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(60 CAKE CASES DOLLY GIRL DESIGN),(MINI PAINT SET VINTAGE),0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
1,(MINI PAINT SET VINTAGE),(60 CAKE CASES DOLLY GIRL DESIGN),0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
4,(60 CAKE CASES DOLLY GIRL DESIGN),(SET OF 6 RIBBONS VINTAGE CHRISTMAS),0.086957,0.130435,0.086957,1.0,7.666667,0.075614,inf
7,(60 TEATIME FAIRY CAKE CASES),(PACK OF 60 MUSHROOM CAKE CASES),0.086957,0.130435,0.086957,1.0,7.666667,0.075614,inf
11,(72 SWEETHEART FAIRY CAKE CASES),(PACK OF 72 RETRO SPOT CAKE CASES),0.086957,0.130435,0.086957,1.0,7.666667,0.075614,inf
...,...,...,...,...,...,...,...,...,...
36933,"(VINTAGE UNION JACK BUNTING, TEA TIME TEA SET ...","(PINK 3 PIECE MINI DOTS CUTLERY SET, FELTCRAFT...",0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
36936,"(VINTAGE UNION JACK BUNTING, PINK BLUE FELT CR...","(PINK 3 PIECE MINI DOTS CUTLERY SET, FELTCRAFT...",0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
36937,"(SET OF 36 TEATIME PAPER DOILIES, VINTAGE UNIO...","(PINK 3 PIECE MINI DOTS CUTLERY SET, FELTCRAFT...",0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
36939,(PINK 3 PIECE MINI DOTS CUTLERY SET),"(FELTCRAFT 6 FLOWER FRIENDS, FELTCRAFT BUTTERF...",0.086957,0.086957,0.086957,1.0,11.500000,0.079395,inf
