In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules


In [2]:
df = pd.read_csv("E:\AI-ML PYTHON\OnlineRetail.csv", encoding = 'unicode_escape')
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [3]:
df.shape

(541909, 8)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   InvoiceNo    541909 non-null  object 
 1   StockCode    541909 non-null  object 
 2   Description  540455 non-null  object 
 3   Quantity     541909 non-null  int64  
 4   InvoiceDate  541909 non-null  object 
 5   UnitPrice    541909 non-null  float64
 6   CustomerID   406829 non-null  float64
 7   Country      541909 non-null  object 
dtypes: float64(2), int64(1), object(5)
memory usage: 33.1+ MB


In [5]:
df.isnull().sum()

InvoiceNo           0
StockCode           0
Description      1454
Quantity            0
InvoiceDate         0
UnitPrice           0
CustomerID     135080
Country             0
dtype: int64

In [6]:
df.nunique()

InvoiceNo      25900
StockCode       4070
Description     4223
Quantity         722
InvoiceDate    23260
UnitPrice       1630
CustomerID      4372
Country           38
dtype: int64

In [7]:
df[df['Quantity']<0].head()


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
141,C536379,D,Discount,-1,12/1/2010 9:41,27.5,14527.0,United Kingdom
154,C536383,35004C,SET OF 3 COLOURED FLYING DUCKS,-1,12/1/2010 9:49,4.65,15311.0,United Kingdom
235,C536391,22556,PLASTERS IN TIN CIRCUS PARADE,-12,12/1/2010 10:24,1.65,17548.0,United Kingdom
236,C536391,21984,PACK OF 12 PINK PAISLEY TISSUES,-24,12/1/2010 10:24,0.29,17548.0,United Kingdom
237,C536391,21983,PACK OF 12 BLUE PAISLEY TISSUES,-24,12/1/2010 10:24,0.29,17548.0,United Kingdom


In [8]:
df['InvoiceNo'] = df['InvoiceNo'].astype('str')

In [9]:
df = df[~df['InvoiceNo'].str.contains('C', na = False)]


In [10]:
df['InvoiceNo']

0         536365
1         536365
2         536365
3         536365
4         536365
           ...  
541904    581587
541905    581587
541906    581587
541907    581587
541908    581587
Name: InvoiceNo, Length: 532621, dtype: object

In [12]:
df["CustomerID"] = df["CustomerID"].fillna('00000')
df["Description"] = df["Description"].fillna("Unkown")


In [13]:
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'],errors='coerce')
df['Year'] = df['InvoiceDate'].dt.year
df['Month'] = df['InvoiceDate'].dt.month
df['hour'] = df['InvoiceDate'].dt.hour


In [14]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,2010,12,8
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850,United Kingdom,2010,12,8
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8


In [15]:
df['Month_year'] = pd.to_datetime(df['InvoiceDate']).dt.to_period('M')
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,2010,12,8,2010-12
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850,United Kingdom,2010,12,8,2010-12
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12


In [16]:
df['Income'] = round(df['Quantity'] * df['UnitPrice'],2)
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,2010,12,8,2010-12,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850,United Kingdom,2010,12,8,2010-12,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,2010,12,8,2010-12,20.34


In [17]:
df.describe()

Unnamed: 0,Quantity,UnitPrice,Year,Month,hour,Income
count,532621.0,532621.0,532621.0,532621.0,532621.0,532621.0
mean,10.239972,3.847621,2010.921608,7.557864,13.077451,19.985244
std,159.593551,41.758023,0.268787,3.508729,2.438535,270.574241
min,-9600.0,-11062.06,2010.0,1.0,6.0,-11062.06
25%,1.0,1.25,2011.0,5.0,11.0,3.75
50%,3.0,2.08,2011.0,8.0,13.0,9.9
75%,10.0,4.13,2011.0,11.0,15.0,17.7
max,80995.0,13541.33,2011.0,12.0,20.0,168469.6


In [18]:
df[df['Quantity'] > 100].head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
96,536378,21212,PACK OF 72 RETROSPOT CAKE CASES,120,2010-12-01 09:37:00,0.42,14688,United Kingdom,2010,12,9,2010-12,50.4
178,536387,79321,CHILLI LIGHTS,192,2010-12-01 09:58:00,3.82,16029,United Kingdom,2010,12,9,2010-12,733.44
179,536387,22780,LIGHT GARLAND BUTTERFILES PINK,192,2010-12-01 09:58:00,3.37,16029,United Kingdom,2010,12,9,2010-12,647.04
180,536387,22779,WOODEN OWLS LIGHT GARLAND,192,2010-12-01 09:58:00,3.37,16029,United Kingdom,2010,12,9,2010-12,647.04
181,536387,22466,FAIRY TALE COTTAGE NIGHTLIGHT,432,2010-12-01 09:58:00,1.45,16029,United Kingdom,2010,12,9,2010-12,626.4


In [19]:
products = df['StockCode'].nunique()
products

4059

In [25]:
products_group = df[['CustomerID', 'StockCode']].groupby('CustomerID').count().sort_values('StockCode', ascending=False,axis=0)

In [26]:
products_group.head(20)

Unnamed: 0_level_0,StockCode
CustomerID,Unnamed: 1_level_1
0.0,134697
17841.0,7847
14911.0,5677
14096.0,5111
12748.0,4596
14606.0,2700
15311.0,2379
14646.0,2080
13089.0,1818
13263.0,1677


In [27]:
df['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Finland',
       'Austria', 'Bahrain', 'Israel', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [28]:
df[df['Country']=='Poland'].head()


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
6608,536971,21733,RED HANGING HEART T-LIGHT HOLDER,32,2010-12-03 13:40:00,2.55,12779,Poland,2010,12,13,2010-12,81.6
6609,536971,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2010-12-03 13:40:00,1.25,12779,Poland,2010,12,13,2010-12,30.0
6610,536971,37446,MINI CAKE STAND WITH HANGING CAKES,8,2010-12-03 13:40:00,1.45,12779,Poland,2010,12,13,2010-12,11.6
6611,536971,37448,CERAMIC CAKE DESIGN SPOTTED MUG,24,2010-12-03 13:40:00,1.49,12779,Poland,2010,12,13,2010-12,35.76
6612,536971,21700,BIG DOUGHNUT FRIDGE MAGNETS,24,2010-12-03 13:40:00,0.85,12779,Poland,2010,12,13,2010-12,20.4


In [29]:
df['InvoiceNo'].nunique()

22064

In [31]:
country_group = df[['Country', 'InvoiceNo']].groupby('Country').count().sort_values('InvoiceNo', ascending = False, axis = 0)
country_group

Unnamed: 0_level_0,InvoiceNo
Country,Unnamed: 1_level_1
United Kingdom,487622
Germany,9042
France,8408
EIRE,7894
Spain,2485
Netherlands,2363
Belgium,2031
Switzerland,1967
Portugal,1501
Australia,1185


In [33]:
df['Description'].unique()

array(['WHITE HANGING HEART T-LIGHT HOLDER', 'WHITE METAL LANTERN',
       'CREAM CUPID HEARTS COAT HANGER', ..., 'lost',
       'CREAM HANGING HEART T-LIGHT HOLDER',
       'PAPER CRAFT , LITTLE BIRDIE'], dtype=object)

In [38]:
df['Description'].value_counts().head(10)

WHITE HANGING HEART T-LIGHT HOLDER    2327
JUMBO BAG RED RETROSPOT               2115
REGENCY CAKESTAND 3 TIER              2019
PARTY BUNTING                         1707
LUNCH BAG RED RETROSPOT               1594
ASSORTED COLOUR BIRD ORNAMENT         1489
Unkown                                1454
SET OF 3 CAKE TINS PANTRY DESIGN      1399
PACK OF 72 RETROSPOT CAKE CASES       1370
LUNCH BAG  BLACK SKULL.               1328
Name: Description, dtype: int64

In [39]:
df[df['Description']=='WHITE HANGING HEART T-LIGHT HOLDER'].head()


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,2010,12,8,2010-12,15.3
49,536373,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 09:02:00,2.55,17850,United Kingdom,2010,12,9,2010-12,15.3
66,536375,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 09:32:00,2.55,17850,United Kingdom,2010,12,9,2010-12,15.3
220,536390,85123A,WHITE HANGING HEART T-LIGHT HOLDER,64,2010-12-01 10:19:00,2.55,17511,United Kingdom,2010,12,10,2010-12,163.2
262,536394,85123A,WHITE HANGING HEART T-LIGHT HOLDER,32,2010-12-01 10:39:00,2.55,13408,United Kingdom,2010,12,10,2010-12,81.6


In [40]:
desc = df['Description'].value_counts().head(20)
desc

WHITE HANGING HEART T-LIGHT HOLDER    2327
JUMBO BAG RED RETROSPOT               2115
REGENCY CAKESTAND 3 TIER              2019
PARTY BUNTING                         1707
LUNCH BAG RED RETROSPOT               1594
ASSORTED COLOUR BIRD ORNAMENT         1489
Unkown                                1454
SET OF 3 CAKE TINS PANTRY DESIGN      1399
PACK OF 72 RETROSPOT CAKE CASES       1370
LUNCH BAG  BLACK SKULL.               1328
NATURAL SLATE HEART CHALKBOARD        1263
JUMBO BAG PINK POLKADOT               1238
HEART OF WICKER SMALL                 1226
PAPER CHAIN KIT 50'S CHRISTMAS        1200
JUMBO STORAGE BAG SUKI                1197
JUMBO SHOPPER VINTAGE RED PAISLEY     1190
LUNCH BAG SPACEBOY DESIGN             1179
LUNCH BAG CARS BLUE                   1174
JAM MAKING SET PRINTED                1169
SPOTTY BUNTING                        1160
Name: Description, dtype: int64

In [41]:
df['CustomerID'].nunique()


4340

In [42]:
df[df['CustomerID'] == '00000'].head()


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
622,536414,22139,Unkown,56,2010-12-01 11:52:00,0.0,0,United Kingdom,2010,12,11,2010-12,0.0
1443,536544,21773,DECORATIVE ROSE BATHROOM BOTTLE,1,2010-12-01 14:32:00,2.51,0,United Kingdom,2010,12,14,2010-12,2.51
1444,536544,21774,DECORATIVE CATS BATHROOM BOTTLE,2,2010-12-01 14:32:00,2.51,0,United Kingdom,2010,12,14,2010-12,5.02
1445,536544,21786,POLKADOT RAIN HAT,4,2010-12-01 14:32:00,0.85,0,United Kingdom,2010,12,14,2010-12,3.4
1446,536544,21787,RAIN PONCHO RETROSPOT,2,2010-12-01 14:32:00,1.66,0,United Kingdom,2010,12,14,2010-12,3.32


In [43]:
customer_group = df[['CustomerID','InvoiceNo']].groupby('CustomerID').count().sort_values('InvoiceNo',axis = 0,ascending=False)
customer_group.head(20)

Unnamed: 0_level_0,InvoiceNo
CustomerID,Unnamed: 1_level_1
0.0,134697
17841.0,7847
14911.0,5677
14096.0,5111
12748.0,4596
14606.0,2700
15311.0,2379
14646.0,2080
13089.0,1818
13263.0,1677


# KPI - knowlege processing information
### to check the which country generating more sales

In [52]:
revenue_generation = df[['Country','Income']].groupby('Country').sum().sort_values('Income',ascending=False)


In [53]:
revenue_generation.head(20)

Unnamed: 0_level_0,Income
Country,Unnamed: 1_level_1
United Kingdom,9003098.0
Netherlands,285446.3
EIRE,283454.0
Germany,228867.1
France,209715.1
Australia,138521.3
Spain,61577.11
Switzerland,57089.9
Belgium,41196.34
Sweden,38378.33


In [54]:
df.groupby('Country')['Income'].sum().sort_values(ascending=False).head(20)


Country
United Kingdom     9.003098e+06
Netherlands        2.854463e+05
EIRE               2.834540e+05
Germany            2.288671e+05
France             2.097151e+05
Australia          1.385213e+05
Spain              6.157711e+04
Switzerland        5.708990e+04
Belgium            4.119634e+04
Sweden             3.837833e+04
Japan              3.741637e+04
Norway             3.616544e+04
Portugal           3.374710e+04
Finland            2.254608e+04
Singapore          2.127929e+04
Channel Islands    2.045044e+04
Denmark            1.895534e+04
Italy              1.748324e+04
Hong Kong          1.569180e+04
Cyprus             1.359038e+04
Name: Income, dtype: float64

In [55]:
df.groupby('Description')['Income'].sum().sort_values(ascending=False).head(10)

Description
DOTCOM POSTAGE                        206248.77
REGENCY CAKESTAND 3 TIER              174484.74
PAPER CRAFT , LITTLE BIRDIE           168469.60
WHITE HANGING HEART T-LIGHT HOLDER    106292.77
PARTY BUNTING                          99504.33
JUMBO BAG RED RETROSPOT                94340.05
MEDIUM CERAMIC TOP STORAGE JAR         81700.92
Manual                                 78112.82
POSTAGE                                78101.88
RABBIT NIGHT LIGHT                     66964.99
Name: Income, dtype: float64

In [56]:
df.groupby('Year')['Income'].sum()

Year
2010    8.237461e+05
2011    9.820814e+06
Name: Income, dtype: float64

In [57]:
df.groupby(['Month_year'])['Income'].sum()


Month_year
2010-12     823746.14
2011-01     691364.56
2011-02     523631.89
2011-03     717639.36
2011-04     537808.62
2011-05     770536.02
2011-06     761739.90
2011-07     719221.19
2011-08     737014.26
2011-09    1058590.17
2011-10    1154979.30
2011-11    1509496.33
2011-12     638792.68
Freq: M, Name: Income, dtype: float64

In [58]:
data = df[df['Country'] == 'Netherlands']
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,Year,Month,hour,Month_year,Income
385,536403,22867,HAND WARMER BIRD DESIGN,96,2010-12-01 11:27:00,1.85,12791,Netherlands,2010,12,11,2010-12,177.6
386,536403,POST,POSTAGE,1,2010-12-01 11:27:00,15.0,12791,Netherlands,2010,12,11,2010-12,15.0
37952,539491,21981,PACK OF 12 WOODLAND TISSUES,12,2010-12-20 10:09:00,0.29,14646,Netherlands,2010,12,10,2010-12,3.48
37953,539491,21986,PACK OF 12 PINK POLKADOT TISSUES,12,2010-12-20 10:09:00,0.29,14646,Netherlands,2010,12,10,2010-12,3.48
37954,539491,22720,SET OF 3 CAKE TINS PANTRY DESIGN,2,2010-12-20 10:09:00,4.95,14646,Netherlands,2010,12,10,2010-12,9.9


In [59]:
basket = data.groupby(['InvoiceNo','Description'])['Quantity'].sum().unstack().fillna(0)
basket


Description,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE WOODLAND,16 PIECE CUTLERY SET PANTRY DESIGN,...,WRAP RED APPLES,WRAP WEDDING DAY,YELLOW METAL CHICKEN HEART,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC FOLKART SLEIGH BELLS,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536403,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
539491,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
539731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,192.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
541206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
541570,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578143,0.0,0.0,0.0,0.0,0.0,384.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
579528,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581175,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
581176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [60]:
basketmodel = basket.applymap(lambda x: 1 if x>0 else 0)
basketmodel

Description,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE WOODLAND,16 PIECE CUTLERY SET PANTRY DESIGN,...,WRAP RED APPLES,WRAP WEDDING DAY,YELLOW METAL CHICKEN HEART,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC FOLKART SLEIGH BELLS,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536403,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
539491,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
539731,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
541206,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
541570,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578143,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
579528,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
581175,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
581176,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [61]:
popularmodels = apriori(basketmodel, min_support=0.08, use_colnames=True)
popularmodels

Unnamed: 0,support,itemsets
0,0.105263,(72 SWEETHEART FAIRY CAKE CASES)
1,0.094737,(CARD BIRTHDAY COWBOY)
2,0.126316,(CARD DOLLY GIRL )
3,0.094737,(CARD GINGHAM ROSE )
4,0.084211,(CHARLOTTE BAG APPLES DESIGN)
...,...,...
199,0.084211,"(SPACEBOY LUNCH BOX , DOLLY GIRL LUNCH BOX, SP..."
200,0.084211,"(SPACEBOY LUNCH BOX , STRAWBERRY LUNCH BOX WIT..."
201,0.084211,"(SPACEBOY LUNCH BOX , DOLLY GIRL LUNCH BOX, RO..."
202,0.084211,"(SPACEBOY LUNCH BOX , DOLLY GIRL LUNCH BOX, ST..."


In [62]:
rules = association_rules(popularmodels,metric='lift',min_threshold=1)

In [63]:
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(72 SWEETHEART FAIRY CAKE CASES),(DOLLY GIRL LUNCH BOX),0.105263,0.231579,0.094737,0.9,3.886364,0.07036,7.684211
1,(DOLLY GIRL LUNCH BOX),(72 SWEETHEART FAIRY CAKE CASES),0.231579,0.105263,0.094737,0.409091,3.886364,0.07036,1.51417
2,(SPACEBOY LUNCH BOX ),(72 SWEETHEART FAIRY CAKE CASES),0.294737,0.105263,0.094737,0.321429,3.053571,0.063712,1.31856
3,(72 SWEETHEART FAIRY CAKE CASES),(SPACEBOY LUNCH BOX ),0.105263,0.294737,0.094737,0.9,3.053571,0.063712,7.052632
4,(CARD BIRTHDAY COWBOY),(SPACEBOY BIRTHDAY CARD),0.094737,0.178947,0.084211,0.888889,4.96732,0.067258,7.389474
5,(SPACEBOY BIRTHDAY CARD),(CARD BIRTHDAY COWBOY),0.178947,0.094737,0.084211,0.470588,4.96732,0.067258,1.709942
6,(SPACEBOY LUNCH BOX ),(CARD BIRTHDAY COWBOY),0.294737,0.094737,0.084211,0.285714,3.015873,0.056288,1.267368
7,(CARD BIRTHDAY COWBOY),(SPACEBOY LUNCH BOX ),0.094737,0.294737,0.084211,0.888889,3.015873,0.056288,6.347368
8,(CARD DOLLY GIRL ),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.126316,0.263158,0.094737,0.75,2.85,0.061496,2.947368
9,(ROUND SNACK BOXES SET OF4 WOODLAND ),(CARD DOLLY GIRL ),0.263158,0.126316,0.094737,0.36,2.85,0.061496,1.365132


In [64]:
rules[ (rules['lift'] >= 5) &
       (rules['confidence'] >= 1) ]


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
108,(PACK OF 60 MUSHROOM CAKE CASES),(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
331,"(PLASTERS IN TIN SPACEBOY, LUNCH BAG RED RETRO...",(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
332,"(SPACEBOY BIRTHDAY CARD, LUNCH BAG RED RETROSPOT)",(PLASTERS IN TIN SPACEBOY),0.084211,0.126316,0.084211,1.0,7.916667,0.073573,inf
475,"(PLASTERS IN TIN SPACEBOY, DOLLY GIRL LUNCH BOX)","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.084211,0.189474,0.084211,1.0,5.277778,0.068255,inf
488,"(SPACEBOY LUNCH BOX , RED RETROSPOT CHARLOTTE ...","(DOLLY GIRL LUNCH BOX, ROUND SNACK BOXES SET O...",0.094737,0.168421,0.094737,1.0,5.9375,0.078781,inf
490,"(DOLLY GIRL LUNCH BOX, RED RETROSPOT CHARLOTTE...","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.094737,0.189474,0.094737,1.0,5.277778,0.076787,inf
517,"(DOLLY GIRL LUNCH BOX, SPACEBOY BIRTHDAY CARD)","(SPACEBOY LUNCH BOX , ROUND SNACK BOXES SET OF...",0.084211,0.189474,0.084211,1.0,5.277778,0.068255,inf
568,"(PLASTERS IN TIN SPACEBOY, ROUND SNACK BOXES S...",(SPACEBOY BIRTHDAY CARD),0.084211,0.178947,0.084211,1.0,5.588235,0.069141,inf
569,"(ROUND SNACK BOXES SET OF4 WOODLAND , SPACEBOY...",(PLASTERS IN TIN SPACEBOY),0.084211,0.126316,0.084211,1.0,7.916667,0.073573,inf
572,"(PLASTERS IN TIN SPACEBOY, LUNCH BAG RED RETRO...","(ROUND SNACK BOXES SET OF4 WOODLAND , SPACEBOY...",0.084211,0.136842,0.084211,1.0,7.307692,0.072687,inf


In [66]:
rules['antecedents'][332]

frozenset({'LUNCH BAG RED RETROSPOT', 'SPACEBOY BIRTHDAY CARD'})

In [67]:
basketmodel['PLASTERS IN TIN SPACEBOY'].sum()

12

In [71]:
basketmodel['LUNCH BAG RED RETROSPOT'].sum()

10

In [72]:
basketmodel['SPACEBOY BIRTHDAY CARD'].sum()

17