In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Read the workbook 'online_retail.xlsx' (path is relative to the working
# directory) into the DataFrame `data` used by every later cell.
data=pd.read_excel('online_retail.xlsx')

In [3]:
# Peek at the first five rows (five is the default row count for head()).
data.head()

Unnamed: 0,InvoiceNo,StockCode,lower,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,white hanging heart t-light holder,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,white metal lantern,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,cream cupid hearts coat hanger,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,knitted union flag hot water bottle,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,red woolly hottie white heart.,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
# List the column labels of the loaded frame.
data.columns

Index(['InvoiceNo', 'StockCode', 'lower', 'Description', 'Quantity',
       'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [5]:
# Distinct values of the Country column, in order of first appearance.
data['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan', 'Iceland',
       'Channel Islands', 'Denmark', 'Cyprus', 'Sweden', 'Austria',
       'Israel', 'Finland', 'Bahrain', 'Greece', 'Hong Kong', 'Singapore',
       'Lebanon', 'United Arab Emirates', 'Saudi Arabia',
       'Czech Republic', 'Canada', 'Unspecified', 'Brazil', 'USA',
       'European Community', 'Malta', 'RSA'], dtype=object)

In [6]:
# Trim leading/trailing whitespace from every item description so that
# the same product is not split across differently padded labels.
data['Description'] = data.Description.str.strip()

In [7]:
# Discard rows that have no invoice number; the frame is modified in place.
data.dropna(subset=['InvoiceNo'], inplace=True)

In [8]:
# Store invoice numbers as text so string methods can be applied to them.
data['InvoiceNo'] = data['InvoiceNo'].astype(str)

In [9]:
# Keep only rows whose invoice number does not contain the letter 'C'
# (presumably credit/cancelled invoices -- confirm against the data source).
data = data.loc[~data['InvoiceNo'].str.contains('C')]

In [10]:
# Invoice x item matrix for France: one row per invoice, one column per
# item description, each cell the summed quantity (0 where the item does
# not appear on that invoice).
bucket_France = (
    data.loc[data['Country'] == "France"]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [11]:
# Invoice x item matrix for Australia: one row per invoice, one column per
# item description, each cell the summed quantity (0 where the item does
# not appear on that invoice).
bucket_Australia = (
    data.loc[data['Country'] == "Australia"]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [12]:
# Invoice x item matrix for Portugal: one row per invoice, one column per
# item description, each cell the summed quantity (0 where the item does
# not appear on that invoice).
bucket_Portugal = (
    data.loc[data['Country'] == "Portugal"]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [13]:
# Invoice x item matrix for Italy: one row per invoice, one column per
# item description, each cell the summed quantity (0 where the item does
# not appear on that invoice).
bucket_Italy = (
    data.loc[data['Country'] == "Italy"]
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [14]:
def hot_encode(x):
    """Map a quantity to a 0/1 presence flag.

    Parameters
    ----------
    x : number
        Quantity of an item on an invoice.

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.

    Notes
    -----
    Every input yields 0 or 1.  Values strictly between 0 and 1 count as
    present (1) and NaN counts as absent (0); without this, such inputs
    would fall through both comparisons and produce ``None``.
    """
    # NaN compares False against everything, so it lands in the 0 branch.
    return 1 if x > 0 else 0

In [15]:
# One-hot encode the French basket matrix: 1 where the summed quantity is
# positive, 0 otherwise.  A vectorised comparison replaces the per-cell
# DataFrame.applymap call (deprecated since pandas 2.1) and always yields
# integer 0/1 cells.  Re-running this cell leaves the frame unchanged.
b_encoded = (bucket_France > 0).astype(int)
bucket_France = b_encoded

In [16]:
# One-hot encode the Australian basket matrix: 1 where the summed quantity
# is positive, 0 otherwise.  A vectorised comparison replaces the per-cell
# DataFrame.applymap call (deprecated since pandas 2.1) and always yields
# integer 0/1 cells.  Re-running this cell leaves the frame unchanged.
b_encoded = (bucket_Australia > 0).astype(int)
bucket_Australia = b_encoded

In [17]:
# One-hot encode the Portuguese basket matrix: 1 where the summed quantity
# is positive, 0 otherwise.  A vectorised comparison replaces the per-cell
# DataFrame.applymap call (deprecated since pandas 2.1) and always yields
# integer 0/1 cells.  Re-running this cell leaves the frame unchanged.
b_encoded = (bucket_Portugal > 0).astype(int)
bucket_Portugal = b_encoded

In [18]:
# One-hot encode the Italian basket matrix: 1 where the summed quantity is
# positive, 0 otherwise.  A vectorised comparison replaces the per-cell
# DataFrame.applymap call (deprecated since pandas 2.1) and always yields
# integer 0/1 cells.  Re-running this cell leaves the frame unchanged.
b_encoded = (bucket_Italy > 0).astype(int)
bucket_Italy = b_encoded

In [19]:
# Mine frequent itemsets from the French baskets with a minimum support of
# 0.05; use_colnames=True reports itemsets by column label instead of
# column position.
frq_items = apriori(
    bucket_France,
    min_support=0.05,
    use_colnames=True,
)
frq_items.head()


Unnamed: 0,support,itemsets
0,0.071429,(4 TRADITIONAL SPINNING TOPS)
1,0.096939,(ALARM CLOCK BAKELIKE GREEN)
2,0.102041,(ALARM CLOCK BAKELIKE PINK)
3,0.094388,(ALARM CLOCK BAKELIKE RED)
4,0.068878,(ASSORTED COLOUR MINI CASES)


In [20]:
# Derive association rules from the French itemsets, keeping those whose
# lift meets the threshold of 1.
rules = association_rules(
    frq_items,
    metric="lift",
    min_threshold=1,
)


In [21]:
# Display the rule table produced from the French itemsets.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(POSTAGE),(4 TRADITIONAL SPINNING TOPS),0.765306,0.071429,0.056122,0.073333,1.026667,0.001458,1.002055
1,(4 TRADITIONAL SPINNING TOPS),(POSTAGE),0.071429,0.765306,0.056122,0.785714,1.026667,0.001458,1.095238
2,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE PINK),0.096939,0.102041,0.073980,0.763158,7.478947,0.064088,3.791383
3,(ALARM CLOCK BAKELIKE PINK),(ALARM CLOCK BAKELIKE GREEN),0.102041,0.096939,0.073980,0.725000,7.478947,0.064088,3.283859
4,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED),0.096939,0.094388,0.079082,0.815789,8.642959,0.069932,4.916181
...,...,...,...,...,...,...,...,...,...
343,"(POSTAGE, SET/20 RED RETROSPOT PAPER NAPKINS)","(SET/6 RED SPOTTY PAPER CUPS, SET/6 RED SPOTTY...",0.109694,0.122449,0.081633,0.744186,6.077519,0.068201,3.430427
344,(SET/6 RED SPOTTY PAPER CUPS),"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE, SET/2...",0.137755,0.084184,0.081633,0.592593,7.039282,0.070036,2.247913
345,(SET/6 RED SPOTTY PAPER PLATES),"(SET/6 RED SPOTTY PAPER CUPS, POSTAGE, SET/20 ...",0.127551,0.084184,0.081633,0.640000,7.602424,0.070895,2.543934
346,(POSTAGE),"(SET/6 RED SPOTTY PAPER CUPS, SET/6 RED SPOTTY...",0.765306,0.099490,0.081633,0.106667,1.072137,0.005493,1.008034


In [22]:
# Order rules from strongest to weakest: confidence first, lift as the
# tie-breaker, both descending.
rules = rules.sort_values(by=['confidence', 'lift'], ascending=False)

In [23]:
# Plain-text dump of the sorted rules; pandas abbreviates long frames in
# printed output, so only the leading and trailing rows appear.
print(rules)

                                           antecedents  \
45                        (JUMBO BAG WOODLAND ANIMALS)   
260  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
272  (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI...   
301  (SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...   
302  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   
..                                                 ...   
36                                           (POSTAGE)   
26                                           (POSTAGE)   
96                                           (POSTAGE)   
225                                          (POSTAGE)   
215                                          (POSTAGE)   

                                           consequents  antecedent support  \
45                                           (POSTAGE)            0.076531   
260                                          (POSTAGE)            0.051020   
272                                          (POSTAGE)            0.0

In [24]:
# Mine frequent itemsets from the Australian baskets with a minimum support
# of 0.04; itemsets are reported by column label.
frq_items1 = apriori(
    bucket_Australia,
    min_support=0.04,
    use_colnames=True,
)

In [None]:
# Build the Australian rules (lift threshold 1) and rank them by confidence,
# then lift, both descending.  Note this rebinds `rules`, replacing the
# French rule table held under the same name.
rules = (
    association_rules(frq_items1, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())