In [35]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the retail transactions from the CSV file and preview the first rows.
data = pd.read_csv('small_retail.csv')
data.head()

Unnamed: 0,InvoiceNo,StockCode,lower,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,white hanging heart t-light holder,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,white metal lantern,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,cream cupid hearts coat hanger,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,knitted union flag hot water bottle,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,red woolly hottie white heart.,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [3]:
# Inspect the column names
data.columns

Index(['InvoiceNo', 'StockCode', 'lower', 'Description', 'Quantity',
       'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

In [4]:
# List the distinct values found in the Country column
data['Country'].unique()

array(['United Kingdom', 'France', 'Australia', 'Netherlands', 'Germany',
       'Norway', 'EIRE', 'Switzerland', 'Spain', 'Poland', 'Portugal',
       'Italy', 'Belgium', 'Lithuania', 'Japan'], dtype=object)

In [5]:
# Clean the data: trim leading and trailing whitespace from every description
stripped_descriptions = data['Description'].str.strip()
data['Description'] = stripped_descriptions

In [8]:
# Remove rows with a missing InvoiceNo, then store InvoiceNo as strings so
# string methods can be applied to it later.
# The chain builds a new frame instead of mutating with inplace=True, so the
# column assignment never happens on a row-filtered frame.
data = (
    data
    .dropna(subset=['InvoiceNo'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)

In [13]:
# Remove every row whose InvoiceNo contains the letter 'C'
has_c_marker = data['InvoiceNo'].str.contains('C')
data = data[~has_c_marker]

In [33]:
def _build_basket(transactions, country):
    """Pivot one country's transactions into an invoice-by-product table.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Frame providing the 'Country', 'InvoiceNo', 'Description' and
        'Quantity' columns.
    country : str
        Value of the 'Country' column whose rows are kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by InvoiceNo with one column per Description; each cell is
        the summed Quantity, with 0 where the product is absent from the
        invoice.
    """
    return (
        transactions[transactions['Country'] == country]
        .groupby(['InvoiceNo', 'Description'])['Quantity']
        .sum()
        .unstack()
        # InvoiceNo is already the index after unstack(), so the gaps can be
        # filled directly without a reset_index()/set_index() round-trip.
        .fillna(0)
    )


# Purchases made in France
basket_France = _build_basket(data, 'France')
# Purchases made in the United Kingdom
basket_UK = _build_basket(data, 'United Kingdom')
# Purchases made in Portugal
basket_Por = _build_basket(data, 'Portugal')
# Purchases made in Sweden
# NOTE(review): 'Sweden' is not among the values in the saved output of
# data.Country.unique(), so this basket is presumably empty — confirm the
# intended country.
basket_Sweden = _build_basket(data, 'Sweden')

In [34]:
# Encode the quantities so that every value is either 0 or 1
def hot_encode(x):
    """Convert a quantity into a binary purchase flag.

    Parameters
    ----------
    x : int or float
        Quantity value taken from a basket table cell.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.
    """
    # One total expression: values strictly between 0 and 1 (and NaN) used to
    # match neither branch and silently produced None.
    return 1 if x >= 1 else 0
    
# Apply the 0/1 encoding element-wise to each country's basket table.
basket_France, basket_UK, basket_Por, basket_Sweden = (
    basket.applymap(hot_encode)
    for basket in (basket_France, basket_UK, basket_Por, basket_Sweden)
)

In [37]:
# Build the model: mine frequent itemsets from the France baskets
frq_items = apriori(
    basket_France,
    min_support=0.5,
    use_colnames=True,
)

In [39]:
# Derive the association rules and rank them by confidence, then by lift
# (both descending)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE PINK),0.5,0.5,0.5,1.0,2.0,0.25,inf
1,(ALARM CLOCK BAKELIKE PINK),(ALARM CLOCK BAKELIKE GREEN),0.5,0.5,0.5,1.0,2.0,0.25,inf
2,(ALARM CLOCK BAKELIKE RED),(ALARM CLOCK BAKELIKE GREEN),0.5,0.5,0.5,1.0,2.0,0.25,inf
3,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED),0.5,0.5,0.5,1.0,2.0,0.25,inf
6,(ALARM CLOCK BAKELIKE GREEN),(ROUND SNACK BOXES SET OF4 WOODLAND),0.5,0.5,0.5,1.0,2.0,0.25,inf
...,...,...,...,...,...,...,...,...,...
1852,(POSTAGE),"(LUNCH BAG RED RETROSPOT, ASSORTED COLOUR MINI...",1.0,0.5,0.5,0.5,1.0,0.00,1.0
1914,(POSTAGE),"(LUNCH BOX WITH CUTLERY RETROSPOT, LUNCH BAG R...",1.0,0.5,0.5,0.5,1.0,0.00,1.0
1976,(POSTAGE),"(LUNCH BOX WITH CUTLERY RETROSPOT, ASSORTED CO...",1.0,0.5,0.5,0.5,1.0,0.00,1.0
2038,(POSTAGE),"(LUNCH BOX WITH CUTLERY RETROSPOT, LUNCH BAG R...",1.0,0.5,0.5,0.5,1.0,0.00,1.0


In [45]:
# Frequent itemsets and association rules for the United Kingdom baskets,
# ranked by confidence and then lift (both descending)
frq_items = apriori(basket_UK, min_support=0.05, use_colnames=True)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
38,"(RED WOOLLY HOTTIE WHITE HEART., RETRO COFFEE ...",(KNITTED UNION FLAG HOT WATER BOTTLE),0.050633,0.103797,0.050633,1.0,9.634146,0.045377,inf
43,"(WHITE HANGING HEART T-LIGHT HOLDER, RED WOOLL...",(KNITTED UNION FLAG HOT WATER BOTTLE),0.060759,0.103797,0.058228,0.958333,9.232724,0.051921,21.508861
37,"(KNITTED UNION FLAG HOT WATER BOTTLE, RETRO CO...",(RED WOOLLY HOTTIE WHITE HEART.),0.053165,0.106329,0.050633,0.952381,8.956916,0.04498,18.767089
30,"(HAND WARMER SCOTTY DOG DESIGN, HAND WARMER RE...",(HAND WARMER OWL DESIGN),0.058228,0.124051,0.053165,0.913043,7.360248,0.045941,10.073418
42,"(WHITE HANGING HEART T-LIGHT HOLDER, KNITTED U...",(RED WOOLLY HOTTIE WHITE HEART.),0.065823,0.106329,0.058228,0.884615,8.319597,0.051229,7.745148


In [52]:
# Frequent itemsets and association rules for the Portugal baskets,
# ranked by confidence and then lift (both descending)
frq_items = apriori(basket_Por, min_support=0.0001, use_colnames=True)
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
rules.head(20)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(LUNCH BAG SUKI DESIGN),(LUNCH BAG CARS BLUE),1.0,1.0,1.0,1.0,1.0,0.0,inf
1,(LUNCH BAG CARS BLUE),(LUNCH BAG SUKI DESIGN),1.0,1.0,1.0,1.0,1.0,0.0,inf
2,(LUNCH BAG WOODLAND),(LUNCH BAG CARS BLUE),1.0,1.0,1.0,1.0,1.0,0.0,inf
3,(LUNCH BAG CARS BLUE),(LUNCH BAG WOODLAND),1.0,1.0,1.0,1.0,1.0,0.0,inf
4,(POSTAGE),(LUNCH BAG CARS BLUE),1.0,1.0,1.0,1.0,1.0,0.0,inf
5,(LUNCH BAG CARS BLUE),(POSTAGE),1.0,1.0,1.0,1.0,1.0,0.0,inf
6,(RED HEART LUGGAGE TAG),(LUNCH BAG CARS BLUE),1.0,1.0,1.0,1.0,1.0,0.0,inf
7,(LUNCH BAG CARS BLUE),(RED HEART LUGGAGE TAG),1.0,1.0,1.0,1.0,1.0,0.0,inf
8,(RED RETROSPOT LUGGAGE TAG),(LUNCH BAG CARS BLUE),1.0,1.0,1.0,1.0,1.0,0.0,inf
9,(LUNCH BAG CARS BLUE),(RED RETROSPOT LUGGAGE TAG),1.0,1.0,1.0,1.0,1.0,0.0,inf


In [53]:
# Display the encoded Portugal basket table
basket_Por

Description,LUNCH BAG CARS BLUE,LUNCH BAG SUKI DESIGN,LUNCH BAG WOODLAND,POSTAGE,RED HEART LUGGAGE TAG,RED RETROSPOT LUGGAGE TAG,VINTAGE PAISLEY STATIONERY SET
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
536990,1,1,1,1,1,1,1
