In [3]:
pip install mlxtend

Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [5]:
df = pd.read_csv("b2c.csv")

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085,United Kingdom
1,1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom
2,2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom
3,3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085,United Kingdom
4,4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085,United Kingdom


In [7]:
uk_df = df[df["Country"] == "United Kingdom"]

In [8]:
uk_df.head()

Unnamed: 0.1,Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085,United Kingdom
1,1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom
2,2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085,United Kingdom
3,3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085,United Kingdom
4,4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085,United Kingdom


In [9]:
basket = uk_df.groupby(["Invoice", "Description"])["Quantity"].sum().unstack()

In [10]:
basket = basket.fillna(0)

In [11]:
def encode_units(x):

    if x>0:
        return 1
    if x<=0:
        return 0

In [12]:
basket_sets = basket.applymap(encode_units)

In [13]:
basket_sets.head()

Description,DOORMAT UNION JACK GUNS AND ROSES,3 STRIPEY MICE FELTCRAFT,4 PURPLE FLOCK DINNER CANDLES,50'S CHRISTMAS GIFT BAG LARGE,ANIMAL STICKERS,BLACK PIRATE TREASURE CHEST,BROWN PIRATE TREASURE CHEST,Bank Charges,CAMPHOR WOOD PORTOBELLO MUSHROOM,CHERRY BLOSSOM DECORATIVE FLASK,...,ZINC STAR T-LIGHT HOLDER,ZINC SWEETHEART SOAP DISH,ZINC SWEETHEART WIRE LETTER RACK,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK,ZINC WIRE KITCHEN ORGANISER,ZINC WIRE SWEETHEART LETTER TRAY
Invoice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
489434,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
489435,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
489436,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
489437,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
489438,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
frequent_itemsets = apriori(basket_sets, min_support=0.02, use_colnames = True)



In [15]:
print(f"Most Frequent itemsets:(support>=0)")

Most Frequent itemsets:(support>=0)


In [16]:
frequent_itemsets.sort_values(by="support", ascending=False).head()

Unnamed: 0,support,itemsets
156,0.140144,(WHITE HANGING HEART T-LIGHT HOLDER)
127,0.085142,(REGENCY CAKESTAND 3 TIER)
9,0.074648,(ASSORTED COLOUR BIRD ORNAMENT)
66,0.07128,(JUMBO BAG RED RETROSPOT)
107,0.057894,(PARTY BUNTING)


In [17]:
rules = association_rules(frequent_itemsets, metric = "lift", min_threshold = 1.0)

In [18]:
rules.sort_values(by = ["lift","support"], ascending = False, inplace = True)

In [19]:
print(f"Top n/ most frequent itemsets")

Top n/ most frequent itemsets


In [20]:
rules.head(50)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
15,(SWEETHEART CERAMIC TRINKET BOX),(STRAWBERRY CERAMIC TRINKET BOX),0.032703,0.049398,0.022538,0.689152,13.95107,1.0,0.020922,3.058096,0.959706,0.378378,0.672999,0.572699
14,(STRAWBERRY CERAMIC TRINKET BOX),(SWEETHEART CERAMIC TRINKET BOX),0.049398,0.032703,0.022538,0.456246,13.95107,1.0,0.020922,1.778924,0.976561,0.378378,0.437862,0.572699
16,(WOODEN FRAME ANTIQUE WHITE ),(WOODEN PICTURE FRAME WHITE FINISH),0.051246,0.048026,0.028977,0.565445,11.773611,1.0,0.026516,2.190686,0.964491,0.412214,0.543522,0.584398
17,(WOODEN PICTURE FRAME WHITE FINISH),(WOODEN FRAME ANTIQUE WHITE ),0.048026,0.051246,0.028977,0.603352,11.773611,1.0,0.026516,2.391929,0.961229,0.412214,0.581927,0.584398
2,(LOVE BUILDING BLOCK WORD),(HOME BUILDING BLOCK WORD),0.043108,0.052707,0.022776,0.528354,10.024383,1.0,0.020504,2.008484,0.940799,0.311837,0.502112,0.48024
3,(HOME BUILDING BLOCK WORD),(LOVE BUILDING BLOCK WORD),0.052707,0.043108,0.022776,0.432127,10.024383,1.0,0.020504,1.685046,0.950332,0.311837,0.406544,0.48024
0,(HEART OF WICKER SMALL),(HEART OF WICKER LARGE),0.051574,0.051306,0.025489,0.49422,9.632832,1.0,0.022843,1.875704,0.944922,0.329353,0.466867,0.495512
1,(HEART OF WICKER LARGE),(HEART OF WICKER SMALL),0.051306,0.051574,0.025489,0.496804,9.632832,1.0,0.022843,1.884805,0.944655,0.329353,0.469441,0.495512
10,(LUNCH BAG SPACEBOY DESIGN ),(LUNCH BAG BLACK SKULL.),0.050054,0.056702,0.021792,0.435378,7.678405,1.0,0.018954,1.670673,0.915593,0.256491,0.401439,0.409855
11,(LUNCH BAG BLACK SKULL.),(LUNCH BAG SPACEBOY DESIGN ),0.056702,0.050054,0.021792,0.384332,7.678405,1.0,0.018954,1.542953,0.922046,0.256491,0.351892,0.409855
