In [18]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [19]:
# Load the raw transaction lines (shortened URL; needs network access on every run).
df = pd.read_csv(filepath_or_buffer="https://bit.ly/30A2gHO")

In [20]:
# Peek at the first rows of the raw data.
df.head(n=5)

Unnamed: 0,A,Quantity,Transaction,Store,Product
0,30000,2,93194,6,Magazine
1,30001,2,93194,6,Candy Bar
2,30002,2,93194,6,Candy Bar
3,30003,2,93194,6,Candy Bar
4,30004,2,93194,6,Candy Bar


In [21]:
# Transaction numbers are combined with the store number so that the pair,
# rendered as "<Transaction>#<Store>", serves as the basket identifier.
transaction_part = df['Transaction'].astype(str)
store_part = df["Store"].astype(str)
df['Tid'] = transaction_part + "#" + store_part

In [22]:
# Confirm the new Tid column was appended.
df.head(n=5)

Unnamed: 0,A,Quantity,Transaction,Store,Product,Tid
0,30000,2,93194,6,Magazine,93194#6
1,30001,2,93194,6,Candy Bar,93194#6
2,30002,2,93194,6,Candy Bar,93194#6
3,30003,2,93194,6,Candy Bar,93194#6
4,30004,2,93194,6,Candy Bar,93194#6


In [23]:
# Drop the columns that are not used below (Transaction and Store are already
# folded into Tid; A and Quantity are discarded as well).
unused_columns = ['Transaction', 'Store', 'A', 'Quantity']
df = df.drop(columns=unused_columns)

In [24]:
# Only Product and Tid should remain.
df.head(n=5)

Unnamed: 0,Product,Tid
0,Magazine,93194#6
1,Candy Bar,93194#6
2,Candy Bar,93194#6
3,Candy Bar,93194#6
4,Candy Bar,93194#6


In [25]:
# Count how many rows each (Tid, Product) pair has, then flatten the result
# back into a regular frame with the tally stored in a "Count" column.
rows_per_pair = df.groupby(by=["Tid", "Product"]).size()
df = rows_per_pair.reset_index(name="Count")
df.head(n=5)

Unnamed: 0,Tid,Product,Count
0,100001#3,Candy Bar,1
1,100007#10,Toothpaste,7
2,100010#6,Magazine,1
3,100013#10,Wrapping Paper,1
4,100016#6,Candy Bar,4


In [26]:
# Consolidating transactions: one row per Tid, one column per Product,
# holding the summed Count (0 where the product is absent from the basket).
count_per_pair = df.groupby(['Tid', 'Product'])['Count'].sum()
df2 = count_per_pair.unstack().fillna(0)

df2.head(n=5)

Product,Bow,Candy Bar,Deodorant,Greeting Cards,Magazine,Markers,Pain Reliever,Pencils,Pens,Perfume,Photo Processing,Prescription Med,Shampoo,Soap,Toothbrush,Toothpaste,Wrapping Paper
Tid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100001#3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100007#10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0
100010#6,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100013#10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
100016#6,0.0,4.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0


In [27]:
# (rows, columns) of the basket matrix: rows are Tid values, columns are products.
df2.shape

(6726, 17)

In [28]:
# Data encoding
def encode_units(x):
    """Convert a purchase count into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Number of units of a product in one transaction.

    Returns
    -------
    int
        1 when ``x`` is at least 1, otherwise 0.

    Notes
    -----
    The function is total: values strictly between 0 and 1, and NaN, map to 0
    instead of falling through and returning ``None``, which would turn the
    encoded column into object dtype.
    """
    # NaN compares False against any number, so it lands in the 0 branch.
    return 1 if x >= 1 else 0

# DataFrame.applymap is deprecated in pandas >= 2.1 and calls Python once per
# cell; the comparison below produces the same 0/1 flags (1 when the count is
# at least 1, else 0) in a single vectorized step.
df2 = (df2 >= 1).astype(int)

df2.head()

Product,Bow,Candy Bar,Deodorant,Greeting Cards,Magazine,Markers,Pain Reliever,Pencils,Pens,Perfume,Photo Processing,Prescription Med,Shampoo,Soap,Toothbrush,Toothpaste,Wrapping Paper
Tid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
100001#3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
100007#10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
100010#6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
100013#10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
100016#6,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,1,0


In [29]:
# Encoding is element-wise, so the shape should match the pre-encoding shape.
df2.shape

(6726, 17)

In [30]:
# Generating frequent product sets
# apriori expects a one-hot frame of booleans; 0/1 integers are still accepted
# but recent mlxtend releases warn about them, so cast at the call site.
# min_support=0.01 keeps itemsets present in at least 1% of transactions.
frq_pds = apriori(df2.astype(bool), min_support=0.01, use_colnames=True)
frq_pds

Unnamed: 0,support,itemsets
0,0.051591,(Bow)
1,0.175736,(Candy Bar)
2,0.15284,(Greeting Cards)
3,0.231936,(Magazine)
4,0.020071,(Pain Reliever)
5,0.135147,(Pencils)
6,0.144068,(Pens)
7,0.082664,(Perfume)
8,0.055456,(Photo Processing)
9,0.014422,(Prescription Med)


In [33]:
# Association rules: keep rules whose lift is at least 1, then order them
# from highest to lowest confidence (returns a new frame, nothing is mutated).
prod_rules = (
    association_rules(frq_pds, metric="lift", min_threshold=1)
    .sort_values("confidence", ascending=False)
)



In [34]:
# Show the five rules with the highest confidence.
prod_rules.head(n=5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
44,"(Toothpaste, Pencils)",(Candy Bar),0.022748,0.175736,0.011002,0.48366,2.752198,0.007005,1.596359
21,"(Greeting Cards, Magazine)",(Candy Bar),0.037467,0.175736,0.017247,0.460317,2.61937,0.010662,1.527313
39,"(Toothpaste, Magazine)",(Candy Bar),0.029884,0.175736,0.013232,0.442786,2.51961,0.007981,1.47926
26,"(Greeting Cards, Toothpaste)",(Candy Bar),0.033304,0.175736,0.01457,0.4375,2.48953,0.008718,1.465358
22,"(Candy Bar, Magazine)",(Greeting Cards),0.039994,0.15284,0.017247,0.431227,2.821431,0.011134,1.489452
