# Chapter 6 - Other Popular Machine Learning Methods
## Segment 1 - Association Rule Mining Using Apriori Algorithm

# Import the required libraries

In [7]:
pip install mlxtend

Collecting mlxtend
  Using cached https://files.pythonhosted.org/packages/64/e2/1610a86284029abcad0ac9bc86cb19f9787fe6448ede467188b2a5121bb4/mlxtend-0.17.2-py2.py3-none-any.whl
Installing collected packages: mlxtend
Successfully installed mlxtend-0.17.2
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [8]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Data Format

In [9]:
address = 'Data/groceries.csv'
data = pd.read_csv(address)

In [26]:
data

Unnamed: 0,1,2,3,4,5,6,7,8,9
0,citrus fruit,semi-finished bread,margarine,ready soups,,,,,
1,tropical fruit,yogurt,coffee,,,,,,
2,whole milk,,,,,,,,
3,pip fruit,yogurt,cream cheese,meat spreads,,,,,
4,other vegetables,whole milk,condensed milk,long life bakery product,,,,,
...,...,...,...,...,...,...,...,...,...
3085,whipped/sour cream,domestic eggs,bottled water,soda,,,,,
3086,tropical fruit,pip fruit,other vegetables,butter milk,yogurt,whipped/sour cream,UHT-milk,margarine,sugar
3087,UHT-milk,canned beer,,,,,,,
3088,root vegetables,semi-finished bread,specialty bar,,,,,,


# Data Coversion

In [12]:
basket_sets = pd.get_dummies(data)
basket_sets.head()

Unnamed: 0,1_Instant food products,1_UHT-milk,1_artif. sweetener,1_baby cosmetics,1_bags,1_baking powder,1_bathroom cleaner,1_beef,1_berries,1_beverages,...,9_sweet spreads,9_tea,9_vinegar,9_waffles,9_whipped/sour cream,9_white bread,9_white wine,9_whole milk,9_yogurt,9_zwieback
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Support Calculation

In [31]:
apriori(basket_sets, min_support=0.02)[:5]

Unnamed: 0,support,itemsets
0,0.030421,(7)
1,0.034951,(17)
2,0.029126,(23)
3,0.049191,(26)
4,0.064401,(47)


In [32]:
apriori(basket_sets, min_support=0.02, use_colnames=True)[:5]

Unnamed: 0,support,itemsets
0,0.030421,(1_beef)
1,0.034951,(1_canned beer)
2,0.029126,(1_chicken)
3,0.049191,(1_citrus fruit)
4,0.064401,(1_frankfurter)


In [33]:
df = basket_sets

frequent_itemsets = apriori(basket_sets, min_support=0.002, use_colnames=True)
frequent_itemsets['itemsets count'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets

Unnamed: 0,support,itemsets,itemsets count
0,0.006472,(1_UHT-milk),1
1,0.030421,(1_beef),1
2,0.011974,(1_berries),1
3,0.008414,(1_beverages),1
4,0.014887,(1_bottled beer),1
...,...,...,...
844,0.002265,"(3_pip fruit, 5_other vegetables, 6_whole milk)",3
845,0.002589,"(5_whole milk, 3_root vegetables, 4_other vege...",3
846,0.002913,"(3_whole milk, 5_yogurt, 4_curd)",3
847,0.003236,"(4_root vegetables, 5_other vegetables, 6_whol...",3


In [34]:
frequent_itemsets[frequent_itemsets['itemsets count'] >= 3].head()

Unnamed: 0,support,itemsets,itemsets count
820,0.002589,"(1_beef, 2_root vegetables, 3_other vegetables)",3
821,0.002589,"(3_whole milk, 2_other vegetables, 1_chicken)",3
822,0.002589,"(3_whole milk, 2_other vegetables, 1_citrus fr...",3
823,0.003236,"(3_pip fruit, 1_citrus fruit, 2_tropical fruit)",3
824,0.002589,"(1_citrus fruit, 4_whole milk, 3_other vegetab...",3


# Association Rules

## Confidence

In [38]:
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.5)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2_sausage),(1_frankfurter),0.011327,0.064401,0.011327,1.000000,15.527638,0.010597,inf
1,(7_pastry),(1_frankfurter),0.005178,0.064401,0.002589,0.500000,7.763819,0.002256,1.871197
2,(2_ham),(1_sausage),0.007120,0.076052,0.004531,0.636364,8.367505,0.003989,2.540858
3,(2_meat),(1_sausage),0.006796,0.076052,0.004854,0.714286,9.392097,0.004338,3.233819
4,(3_beef),(1_sausage),0.004854,0.076052,0.002589,0.533333,7.012766,0.002220,1.979889
...,...,...,...,...,...,...,...,...,...
71,"(4_root vegetables, 5_other vegetables)",(6_whole milk),0.005178,0.009385,0.003236,0.625000,66.594828,0.003188,2.641640
72,"(4_root vegetables, 6_whole milk)",(5_other vegetables),0.003883,0.012621,0.003236,0.833333,66.025641,0.003187,5.924272
73,"(5_other vegetables, 7_butter)",(6_whole milk),0.002589,0.009385,0.002265,0.875000,93.232759,0.002241,7.924919
74,"(6_whole milk, 7_butter)",(5_other vegetables),0.002913,0.012621,0.002265,0.777778,61.623932,0.002229,4.443204


## Lift

In [39]:
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1_beef),(2_citrus fruit),0.030421,0.028803,0.005502,0.180851,6.278986,0.004625,1.185618
1,(2_citrus fruit),(1_beef),0.028803,0.030421,0.005502,0.191011,6.278986,0.004625,1.198508
2,(1_beef),(2_other vegetables),0.030421,0.058900,0.003236,0.106383,1.806173,0.001444,1.053136
3,(2_other vegetables),(1_beef),0.058900,0.030421,0.003236,0.054945,1.806173,0.001444,1.025950
4,(1_beef),(2_root vegetables),0.030421,0.036893,0.005502,0.180851,4.902016,0.004379,1.175741
...,...,...,...,...,...,...,...,...,...
965,"(5_other vegetables, 7_butter)",(6_whole milk),0.002589,0.009385,0.002265,0.875000,93.232759,0.002241,7.924919
966,"(6_whole milk, 7_butter)",(5_other vegetables),0.002913,0.012621,0.002265,0.777778,61.623932,0.002229,4.443204
967,(5_other vegetables),"(6_whole milk, 7_butter)",0.012621,0.002913,0.002265,0.179487,61.623932,0.002229,1.215200
968,(6_whole milk),"(5_other vegetables, 7_butter)",0.009385,0.002589,0.002265,0.241379,93.232759,0.002241,1.314769


## Lift and Confidence

In [40]:
rules[(rules['lift'] >= 5) & (rules['confidence']>= 0.5)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
93,(2_sausage),(1_frankfurter),0.011327,0.064401,0.011327,1.000000,15.527638,0.010597,inf
137,(7_pastry),(1_frankfurter),0.005178,0.064401,0.002589,0.500000,7.763819,0.002256,1.871197
238,(2_ham),(1_sausage),0.007120,0.076052,0.004531,0.636364,8.367505,0.003989,2.540858
243,(2_meat),(1_sausage),0.006796,0.076052,0.004854,0.714286,9.392097,0.004338,3.233819
258,(3_beef),(1_sausage),0.004854,0.076052,0.002589,0.533333,7.012766,0.002220,1.979889
...,...,...,...,...,...,...,...,...,...
958,"(4_root vegetables, 5_other vegetables)",(6_whole milk),0.005178,0.009385,0.003236,0.625000,66.594828,0.003188,2.641640
959,"(4_root vegetables, 6_whole milk)",(5_other vegetables),0.003883,0.012621,0.003236,0.833333,66.025641,0.003187,5.924272
965,"(5_other vegetables, 7_butter)",(6_whole milk),0.002589,0.009385,0.002265,0.875000,93.232759,0.002241,7.924919
966,"(6_whole milk, 7_butter)",(5_other vegetables),0.002913,0.012621,0.002265,0.777778,61.623932,0.002229,4.443204
