# **Apriori Algorithm**
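**Apriori is an algorithm for frequent item set mining and association rule learning over relational databases. It proceeds by identifying the frequent individual items in the database and extending them to larger item sets as long as those item sets appear sufficiently often in the database.**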
## Instacart Grocery Dataset
- Instacart is an online grocery delivery service
- They have made available 3M grocery orders for over 200K users
- They provide between 4 and 100 orders for each user, and each order contains the sequence of products purchased
- We also have a brief description of the products

**Importing libraries**

In [1]:
import pandas as pd

from mlxtend.frequent_patterns import apriori,association_rules
from mlxtend.preprocessing import TransactionEncoder

import matplotlib.pyplot as plt
import numpy as np

import seaborn as sns

In [2]:
# Read the prior-orders workbook into a DataFrame and preview its first rows.
# NOTE(review): the recorded shape (1048575 rows) equals Excel's per-sheet row
# limit minus the header row, so this export may be truncated — confirm against
# the original data source.
df = pd.read_excel("order_prior.xlsx")
df.head(5)

Unnamed: 0.1,Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name_x,aisle_id_x,department_id_x,product_name_y,aisle_id_y,department_id_y,product_name,aisle_id,department_id,aisle,department
0,0,2,33120,1,1,Organic Egg Whites,86,16,Organic Egg Whites,86,16,Organic Egg Whites,86,16,eggs,dairy eggs
1,1,2,28985,2,1,Michigan Organic Kale,83,4,Michigan Organic Kale,83,4,Michigan Organic Kale,83,4,fresh vegetables,produce
2,2,2,9327,3,0,Garlic Powder,104,13,Garlic Powder,104,13,Garlic Powder,104,13,spices seasonings,pantry
3,3,2,45918,4,1,Coconut Butter,19,13,Coconut Butter,19,13,Coconut Butter,19,13,oils vinegars,pantry
4,4,2,30035,5,0,Natural Sweetener,17,13,Natural Sweetener,17,13,Natural Sweetener,17,13,baking ingredients,pantry


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(1048575, 16)

# Pre-Processing

**Removing unnecessary columns**

In [4]:
# Keep only the columns the basket analysis needs. Selecting the wanted columns
# (instead of dropping the unwanted ones in place) makes this cell idempotent:
# re-running it no longer raises KeyError for columns that were already removed.
df = df[["order_id", "add_to_cart_order", "aisle"]]

In [5]:
# Preview the frame after the column removal.
df.head()

Unnamed: 0,order_id,add_to_cart_order,aisle
0,2,1,eggs
1,2,2,fresh vegetables
2,2,3,spices seasonings
3,2,4,oils vinegars
4,2,5,baking ingredients


**The data is not in transaction format, so we convert it into a suitable form**

**Creating PIVOT Table**

In [6]:
# Build an order x aisle matrix: one row per order_id, one column per aisle.
# Each cell holds the summed add_to_cart_order values for that pair, and
# combinations that never occur are filled with 0.
my_basket = pd.pivot_table(
    df,
    values="add_to_cart_order",
    index="order_id",
    columns="aisle",
    aggfunc="sum",
).fillna(0)
my_basket

aisle,air fresheners candles,asian foods,baby accessories,baby bath body care,baby food formula,bakery desserts,baking ingredients,baking supplies decor,beauty,beers coolers,...,spreads,tea,tofu meat alternatives,tortillas flat bread,trail mix snack mix,trash bags liners,vitamins supplements,water seltzer sparkling water,white wines,yogurt
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,...,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,16.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
6,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110717,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110719,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Converting data into binary form for calculations**

In [7]:
def encode(x):
    """Binarize a basket cell: 1 if the value is positive, otherwise 0.

    Args:
        x: Numeric cell value from the order/aisle pivot table.

    Returns:
        int: 1 when ``x`` is strictly greater than 0, otherwise 0.
    """
    # A single comparison covers every numeric input. The previous pair of
    # ``x <= 0`` / ``x >= 1`` checks fell through and returned None for values
    # strictly between 0 and 1 and for NaN (NaN > 0 is False, so NaN maps to 0).
    return 1 if x > 0 else 0

In [8]:
# Binarize the basket matrix: 1 where the aisle occurs in the order, else 0.
# A vectorized comparison replaces the per-cell DataFrame.applymap call, which
# is deprecated since pandas 2.1 and invokes a Python function for every cell.
my_basket_sets = (my_basket > 0).astype(int)
my_basket_sets

aisle,air fresheners candles,asian foods,baby accessories,baby bath body care,baby food formula,bakery desserts,baking ingredients,baking supplies decor,beauty,beers coolers,...,spreads,tea,tofu meat alternatives,tortillas flat bread,trail mix snack mix,trash bags liners,vitamins supplements,water seltzer sparkling water,white wines,yogurt
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,1,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
6,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110716,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
110717,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
110718,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
110719,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Apriori Algorithm 

**Generating all frequent itemsets, filtered with a minimum support of 5% (`min_support=0.05`)**

In [20]:
# Mine the itemsets whose support in the binarized basket matrix is at least
# 0.05; use_colnames=True labels the itemsets with the aisle names rather than
# column positions.
frequent_itemsets = apriori(
    my_basket_sets,
    use_colnames=True,
    min_support=0.05,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.075796,(baking ingredients)
1,0.164040,(bread)
2,0.068713,(breakfast bakery)
3,0.074480,(butter)
4,0.070395,(candy chocolate)
...,...,...
152,0.050403,"(packaged vegetables fruits, milk, yogurt)"
153,0.050739,"(packaged vegetables fruits, yogurt, packaged ..."
154,0.062158,"(packaged vegetables fruits, fresh fruits, fre..."
155,0.068117,"(packaged cheese, fresh fruits, fresh vegetabl..."


**Association Rules**

In [21]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence is at least 0.5.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(bread),(fresh fruits),0.16404,0.556881,0.113473,0.691744,1.242176,0.022123,1.437505
1,(bread),(fresh vegetables),0.16404,0.443311,0.091223,0.556102,1.254429,0.018502,1.254093
2,(canned jarred vegetables),(fresh fruits),0.07372,0.556881,0.05046,0.684485,1.229141,0.009407,1.404431
3,(canned jarred vegetables),(fresh vegetables),0.07372,0.443311,0.05666,0.768579,1.733723,0.023979,2.405522
4,(canned meals beans),(fresh vegetables),0.07001,0.443311,0.050326,0.718836,1.621515,0.01929,1.979941
5,(cereal),(fresh fruits),0.091319,0.556881,0.058899,0.644985,1.15821,0.008046,1.248169
6,(chips pretzels),(fresh fruits),0.166596,0.556881,0.103968,0.62407,1.120652,0.011193,1.178727
7,(crackers),(fresh fruits),0.114271,0.556881,0.075066,0.65691,1.179623,0.01143,1.291553
8,(cream),(fresh fruits),0.09155,0.556881,0.057112,0.623832,1.120225,0.006129,1.177982
9,(eggs),(fresh fruits),0.137012,0.556881,0.095769,0.698983,1.255175,0.01947,1.472073


**A leverage value of 0 indicates independence.**

**A high conviction value means that the consequent is highly dependent on the antecedent; conviction takes values in the range [0, ∞].**

In [22]:
# Leverage of 0 means antecedent and consequent are independent, so test
# against 0 (the previous ``== 1`` comparison could never flag independence).
# np.isclose is used instead of ``==`` to avoid exact floating-point equality.
rules[np.isclose(rules["leverage"], 0)]  # an empty result means no independent rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction


**Association rules with confidence ≥ 50%, sorted from highest to lowest confidence**


In [12]:
# Raise the display limit to 129 rows, then list the rules whose confidence
# is at least 0.5, highest confidence first.
pd.set_option("display.max_rows", 129)
rules.loc[rules["confidence"].ge(0.5)].sort_values("confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
57,"(packaged vegetables fruits, yogurt)",(fresh fruits),0.126766,0.556881,0.104198,0.821973,1.47603,0.033605,2.489054
52,"(yogurt, fresh vegetables)",(fresh fruits),0.144538,0.556881,0.118,0.816398,1.46602,0.03751,2.413483
34,"(milk, fresh vegetables)",(fresh fruits),0.125324,0.556881,0.100238,0.799831,1.43627,0.030448,2.213728
45,"(packaged vegetables fruits, fresh vegetables)",(fresh fruits),0.234646,0.556881,0.186569,0.795109,1.42779,0.055899,2.162708
39,"(packaged cheese, fresh vegetables)",(fresh fruits),0.134888,0.556881,0.104929,0.777897,1.396881,0.029812,1.995102
10,(packaged vegetables fruits),(fresh fruits),0.368082,0.556881,0.270958,0.736134,1.321888,0.06598,1.679336
5,(fresh vegetables),(fresh fruits),0.443311,0.556881,0.317045,0.715175,1.284251,0.070173,1.555757
17,(yogurt),(fresh fruits),0.263615,0.556881,0.188366,0.714551,1.283131,0.041564,1.55236
1,(bread),(fresh fruits),0.16404,0.556881,0.113473,0.691744,1.242176,0.022123,1.437505
13,(soy lactosefree),(fresh fruits),0.170258,0.556881,0.117558,0.690471,1.23989,0.022745,1.431591


**SOME ACTIONABLE RULES**


In [19]:
# Hand-picked rules to act on, identified by (antecedents, consequents).
# Selecting by content instead of integer row labels keeps the choice stable:
# row labels are reassigned whenever `rules` is regenerated (the recorded
# outputs show label 3 as "canned jarred vegetables" in one run and as
# "chips pretzels" in another), so a hard-coded label list can silently pick
# different rules. The pairs below are the rules shown in this cell's output.
actionable_rules = [
    ({"packaged cheese", "fresh vegetables"}, {"fresh fruits"}),
    ({"yogurt"}, {"fresh fruits"}),
    ({"bread"}, {"fresh fruits"}),
    ({"soy lactosefree"}, {"fresh fruits"}),
    ({"chips pretzels"}, {"fresh fruits"}),
    ({"packaged cheese"}, {"fresh vegetables"}),
    ({"water seltzer sparkling water"}, {"fresh fruits"}),
    ({"fresh fruits", "yogurt"}, {"packaged vegetables fruits"}),
    ({"yogurt"}, {"fresh vegetables"}),
]
wanted_pairs = {(frozenset(a), frozenset(c)) for a, c in actionable_rules}

# Build the mask as an index-aligned boolean Series so that an empty `rules`
# frame is still treated as row selection rather than column selection.
is_actionable = pd.Series(
    [
        (frozenset(a), frozenset(c)) in wanted_pairs
        for a, c in zip(rules["antecedents"], rules["consequents"])
    ],
    index=rules.index,
    dtype=bool,
)
rules[is_actionable].sort_values(by="confidence", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
39,"(packaged cheese, fresh vegetables)",(fresh fruits),0.134888,0.556881,0.104929,0.777897,1.396881,0.029812,1.995102
17,(yogurt),(fresh fruits),0.263615,0.556881,0.188366,0.714551,1.283131,0.041564,1.55236
1,(bread),(fresh fruits),0.16404,0.556881,0.113473,0.691744,1.242176,0.022123,1.437505
13,(soy lactosefree),(fresh fruits),0.170258,0.556881,0.117558,0.690471,1.23989,0.022745,1.431591
3,(chips pretzels),(fresh fruits),0.166596,0.556881,0.103968,0.62407,1.120652,0.011193,1.178727
20,(packaged cheese),(fresh vegetables),0.230061,0.443311,0.134888,0.586314,1.322577,0.032899,1.345678
15,(water seltzer sparkling water),(fresh fruits),0.191567,0.556881,0.110427,0.576439,1.03512,0.003747,1.046175
58,"(fresh fruits, yogurt)",(packaged vegetables fruits),0.188366,0.368082,0.104198,0.553169,1.50284,0.034864,1.41422
24,(yogurt),(fresh vegetables),0.263615,0.443311,0.144538,0.548292,1.23681,0.027674,1.232408
