### Question
Market Basket Analysis: Apriori Algorithm <br>
Dataset: Order1.csv <br>
The dataset has 38765 rows of purchase orders made by customers at
grocery stores. <br>
These orders can be analysed, and association rules can
be generated from them, using Market Basket Analysis with algorithms such as
the Apriori algorithm. <br>
Follow these steps: <br>
1. Data Pre-processing
2. Generate the list of transactions from the dataset
3. Train Apriori on the dataset
4. Visualize the list of datasets

In [1]:
# Import necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Read the raw grocery purchase records from disk
ORDERS_CSV = 'Order1.csv'
df = pd.read_csv(ORDERS_CSV)

In [3]:
# Build one basket per (member, date) pair: every item a member bought on the
# same day is collected into a single list stored in the 'Transaction' column.
basket_keys = ['Member_number', 'Date']
items_per_basket = df.groupby(basket_keys)['itemDescription'].apply(list)
transactions = items_per_basket.reset_index(name='Transaction')

In [4]:
# Preview the first five baskets to sanity-check the grouping
transactions.head(n=5)

Unnamed: 0,Member_number,Date,Transaction
0,1000,15-03-2015,"[sausage, whole milk, semi-finished bread, yog..."
1,1000,24-06-2014,"[whole milk, pastry, salty snack]"
2,1000,24-07-2015,"[canned beer, misc. beverages]"
3,1000,25-11-2015,"[sausage, hygiene articles]"
4,1000,27-05-2015,"[soda, pickled vegetables]"


In [6]:
# One-hot encode the baskets: one row per transaction, one boolean column per item.
# TransactionEncoder is imported in the notebook's import cell so that every
# dependency is declared in one place.
te = TransactionEncoder()
one_hot_encoded = te.fit_transform(transactions['Transaction'])
one_hot_encoded

array([[False, False, False, ...,  True,  True, False],
       [False, False, False, ...,  True, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [8]:
# Wrap the encoded boolean matrix in a DataFrame, labelling each column with
# the item name learned by the encoder
ndf = pd.DataFrame(data=one_hot_encoded, columns=te.columns_)
ndf.head(n=5)

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [11]:
# Mine the frequent itemsets with Apriori.
# `apriori` is imported in the notebook's first cell, so it is not re-imported here.
MIN_SUPPORT = 0.001  # keep itemsets whose support is at least 0.1% of transactions

# use_colnames=True reports itemsets by item name instead of column index
frequent_itemsets = apriori(ndf, min_support=MIN_SUPPORT, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.004010,(Instant food products)
1,0.021386,(UHT-milk)
2,0.001470,(abrasive cleaner)
3,0.001938,(artif. sweetener)
4,0.008087,(baking powder)
...,...,...
745,0.001136,"(rolls/buns, sausage, whole milk)"
746,0.001002,"(rolls/buns, whole milk, soda)"
747,0.001337,"(rolls/buns, whole milk, yogurt)"
748,0.001069,"(sausage, whole milk, soda)"


In [10]:
# Derive association rules from the frequent itemsets, using lift as the
# evaluation metric with a minimum threshold of 1
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=1,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(UHT-milk),(tropical fruit),0.021386,0.067767,0.001537,0.071875,1.060617,8.785064e-05,1.004426,0.058402
1,(tropical fruit),(UHT-milk),0.067767,0.021386,0.001537,0.022682,1.060617,8.785064e-05,1.001326,0.061307
2,(brown bread),(beef),0.037626,0.033950,0.001537,0.040853,1.203301,2.597018e-04,1.007196,0.175559
3,(beef),(brown bread),0.033950,0.037626,0.001537,0.045276,1.203301,2.597018e-04,1.008012,0.174891
4,(citrus fruit),(beef),0.053131,0.033950,0.001804,0.033962,1.000349,6.297697e-07,1.000012,0.000369
...,...,...,...,...,...,...,...,...,...,...
235,"(yogurt, whole milk)",(sausage),0.011161,0.060349,0.001470,0.131737,2.182917,7.967480e-04,1.082219,0.548014
236,"(sausage, whole milk)",(yogurt),0.008955,0.085879,0.001470,0.164179,1.911760,7.012151e-04,1.093681,0.481231
237,(yogurt),"(sausage, whole milk)",0.085879,0.008955,0.001470,0.017121,1.911760,7.012151e-04,1.008307,0.521727
238,(sausage),"(yogurt, whole milk)",0.060349,0.011161,0.001470,0.024363,2.182917,7.967480e-04,1.013532,0.576701
