# Apriori

## Importing the libraries

In [28]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

## Data Preprocessing

In [29]:
# Load the raw transactions file into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs
# where that exact location exists.
# NOTE(review): the column names referenced further down contain 'Ê' characters,
# which may mean the file is not really latin-1 (e.g. Mac Roman non-breaking
# spaces decoded as 'Ê') — confirm before changing the encoding, because the
# later cells rely on these exact column names.
transactions_csv = 'C:/Users/redye/MASTERS YORK/Data Mining/Rule Association/BurgersAndCoals.csv'
df = pd.read_csv(transactions_csv, encoding='latin-1')

## Change the lists of items to columns with binary values

In [30]:
# Build one boolean indicator column per product: True when the product name
# occurs in the transaction's item string. This is a literal, case-sensitive
# substring test (regex disabled), i.e. the same check as `name in items`,
# done once per column instead of once per row.
# NOTE(review): substring matching would also flag an item whose name merely
# contains one of these names — confirm the item strings cannot do that.
ITEMS_COLUMN = 'Ê ItemsÊÊ'
ITEM_NAMES = ['Milk', 'Burgers', 'Buns', 'Ketchup', 'Coals', 'Beer']

for item_name in ITEM_NAMES:
    df[item_name] = df[ITEMS_COLUMN].str.contains(item_name, regex=False)

## Remove the Original Columns

In [31]:
# The raw item-list and transaction-id columns are no longer needed once the
# boolean item columns exist; remove both so only the indicator columns remain.
df.drop(columns=['Ê ItemsÊÊ', 'TIDÊÊ'], inplace=True)

# Preview the first rows of the resulting frame.
df.head()

Unnamed: 0,Milk,Burgers,Buns,Ketchup,Coals,Beer
0,True,True,True,True,False,False
1,False,True,True,True,True,False
2,True,False,True,False,False,True
3,False,True,True,False,True,True
4,True,False,False,True,False,False


## Training the Apriori model on the dataset

In [32]:
# Mine the frequent itemsets from the boolean item columns, keeping those whose
# support reaches MIN_SUPPORT. use_colnames=True makes the itemsets show item
# names rather than column positions.
MIN_SUPPORT = 0.25
freq_itemsets = apriori(df, use_colnames=True, min_support=MIN_SUPPORT)

# Preview the first itemsets found.
freq_itemsets.head()

Unnamed: 0,support,itemsets
0,0.5,(Milk)
1,0.7,(Burgers)
2,0.8,(Buns)
3,0.7,(Ketchup)
4,0.5,(Coals)


## Derive rules from the itemsets

In [33]:
# Turn the frequent itemsets into association rules, keeping only rules whose
# confidence is at least MIN_CONFIDENCE.
MIN_CONFIDENCE = 0.5
rules = association_rules(freq_itemsets, min_threshold=MIN_CONFIDENCE, metric="confidence")

# Display the resulting rule table.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Buns),0.5,0.8,0.4,0.800000,1.000000,0.00,1.00
1,(Buns),(Milk),0.8,0.5,0.4,0.500000,1.000000,0.00,1.00
2,(Milk),(Ketchup),0.5,0.7,0.3,0.600000,0.857143,-0.05,0.75
3,(Burgers),(Buns),0.7,0.8,0.6,0.857143,1.071429,0.04,1.40
4,(Buns),(Burgers),0.8,0.7,0.6,0.750000,1.071429,0.04,1.20
...,...,...,...,...,...,...,...,...,...
98,"(Burgers, Coals)","(Ketchup, Beer)",0.5,0.3,0.3,0.600000,2.000000,0.15,1.75
99,"(Burgers, Beer)","(Ketchup, Coals)",0.4,0.4,0.3,0.750000,1.875000,0.14,2.40
100,"(Coals, Beer)","(Ketchup, Burgers)",0.4,0.6,0.3,0.750000,1.250000,0.06,1.60
101,(Coals),"(Ketchup, Burgers, Beer)",0.5,0.3,0.3,0.600000,2.000000,0.15,1.75


## Select only the rules whose antecedent contains Coals

In [36]:
# Keep only the rules whose antecedent itemset contains the target item.
# Membership is tested on the itemset itself rather than on its string form,
# so an item whose name merely contains "Coals" cannot match by accident.
TARGET_ITEM = 'Coals'
has_target_item = rules["antecedents"].apply(lambda antecedent: TARGET_ITEM in antecedent)
coals_rules = rules[has_target_item]

# Show the selected rules, strongest confidence first.
print(coals_rules.sort_values(by=['confidence'], ascending=False))

                   antecedents               consequents  antecedent support  \
8                      (Coals)                 (Burgers)                 0.5   
39            (Ketchup, Coals)                 (Burgers)                 0.4   
82         (Coals, Buns, Beer)                 (Burgers)                 0.3   
93      (Ketchup, Coals, Beer)                 (Burgers)                 0.3   
50               (Coals, Beer)                 (Burgers)                 0.4   
70      (Ketchup, Coals, Buns)                 (Burgers)                 0.3   
30               (Coals, Buns)                 (Burgers)                 0.4   
32                     (Coals)           (Burgers, Buns)                 0.5   
28            (Burgers, Coals)                    (Buns)                 0.5   
13                     (Coals)                    (Buns)                 0.5   
20                     (Coals)                    (Beer)                 0.5   
18                     (Coals)          