In [1]:
# Association algorithms for supervised classification on any dataset

In [2]:
dataset = [['Apple', 'Beer', 'Rice', 'Chicken'],
           ['Apple', 'Beer', 'Rice'],
           ['Apple', 'Beer'],
           ['Apple', 'Pear'],
           ['Milk', 'Beer', 'Rice', 'Chicken'],
           ['Milk', 'Beer', 'Rice'],
           ['Milk', 'Beer'],
           ['Apple', 'Pear']]

In [3]:
dataset

[['Apple', 'Beer', 'Rice', 'Chicken'],
 ['Apple', 'Beer', 'Rice'],
 ['Apple', 'Beer'],
 ['Apple', 'Pear'],
 ['Milk', 'Beer', 'Rice', 'Chicken'],
 ['Milk', 'Beer', 'Rice'],
 ['Milk', 'Beer'],
 ['Apple', 'Pear']]

In [4]:
# Import the transaction encoder
from mlxtend.preprocessing import TransactionEncoder

In [5]:
# Create the object
trans = TransactionEncoder()

In [6]:
# Apply the operation
df_t = trans.fit_transform(dataset)

In [7]:
df_t

array([[ True,  True,  True, False, False,  True],
       [ True,  True, False, False, False,  True],
       [ True,  True, False, False, False, False],
       [ True, False, False, False,  True, False],
       [False,  True,  True,  True, False,  True],
       [False,  True, False,  True, False,  True],
       [False,  True, False,  True, False, False],
       [ True, False, False, False,  True, False]])

In [8]:
trans.columns_

['Apple', 'Beer', 'Chicken', 'Milk', 'Pear', 'Rice']

In [9]:
import pandas as pd

In [10]:
# Create a structured dataframe
df = pd.DataFrame(df_t, columns=trans.columns_)

In [11]:
df

Unnamed: 0,Apple,Beer,Chicken,Milk,Pear,Rice
0,True,True,True,False,False,True
1,True,True,False,False,False,True
2,True,True,False,False,False,False
3,True,False,False,False,True,False
4,False,True,True,True,False,True
5,False,True,False,True,False,True
6,False,True,False,True,False,False
7,True,False,False,False,True,False


In [15]:
# Support count
sum(df['Rice']) / len(df)

0.5

In [16]:
# Generate frequent itemsets
from mlxtend.frequent_patterns import apriori

In [17]:
freq_itemset = apriori(df, min_support=0.25, use_colnames=True)

In [18]:
freq_itemset

Unnamed: 0,support,itemsets
0,0.625,(Apple)
1,0.75,(Beer)
2,0.25,(Chicken)
3,0.375,(Milk)
4,0.25,(Pear)
5,0.5,(Rice)
6,0.375,"(Beer, Apple)"
7,0.25,"(Pear, Apple)"
8,0.25,"(Rice, Apple)"
9,0.25,"(Beer, Chicken)"


In [19]:
# Generate strong association rules
from mlxtend.frequent_patterns import association_rules

In [20]:
rules = association_rules(freq_itemset,
                         metric='confidence',
                         min_threshold=0.5)

In [21]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Beer),(Apple),0.75,0.625,0.375,0.5,0.8,-0.09375,0.75
1,(Apple),(Beer),0.625,0.75,0.375,0.6,0.8,-0.09375,0.625
2,(Pear),(Apple),0.25,0.625,0.25,1.0,1.6,0.09375,inf
3,(Rice),(Apple),0.5,0.625,0.25,0.5,0.8,-0.0625,0.75
4,(Chicken),(Beer),0.25,0.75,0.25,1.0,1.333333,0.0625,inf
5,(Beer),(Milk),0.75,0.375,0.375,0.5,1.333333,0.09375,1.25
6,(Milk),(Beer),0.375,0.75,0.375,1.0,1.333333,0.09375,inf
7,(Beer),(Rice),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
8,(Rice),(Beer),0.5,0.75,0.5,1.0,1.333333,0.125,inf
9,(Rice),(Chicken),0.5,0.25,0.25,0.5,2.0,0.125,1.5


In [22]:
rules = rules[['antecedents','consequents','support','confidence']]

In [23]:
rules

Unnamed: 0,antecedents,consequents,support,confidence
0,(Beer),(Apple),0.375,0.5
1,(Apple),(Beer),0.375,0.6
2,(Pear),(Apple),0.25,1.0
3,(Rice),(Apple),0.25,0.5
4,(Chicken),(Beer),0.25,1.0
5,(Beer),(Milk),0.375,0.5
6,(Milk),(Beer),0.375,1.0
7,(Beer),(Rice),0.5,0.666667
8,(Rice),(Beer),0.5,1.0
9,(Rice),(Chicken),0.25,0.5


In [24]:
rules['antecedent_len'] = rules['antecedents'].apply(lambda x: len(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rules['antecedent_len'] = rules['antecedents'].apply(lambda x: len(x))


In [25]:
rules

Unnamed: 0,antecedents,consequents,support,confidence,antecedent_len
0,(Beer),(Apple),0.375,0.5,1
1,(Apple),(Beer),0.375,0.6,1
2,(Pear),(Apple),0.25,1.0,1
3,(Rice),(Apple),0.25,0.5,1
4,(Chicken),(Beer),0.25,1.0,1
5,(Beer),(Milk),0.375,0.5,1
6,(Milk),(Beer),0.375,1.0,1
7,(Beer),(Rice),0.5,0.666667,1
8,(Rice),(Beer),0.5,1.0,1
9,(Rice),(Chicken),0.25,0.5,1


In [26]:
nrules = rules[(rules['antecedent_len'] == 1) & 
               (rules['support'] > 0.30)]

In [27]:
nrules

Unnamed: 0,antecedents,consequents,support,confidence,antecedent_len
0,(Beer),(Apple),0.375,0.5,1
1,(Apple),(Beer),0.375,0.6,1
5,(Beer),(Milk),0.375,0.5,1
6,(Milk),(Beer),0.375,1.0,1
7,(Beer),(Rice),0.5,0.666667,1
8,(Rice),(Beer),0.5,1.0,1


In [32]:
# Prediction / Suggestion / Recommendation
nrules[nrules['antecedents'] == {'Apple'}]['consequents'][1]

frozenset({'Beer'})

In [34]:
rules.sort_values(by='confidence', ascending=False)

Unnamed: 0,antecedents,consequents,support,confidence,antecedent_len
18,"(Beer, Chicken)",(Rice),0.25,1.0,2
2,(Pear),(Apple),0.25,1.0,1
21,(Chicken),"(Beer, Rice)",0.25,1.0,1
4,(Chicken),(Beer),0.25,1.0,1
24,"(Rice, Milk)",(Beer),0.25,1.0,2
6,(Milk),(Beer),0.375,1.0,1
15,"(Rice, Apple)",(Beer),0.25,1.0,2
8,(Rice),(Beer),0.5,1.0,1
19,"(Rice, Chicken)",(Beer),0.25,1.0,2
10,(Chicken),(Rice),0.25,1.0,1


In [35]:
# Export the rules
rules.to_csv('rules.csv', index=False)