In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [353]:
df = pd.read_csv('./adult.csv')

In [354]:
df.head()

Unnamed: 0,age,maritalStatus,race,sex,hoursPerWeek,nativeCountry,income
0,39,Single,White,Male,40,US,<=50K
1,50,Married,White,Male,13,US,<=50K
2,38,Married,White,Male,40,US,<=50K
3,53,Married,Black,Male,40,China,<=50K
4,28,Married,Black,Female,40,China,<=50K


In [355]:
df.drop(['age', 'race','hoursPerWeek'], axis=1, inplace=True)

In [356]:
df.sex = df.sex.map({'Male': 0, 'Female': 1})
df.maritalStatus = df.maritalStatus.map({'Single': 0, 'Married': 1})
df.income = df.income.map({'<=50K': 0, '>50K': 1})
df.head()

Unnamed: 0,maritalStatus,sex,nativeCountry,income
0,0,0,US,0
1,1,0,US,0
2,1,0,US,0
3,1,0,China,0
4,1,1,China,0


In [357]:
oh = OneHotEncoder()

In [358]:
encoded = oh.fit_transform(df[['nativeCountry']]).toarray()
parsed = pd.DataFrame(encoded, columns = oh.categories_)
df.drop('nativeCountry', axis=1, inplace=True)
onehot = df.join(parsed)
onehot.head()

Unnamed: 0,maritalStatus,sex,income,"(China,)","(India,)","(Sweden,)","(US,)"
0,0,0,0,0.0,0.0,0.0,1.0
1,1,0,0,0.0,0.0,0.0,1.0
2,1,0,0,0.0,0.0,0.0,1.0
3,1,0,0,1.0,0.0,0.0,0.0
4,1,1,0,1.0,0.0,0.0,0.0


In [359]:
from mlxtend.frequent_patterns import apriori

In [360]:
ap = apriori(onehot, min_support=0.06, max_len=3)
ap

Unnamed: 0,support,itemsets
0,0.6,(0)
1,0.4,(1)
2,0.266667,(2)
3,0.266667,(3)
4,0.066667,(4)
5,0.2,(5)
6,0.466667,(6)
7,0.2,"(0, 1)"
8,0.2,"(0, 2)"
9,0.2,"(0, 3)"


In [361]:
from mlxtend.frequent_patterns import association_rules

In [362]:
support_ar = association_rules(ap, metric="support", min_threshold=0.2)
support_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(0),(1),0.6,0.4,0.2,0.333333,0.833333,-0.04,0.9
1,(1),(0),0.4,0.6,0.2,0.5,0.833333,-0.04,0.8
2,(0),(2),0.6,0.266667,0.2,0.333333,1.25,0.04,1.1
3,(2),(0),0.266667,0.6,0.2,0.75,1.25,0.04,1.6
4,(0),(3),0.6,0.266667,0.2,0.333333,1.25,0.04,1.1
5,(3),(0),0.266667,0.6,0.2,0.75,1.25,0.04,1.6
6,(0),(6),0.6,0.466667,0.2,0.333333,0.714286,-0.08,0.8
7,(6),(0),0.466667,0.6,0.2,0.428571,0.714286,-0.08,0.7


In [363]:
confidence_ar = association_rules(ap, metric="confidence", min_threshold=1)
confidence_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(4),(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
1,(4),(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
2,"(2, 3)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
3,"(0, 4)",(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
4,"(2, 4)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
5,(4),"(0, 2)",0.066667,0.2,0.066667,1.0,5.0,0.053333,inf
6,"(2, 5)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf


In [364]:
lift_ar = association_rules(ap, metric="lift", min_threshold=1.8)
lift_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2),(4),0.266667,0.066667,0.066667,0.25,3.75,0.048889,1.244444
1,(4),(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
2,"(0, 2)",(4),0.2,0.066667,0.066667,0.333333,5.0,0.053333,1.4
3,"(0, 4)",(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
4,(2),"(0, 4)",0.266667,0.066667,0.066667,0.25,3.75,0.048889,1.244444
5,(4),"(0, 2)",0.066667,0.2,0.066667,1.0,5.0,0.053333,inf
6,"(0, 5)",(2),0.133333,0.266667,0.066667,0.5,1.875,0.031111,1.466667
7,(2),"(0, 5)",0.266667,0.133333,0.066667,0.25,1.875,0.031111,1.155556


In [365]:
leverage_ar = association_rules(ap, metric="leverage", min_threshold=0.05)
leverage_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1),(5),0.4,0.2,0.133333,0.333333,1.666667,0.053333,1.2
1,(5),(1),0.2,0.4,0.133333,0.666667,1.666667,0.053333,1.8
2,"(0, 2)",(4),0.2,0.066667,0.066667,0.333333,5.0,0.053333,1.4
3,(4),"(0, 2)",0.066667,0.2,0.066667,1.0,5.0,0.053333,inf


In [366]:
conviction_ar = association_rules(ap, metric="conviction", min_threshold=1.5)
conviction_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(2),(0),0.266667,0.6,0.2,0.75,1.25,0.04,1.6
1,(3),(0),0.266667,0.6,0.2,0.75,1.25,0.04,1.6
2,(4),(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
3,(5),(1),0.2,0.4,0.133333,0.666667,1.666667,0.053333,1.8
4,(4),(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
5,"(2, 3)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
6,"(0, 4)",(2),0.066667,0.266667,0.066667,1.0,3.75,0.048889,inf
7,"(2, 4)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf
8,(4),"(0, 2)",0.066667,0.2,0.066667,1.0,5.0,0.053333,inf
9,"(2, 5)",(0),0.066667,0.6,0.066667,1.0,1.666667,0.026667,inf


In [367]:
ap = apriori(onehot, min_support=0.001, max_len=2)
ap

Unnamed: 0,support,itemsets
0,0.6,(0)
1,0.4,(1)
2,0.266667,(2)
3,0.266667,(3)
4,0.066667,(4)
5,0.2,(5)
6,0.466667,(6)
7,0.2,"(0, 1)"
8,0.2,"(0, 2)"
9,0.2,"(0, 3)"
