## First, we extract the data we are interested in

In [1]:
import os

import numpy as np
import pandas as pd
import psycopg2 as pg
from sqlalchemy import create_engine

alchemyEngine = create_engine("postgresql://postgres:postgres@localhost:5432/baseball_db", pool_recycle=3600)
connection = alchemyEngine.connect()
query = """
    SELECT hits > 70 AS hits, homerun > 10 AS homeruns, runsbattedin > 30 AS rbi
    FROM databaseball 
    WHERE position = '1B'
"""

dataframe = pd.read_sql(query, connection)
%store dataframe
dataframe.head()

Stored 'dataframe' (DataFrame)


Unnamed: 0,hits,homeruns,rbi
0,True,True,True
1,True,True,True
2,True,True,True
3,True,True,True
4,True,False,True


## Next, we will generate the itemsets

In [4]:
%store -r
from mlxtend.frequent_patterns import fpgrowth

itemsets = fpgrowth(dataframe, min_support=0.06, use_colnames=True)
%store itemsets
itemsets

Unnamed: 0,support,itemsets
0,0.658228,(rbi)
1,0.594937,(homeruns)
2,0.56962,(hits)
3,0.582278,"(rbi, homeruns)"
4,0.56962,"(rbi, hits)"
5,0.531646,"(homeruns, hits)"
6,0.531646,"(rbi, homeruns, hits)"


## Now, let's see the association rules for these itemsets

In [11]:
%store -r
from mlxtend.frequent_patterns import association_rules

rules = association_rules(itemsets, metric="lift", min_threshold=1)
rules = rules.sort_values(["confidence", "lift"], ascending=[False, False])
rules[["antecedents", "consequents", "support", "confidence", "lift"]]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
3,(hits),(rbi),0.56962,0.658228,0.56962,1.0,1.519231,0.19468,inf
8,"(homeruns, hits)",(rbi),0.531646,0.658228,0.531646,1.0,1.519231,0.181702,inf
1,(homeruns),(rbi),0.594937,0.658228,0.582278,0.978723,1.486907,0.190675,16.063291
11,(hits),"(rbi, homeruns)",0.56962,0.582278,0.531646,0.933333,1.602899,0.199968,6.265823
5,(hits),(homeruns),0.56962,0.594937,0.531646,0.933333,1.568794,0.192758,6.075949
7,"(rbi, hits)",(homeruns),0.56962,0.594937,0.531646,0.933333,1.568794,0.192758,6.075949
6,"(rbi, homeruns)",(hits),0.582278,0.56962,0.531646,0.913043,1.602899,0.199968,4.949367
4,(homeruns),(hits),0.594937,0.56962,0.531646,0.893617,1.568794,0.192758,4.04557
10,(homeruns),"(rbi, hits)",0.594937,0.56962,0.531646,0.893617,1.568794,0.192758,4.04557
0,(rbi),(homeruns),0.658228,0.594937,0.582278,0.884615,1.486907,0.190675,3.510549
