In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth, association_rules

In [2]:
# Load the transactions and discard the columns that are not used as items.
df = pd.read_csv('TransactionalData.csv').drop(columns=['state', 'month', 'week'])

# PopulationDensity is the only column cast to str before it is tagged.
df['PopulationDensity'] = df['PopulationDensity'].astype(str)

# Tag every value with a short code for the column it came from. The code in
# front of the ':' is what later cells split on to map an item back to its
# column.
item_tags = {
    'MaskUsage': 'MU: ',
    'SocialDistancing': 'SD: ',
    'mobilityLevels': 'ML: ',
    'PopulationDensity': 'PD: ',
    'casesPC_Levels': 'CPC: ',
    'CurrentIncidencePhase': 'CP: ',
    'FutureIncidencePhase': 'FP: ',
}
for column, tag in item_tags.items():
    df[column] = tag + df[column]

In [3]:
# Each row of df is one transaction: the list of its tagged column values.
tuples = df.values.tolist()

# One-hot encode the transactions into a boolean frame, one column per item.
te = TransactionEncoder()
te_ary = te.fit(tuples).transform(tuples)
tdf = pd.DataFrame(te_ary, columns=te.columns_)

# Mine frequent itemsets and derive rules from them. Both thresholds are set
# low (min_support=0.00001, confidence >= 0.005).
frequent_itemsets = fpgrowth(tdf, min_support=0.00001, use_colnames=True)
assoc_rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.005)

# Flatten each item set into a "a, b, c" string. The items are sorted because
# set iteration order is arbitrary; sorting makes the rule strings stable
# from one run to the next.
for side in ('antecedents', 'consequents'):
    assoc_rules[side] = assoc_rules[side].apply(lambda items: ', '.join(sorted(items))).astype(str)

# Chi-squared statistic of each rule, computed from its support, confidence
# and lift:
#     n * (lift - 1)^2 * supp * conf / ((conf - supp) * (lift - conf))
# n is taken from the encoded data instead of the literal 1050 that used to
# be hard-coded here, so the statistic stays right if the CSV changes size.
# NOTE(review): assumes 1050 was the number of transactions -- confirm.
# The denominator is zero when conf == supp or lift == conf; such rules get
# inf/NaN rather than raising.
n_transactions = len(tdf)
rule_supp = assoc_rules['support']
rule_conf = assoc_rules['confidence']
rule_lift = assoc_rules['lift']
assoc_rules['chi_squared'] = n_transactions*(rule_lift-1)*(rule_lift-1)*(rule_supp*rule_conf/((rule_conf-rule_supp)*(rule_lift-rule_conf)))

In [4]:
# Keep only the rules that predict a future incidence phase. The comparison
# is against the whole `consequents` value, so a rule is kept only when its
# consequent is exactly one of these items.
targeted_consequents = [
    'FP: Phase 2',
    'FP: Phase 3',
    'FP: Phase 4',
    'FP: Phase 5',
    'FP: Phase 1',
]
predicts_future_phase = assoc_rules['consequents'].isin(targeted_consequents)
assoc_rules_filtered = assoc_rules.loc[predicts_future_phase]

In [5]:
# Turn every filtered rule into one flat record:
#     {<item code>: <item value>, ..., 'support', 'confidence', 'lift', 'chi_squared'}
# ndf maps a running row number (0, 1, 2, ...) to that record.
ndf = {}
for k, (_, row) in enumerate(assoc_rules_filtered.iterrows()):
    nrow = {}
    # Antecedents are a comma-separated list of "CODE: value" items; the
    # consequent is a single such item and is parsed the same way, after them.
    rule_items = str(row['antecedents']).split(',') + [row['consequents']]
    for item in rule_items:
        # Split on the first ':' only, so a value that itself contains a
        # colon is kept whole instead of being cut at its first colon.
        # An item without any ':' still fails loudly (unpacking error).
        code, value = item.split(':', 1)
        nrow[code.strip()] = value.strip()
    for metric in ('support', 'confidence', 'lift', 'chi_squared'):
        nrow[metric] = row[metric]
    ndf[k] = nrow

In [6]:
# Build the rule table: one row per record in ndf, one column per key.
ndfDF = pd.DataFrame.from_dict(ndf, 'index')

# Replace the short item codes with readable column headers.
ndfDF = ndfDF.rename(columns={'PD':'Population Density',
                              'FP':'Future Phase',
                              'CP':'Current Phase',
                              'MU':'Mask Mandate',
                              'SD':'Social Distancing',
                              'CPC':'Cases Per Capita',
                              'ML':'Mobility Level'})

# Fix the output column order. reindex is used instead of ndfDF[[...]] so
# that a feature which appears in none of the kept rules (or an empty rule
# set) yields an empty/NaN column rather than a KeyError. When every column
# is present the result is the same as plain column selection.
ndfDF = ndfDF.reindex(columns=['Mask Mandate','Social Distancing','Mobility Level','Population Density','Cases Per Capita','Current Phase','Future Phase','support','confidence','lift','chi_squared'])

# Write the final rules without the row index.
ndfDF.to_csv('Final_Rules.csv', index=False)