In [9]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder as ohe
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth, fpmax

In [2]:
# Load the symptom spreadsheet (relative path) into `data`, then display the
# column labels so the available symptom fields are visible in the output.
data = pd.read_excel(io="database_sintomas.xlsx")
data.columns

Index(['FEBRE', 'TOSSE', 'FADIGA', 'PALADAR E OLFATO', 'DISPINÉIA',
       'OUTROS SINTOMAS', 'DIARREIA'],
      dtype='object')

In [3]:
# One-hot encode every symptom column so that each category found in a column
# becomes its own item (column) for the frequent-pattern miners below.
encoder = ohe()


def one_hot_column(column_name):
    """Return the one-hot encoding of ``data[column_name]`` as a DataFrame.

    The shared module-level ``encoder`` is re-fitted on the requested column,
    so after the call it reflects that column only. The returned frame has one
    column per category, labelled with the category values the encoder found.
    """
    # The encoder is fed a 2-D array: a single feature with one row per record.
    values = data[column_name].values.reshape(-1, 1)
    encoded = encoder.fit_transform(values).toarray()
    return pd.DataFrame(encoded, columns=encoder.categories_[0])


febre = one_hot_column("FEBRE")
tosse = one_hot_column("TOSSE")
fadiga = one_hot_column("FADIGA")
paladar_e_olfato = one_hot_column("PALADAR E OLFATO")
dispineia = one_hot_column("DISPINÉIA")
outros_sintomas = one_hot_column("OUTROS SINTOMAS")
diarreia = one_hot_column("DIARREIA")

# Side-by-side concatenation gives one row per record and one column per item.
# The frame is cast to bool because, per the mlxtend documentation, the
# frequent-pattern functions expect boolean input (non-bool frames are
# deprecated and slower); the 0.0/1.0 values map to False/True unchanged.
df_encoded = pd.concat(
    [febre, tosse, fadiga, paladar_e_olfato, dispineia, outros_sintomas, diarreia],
    axis=1,
).astype(bool)

# The miners are run with use_colnames=True, so two symptom columns sharing a
# category label would be indistinguishable in the reported itemsets.
assert df_encoded.columns.is_unique, "category labels collide across symptom columns"

In [10]:
# Mine frequent itemsets with Apriori using a 5% minimum support, reporting
# items by their column names instead of positional indices.
frq_items = apriori(df_encoded, min_support=0.05, use_colnames=True)

# Derive association rules using lift as the metric with a threshold of 1, and
# rank them strongest first: by lift, ties broken by support, both descending.
# Built as one chained expression so `rules` is bound exactly once.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(["lift", "support"], ascending=[False, False])
)

# End the cell with a bare expression (as the other result cells do) so the
# top rules render as a table rather than a wrapped plain-text print.
rules.head()

                   antecedents                consequents  antecedent support  \
287  (SEM DIARREIA, FEBRÍCOLA)               (SEM FADIGA)            0.124971   
290               (SEM FADIGA)  (SEM DIARREIA, FEBRÍCOLA)            0.501445   
84          (PERDA DE PALADAR)                (SEM TOSSE)            0.249242   
85                 (SEM TOSSE)         (PERDA DE PALADAR)            0.246872   
108          (DISPINÉIA GRAVE)        (TOSSE PERSISTENTE)            0.248012   

     consequent support   support  confidence      lift  leverage  conviction  
287            0.501445  0.064831    0.518765  1.034539  0.002164    1.035990  
290            0.124971  0.064831    0.129288  1.034539  0.002164    1.004957  
84             0.246872  0.063641    0.255336  1.034284  0.002110    1.011366  
85             0.249242  0.063641    0.257787  1.034284  0.002110    1.011513  
108            0.250683  0.064281    0.259183  1.033910  0.002108    1.011475  


In [11]:
# Run FP-Max on the encoded symptoms with a 5% minimum support, no cap on
# itemset length, and silent logging; itemsets are reported by column name.
frq_max = fpmax(
    df_encoded,
    min_support=0.05,
    use_colnames=True,
    max_len=None,
    verbose=0,
)

# Display the resulting itemsets, most frequent first.
frq_max.sort_values(by="support", ascending=False)

Unnamed: 0,support,itemsets
220,0.065341,"(TOSSE PRODUTIVA, SEM PERDA DE PALADAR OU OLFATO)"
188,0.065201,"(AFEBRIL, TOSSE PRODUTIVA)"
209,0.064831,"(SEM DIARREIA, SEM FADIGA, FEBRÍCOLA)"
231,0.064801,"(DISPINÉIA LEVE, SEM PERDA DE PALADAR OU OLFATO)"
226,0.064721,"(DISPINÉIA LEVE, DOR DE CABEÇA)"
...,...,...
21,0.060501,"(DOR DE GARGANTA, PERDA DE OLFATO)"
20,0.060401,"(FEBRÍCOLA, DOR DE GARGANTA)"
1,0.060241,"(SEM TOSSE, PERDA DE OLFATO)"
207,0.060141,"(SEM DIARREIA, FEBRÍCOLA, FADIGA)"


In [12]:
# Run FP-Growth on the encoded symptoms with a 5% minimum support, no cap on
# itemset length, and silent logging; itemsets are reported by column name.
frq_growth = fpgrowth(
    df_encoded,
    min_support=0.05,
    use_colnames=True,
    max_len=None,
    verbose=0,
)

# Display the resulting itemsets, most frequent first.
frq_growth.sort_values(by="support", ascending=False)

Unnamed: 0,support,itemsets
0,0.501445,(SEM FADIGA)
1,0.500425,(DIARREIA)
7,0.499575,(SEM DIARREIA)
8,0.498555,(FADIGA)
2,0.252563,(DISPINÉIA LEVE)
...,...,...
252,0.060501,"(DOR DE GARGANTA, PERDA DE OLFATO)"
240,0.060401,"(FEBRÍCOLA, DOR DE GARGANTA)"
315,0.060241,"(SEM TOSSE, PERDA DE OLFATO)"
150,0.060141,"(SEM DIARREIA, FEBRÍCOLA, FADIGA)"
