# Gerando regras de associação com o A-Priori

Bibliotecas necessárias:

In [4]:
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [5]:
# Load the input CSV into a DataFrame.
notas_csv_path = "./NOTAS_ALTAS.csv"
df_notas = pd.read_csv(notas_csv_path)

In [6]:
# Preview the first five rows (head() defaults to n=5).
df_notas.head()

Unnamed: 0,NT_GER,QE_I02,QE_I03,QE_I04,QE_I05,QE_I06,QE_I07,QE_I08,QE_I09,QE_I10,...,QE_I17,QE_I18,QE_I19,QE_I20,QE_I21,QE_I22,QE_I23,QE_I24,QE_I25,QE_I26
0,ALTA,Q02D,Q03A,Q04E,Q05D,Q06C,Q07B,Q08B,Q09D,Q10E,...,Q17A,Q18B,Q19G,Q20K,Q21A,Q22B,Q23C,Q24E,Q25C,Q26A
1,ALTA,Q02F,Q03A,Q04B,Q05B,Q06B,Q07D,Q08C,Q09C,Q10B,...,Q17A,Q18A,Q19A,Q20G,Q21A,Q22A,Q23B,Q24E,Q25C,Q26C
2,ALTA,Q02D,Q03A,Q04D,Q05D,Q06B,Q07D,Q08C,Q09C,Q10C,...,Q17B,Q18A,Q19B,Q20H,Q21A,Q22C,Q23D,Q24A,Q25E,Q26A
3,ALTA,Q02B,Q03A,Q04D,Q05E,Q06E,Q07C,Q08B,Q09C,Q10A,...,Q17A,Q18B,Q19B,Q20G,Q21A,Q22A,Q23C,Q24A,Q25E,Q26A
4,ALTA,Q02A,Q03A,Q04D,Q05D,Q06D,Q07A,Q08B,Q09B,Q10B,...,Q17B,Q18A,Q19B,Q20H,Q21A,Q22E,Q23B,Q24A,Q25H,Q26A


#### Passo preliminar: transformar os dados em uma matriz booleana (one-hot) de itens por transação, que é o formato de entrada do algoritmo.

In [7]:
# One-hot encode the data: each DataFrame row is treated as one transaction
# and every distinct value found in the rows becomes a boolean column.
transactions = df_notas.values
te = TransactionEncoder()
te.fit(transactions)  # learns the item labels (exposed as te.columns_)
te_ary = te.transform(transactions)
df_ = pd.DataFrame(data=te_ary, columns=te.columns_)

In [8]:
# Preview the first five rows of the one-hot encoded frame (head() defaults to n=5).
df_.head()

Unnamed: 0,ALTA,Q02A,Q02B,Q02C,Q02D,Q02E,Q02F,Q03A,Q03B,Q03C,...,Q25H,Q26A,Q26B,Q26C,Q26D,Q26E,Q26F,Q26G,Q26H,Q26I
0,True,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
1,True,False,False,False,False,False,True,True,False,False,...,False,False,False,True,False,False,False,False,False,False
2,True,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
3,True,False,True,False,False,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
4,True,True,False,False,False,False,False,True,False,False,...,True,True,False,False,False,False,False,False,False,False


# O procedimento de geração de regras de associação é basicamente realizado em dois passos.

#### 1° passo: descobrir os itemsets frequentes.

In [9]:
# Mine the itemsets whose support is at least `min_support`;
# use_colnames=True reports items by column name instead of column index.
min_support = 0.2
frequent_itemsets = apriori(df_, use_colnames=True, min_support=min_support)

#### 2° passo: descobrir as regras de associação

In [11]:
# Derive association rules from the frequent itemsets, keeping rules whose
# confidence reaches the threshold. 0.8 is the library default, written out
# here so the cut-off is visible to the reader.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.8,
)

In [12]:
# Preview the first six generated rules together with their metrics.
rules.head(6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Q02A),(ALTA),0.603409,1.0,0.603409,1.0,1.0,0.0,inf
1,(Q02D),(ALTA),0.256315,1.0,0.256315,1.0,1.0,0.0,inf
2,(ALTA),(Q03A),1.0,0.994502,0.994502,0.994502,1.0,0.0,1.0
3,(Q03A),(ALTA),0.994502,1.0,0.994502,1.0,1.0,0.0,inf
4,(Q04B),(ALTA),0.21325,1.0,0.21325,1.0,1.0,0.0,inf
5,(Q04D),(ALTA),0.316892,1.0,0.316892,1.0,1.0,0.0,inf


In [13]:
# Keep only the columns needed downstream: both sides of each rule and its confidence.
rule_columns = ['antecedents', 'consequents', 'confidence']
asc_rules = rules[rule_columns]

In [16]:
# Preview the last ten rules of the reduced frame.
asc_rules.tail(10)

Unnamed: 0,antecedents,consequents,confidence
13175,"(ALTA, Q03A, Q19B, Q17B)","(Q12A, Q18A, Q21A, Q15A)",0.801479
13176,"(Q18A, Q03A, Q19B, Q17B)","(Q12A, ALTA, Q21A, Q15A)",0.859066
13177,"(ALTA, Q19B, Q17B, Q15A)","(Q12A, Q18A, Q21A, Q03A)",0.818683
13178,"(Q18A, Q19B, Q17B, Q15A)","(Q12A, ALTA, Q21A, Q03A)",0.875369
13179,"(ALTA, Q18A, Q19B, Q17B)","(Q12A, Q21A, Q03A, Q15A)",0.855728
13180,"(Q19B, Q21A, Q17B)","(Q12A, Q03A, Q15A, ALTA, Q18A)",0.876099
13181,"(Q12A, Q19B, Q17B)","(Q21A, Q03A, Q15A, ALTA, Q18A)",0.843829
13182,"(Q03A, Q19B, Q17B)","(Q21A, Q12A, Q15A, ALTA, Q18A)",0.801479
13183,"(Q19B, Q17B, Q15A)","(Q21A, Q12A, Q03A, ALTA, Q18A)",0.818683
13184,"(Q18A, Q19B, Q17B)","(Q21A, Q12A, Q03A, Q15A, ALTA)",0.855728


#### Filtro para manter apenas as regras cujo consequente contém o rendimento (item `ALTA`)

In [17]:
# Keep only the rules whose consequent itemset contains the "ALTA" item.
#
# A boolean mask replaces the previous "add one column per matching rule,
# then transpose" approach. The mask:
#   * runs in a single pass instead of growing a DataFrame inside a loop;
#   * preserves the column dtypes (the transpose round-trip turned every
#     column, including `confidence`, into object dtype);
#   * keeps the original row labels and column order;
#   * still yields the three expected columns when no rule matches, so the
#     exported CSV always has a header.
has_alta = asc_rules['consequents'].map(lambda itemset: "ALTA" in itemset)

# astype(bool) guarantees a boolean mask even when `asc_rules` is empty
# (an empty result of map() would otherwise have object dtype).
# copy() makes df_new independent of `asc_rules`/`rules` for later edits.
df_new = asc_rules.loc[has_alta.astype(bool)].copy()

#### Regras Geradas

In [18]:
# Display the filtered rules (those with "ALTA" in the consequent).
df_new

Unnamed: 0,antecedents,consequents,confidence
0,(Q02A),(ALTA),1
1,(Q02D),(ALTA),1
3,(Q03A),(ALTA),1
4,(Q04B),(ALTA),1
5,(Q04D),(ALTA),1
6,(Q05D),(ALTA),1
7,(Q05E),(ALTA),1
8,(Q06B),(ALTA),1
9,(Q06C),(ALTA),1
10,(Q07C),(ALTA),1


In [19]:
# Export the filtered rules to CSV; index=False leaves the row labels out of the file.
rules_csv_path = "regras_notas_altas.csv"
df_new.to_csv(rules_csv_path, index=False)