# Gerando regras de associação com o A-Priori

Bibliotecas necessárias:

In [1]:
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the input dataset from a local CSV file.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
CSV_PATH = r'C:/Users/JdMacGyver/Downloads/ES.csv'
df = pd.read_csv(CSV_PATH)

In [3]:
# Preview the first five rows (five is the default for head()).
df.head()

Unnamed: 0,NT_GER,QE_I01,QE_I02,QE_I03,QE_I04,QE_I05,QE_I06,QE_I07,QE_I08,QE_I09,...,QE_I17,QE_I18,QE_I19,QE_I20,QE_I21,QE_I22,QE_I23,QE_I24,QE_I25,QE_I26
0,50_75,Q01A,Q02D,Q03A,Q04D,Q05F,Q06A,Q07A,Q08A,Q09B,...,Q17B,Q18A,Q19A,Q20K,Q21A,Q22B,Q23E,Q24E,Q25H,Q26A
1,50_75,Q01A,Q02D,Q03A,Q04D,Q05F,Q06B,Q07A,Q08B,Q09E,...,Q17B,Q18A,Q19D,Q20G,Q21A,Q22B,Q23D,Q24A,Q25E,Q26F
2,0_25,Q01A,Q02A,Q03A,Q04E,Q05D,Q06A,Q07A,Q08C,Q09C,...,Q17A,Q18A,Q19B,Q20C,Q21A,Q22C,Q23D,Q24E,Q25H,Q26A
3,50_75,Q01A,Q02D,Q03A,Q04E,Q05D,Q06D,Q07A,Q08A,Q09A,...,Q17A,Q18A,Q19A,Q20H,Q21A,Q22C,Q23C,Q24C,Q25H,Q26A
4,50_75,Q01A,Q02D,Q03A,Q04B,Q05B,Q06F,Q07A,Q08A,Q09C,...,Q17A,Q18B,Q19F,Q20E,Q21B,Q22B,Q23E,Q24D,Q25E,Q26F


#### Passo preliminar: transformar os dados em itemsets (formato de entrada do algoritmo).

In [4]:
# One-hot encode the rows: every distinct value found in df becomes a boolean
# column, and each row is True in the columns of the values it contains.
te = TransactionEncoder()
te_ary = te.fit_transform(df.values)
df_ = pd.DataFrame(data=te_ary, columns=te.columns_)

In [5]:
# Preview the first five rows of the one-hot encoded frame.
df_.head()

Unnamed: 0,0_25,25_50,50_75,75_100,Q01A,Q01B,Q01C,Q01D,Q01E,Q02A,...,Q25H,Q26A,Q26B,Q26C,Q26D,Q26E,Q26F,Q26G,Q26H,Q26I
0,False,False,True,False,True,False,False,False,False,False,...,True,True,False,False,False,False,False,False,False,False
1,False,False,True,False,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
2,True,False,False,False,True,False,False,False,False,True,...,True,True,False,False,False,False,False,False,False,False
3,False,False,True,False,True,False,False,False,False,False,...,True,True,False,False,False,False,False,False,False,False
4,False,False,True,False,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


# O procedimento de geração de regras de associação é basicamente realizado em dois passos.

#### 1º passo: descobrir os itemsets frequentes.

In [6]:
# Minimum support passed to apriori.
MIN_SUPPORT = 0.4
# use_colnames=True reports itemsets by column name instead of column index.
frequent_itemsets = apriori(
    df_,
    min_support=MIN_SUPPORT,
    use_colnames=True,
)

#### 2º passo: descobrir as regras de associação.

In [7]:
# Derive association rules from the frequent itemsets, filtered by lift.
# min_threshold=0.8 is the library default, written out here so the cut-off is
# visible; note that it also keeps rules with lift below 1.
rules = association_rules(
    frequent_itemsets,
    metric='lift',
    min_threshold=0.8,
)

In [8]:
# Preview the first six generated rules.
rules.iloc[:6]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(25_50),(Q03A),0.493779,0.992758,0.487651,0.987589,0.994794,-0.002552,0.583557
1,(Q03A),(25_50),0.992758,0.493779,0.487651,0.491208,0.994794,-0.002552,0.994948
2,(25_50),(Q14A),0.493779,0.91922,0.46481,0.941331,1.024054,0.010918,1.376884
3,(Q14A),(25_50),0.91922,0.493779,0.46481,0.505657,1.024054,0.010918,1.024027
4,(25_50),(Q16_32),0.493779,0.927019,0.453853,0.919143,0.991503,-0.003889,0.902582
5,(Q16_32),(25_50),0.927019,0.493779,0.453853,0.489583,0.991503,-0.003889,0.99178


In [9]:
# Keep only the rule sides plus the confidence and lift metrics.
asc_rules = rules.loc[:, ['antecedents', 'consequents', 'confidence', 'lift']]

In [10]:
# Preview the last five rules (five is the default for tail()).
asc_rules.tail()

Unnamed: 0,antecedents,consequents,confidence,lift
5433,(Q12A),"(Q03A, Q16_32, Q15A, Q14A, Q18A)",0.521936,1.091929
5434,(Q16_32),"(Q03A, Q12A, Q15A, Q14A, Q18A)",0.46234,1.007568
5435,(Q15A),"(Q03A, Q12A, Q16_32, Q14A, Q18A)",0.603241,1.102291
5436,(Q14A),"(Q03A, Q12A, Q16_32, Q15A, Q18A)",0.466263,0.985023
5437,(Q18A),"(Q03A, Q12A, Q16_32, Q15A, Q14A)",0.544854,1.024454


#### Filtro para selecionar as regras cujo consequente contém uma faixa de rendimento (NT_GER)

In [11]:
# Distinct values of the NT_GER column, held in a set for membership tests.
notas = {*np.unique(df['NT_GER'])}

In [12]:
# Keep only the rules whose consequent contains at least one NT_GER value.
#
# A boolean mask selects the matching rows directly. Compared with inserting
# one column per rule into an empty frame and transposing it, this:
#   * preserves the column dtypes (the transpose turned every column,
#     including confidence and lift, into dtype object);
#   * avoids a fragmented frame when many rules match;
#   * still yields the four expected columns when no rule matches.
# Row labels and column order are the same as before.
has_grade_consequent = [
    not notas.isdisjoint(consequent)
    for consequent in asc_rules['consequents']
]
# .copy() makes df_new independent of asc_rules for any later modification.
df_new = asc_rules.loc[has_grade_consequent].copy()

#### Regras Geradas

In [13]:
# Display the filtered rules.
df_new

Unnamed: 0,antecedents,consequents,confidence,lift
1,(Q03A),(25_50),0.491208,0.994794
3,(Q14A),(25_50),0.505657,1.02405
5,(Q16_32),(25_50),0.489583,0.991503
7,(Q03A),(50_75),0.437897,1.00472
8,(Q16_32),(50_75),0.438502,1.00611
184,"(Q14A, Q03A)",(25_50),0.503155,1.01899
186,(Q14A),"(25_50, Q03A)",0.499394,1.02408
187,(Q03A),"(25_50, Q14A)",0.462402,0.99482
190,"(Q16_32, Q03A)",(25_50),0.487097,0.986467
192,(Q16_32),"(25_50, Q03A)",0.483974,0.992461
