# Gerando regras de associação com o A-Priori

Bibliotecas necessárias:

In [1]:
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the dataset from the CSV file that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="./CE.csv")

In [3]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,NT_GER,QE_I01,QE_I02,QE_I03,QE_I04,QE_I05,QE_I06,QE_I07,QE_I08,QE_I09,...,QE_I17,QE_I18,QE_I19,QE_I20,QE_I21,QE_I22,QE_I23,QE_I24,QE_I25,QE_I26
0,25_50,Q01A,Q02A,Q03A,Q04E,Q05E,Q06D,Q07B,Q08C,Q09C,...,Q17B,Q18A,Q19B,Q20A,Q21A,Q22C,Q23C,Q24E,Q25E,Q26F
1,25_50,Q01B,Q02A,Q03A,Q04D,Q05D,Q06B,Q07F,Q08C,Q09D,...,Q17E,Q18A,Q19B,Q20A,Q21A,Q22D,Q23B,Q24E,Q25E,Q26A
2,25_50,Q01A,Q02B,Q03A,Q04D,Q05D,Q06B,Q07E,Q08B,Q09B,...,Q17A,Q18A,Q19C,Q20D,Q21A,Q22B,Q23C,Q24E,Q25E,Q26A
3,50_75,Q01A,Q02D,Q03A,Q04D,Q05C,Q06B,Q07D,Q08C,Q09B,...,Q17A,Q18A,Q19B,Q20A,Q21A,Q22C,Q23C,Q24E,Q25H,Q26A
4,25_50,Q01B,Q02A,Q03A,Q04D,Q05E,Q06C,Q07B,Q08D,Q09D,...,Q17B,Q18A,Q19C,Q20G,Q21A,Q22C,Q23D,Q24E,Q25E,Q26F


#### Passo preliminar: transformar os dados em itemsets (formato de entrada do algoritmo).

In [4]:
# One-hot encode the data: every row of `df` is handed to the encoder as one
# transaction, and every distinct value becomes a boolean column of `df_`.
te = TransactionEncoder()
te_ary = te.fit_transform(df.values)
df_ = pd.DataFrame(data=te_ary, columns=te.columns_)

In [5]:
# Preview the first five rows of the one-hot encoded data.
df_.head(n=5)

Unnamed: 0,0_25,25_50,50_75,75_100,Q01A,Q01B,Q01C,Q01D,Q01E,Q02A,...,Q_1633,Q_1635,Q_1641,Q_1642,Q_1643,Q_1650,Q_1651,Q_1652,Q_1653,Q_1699
0,False,True,False,False,True,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
1,False,True,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,False,True,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,True,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,True,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False


# O procedimento de geração de regras de associação é basicamente realizado em dois passos.

#### 1º passo: descobrir os itemsets frequentes.

In [6]:
# Mine the itemsets whose support is at least 0.4; `use_colnames=True` makes
# the result list items by column name instead of by column position.
frequent_itemsets = apriori(
    df_,
    use_colnames=True,
    min_support=0.4,
)

#### 2º passo: descobrir as regras de associação.

In [7]:
# Derive association rules from the frequent itemsets, filtering on lift.
# `min_threshold=0.8` is the library default written out explicitly, so rules
# with lift slightly below 1 are kept as well.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0.8,
)

In [8]:
# Preview the first six generated rules with all of their metrics.
rules.head(n=6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Q01A),(25_50),0.773635,0.569241,0.429721,0.555456,0.975783,-0.010665,0.96899
1,(25_50),(Q01A),0.569241,0.773635,0.429721,0.7549,0.975783,-0.010665,0.923562
2,(25_50),(Q03A),0.569241,0.986506,0.560159,0.984045,0.997505,-0.001401,0.84573
3,(Q03A),(25_50),0.986506,0.569241,0.560159,0.567821,0.997505,-0.001401,0.996714
4,(25_50),(Q12A),0.569241,0.871205,0.497016,0.87312,1.002198,0.00109,1.01509
5,(Q12A),(25_50),0.871205,0.569241,0.497016,0.570492,1.002198,0.00109,1.002913


In [9]:
# Keep only the columns used in the rest of the analysis.
asc_rules = rules.loc[:, ['antecedents', 'consequents', 'confidence', 'lift']]

In [10]:
# Preview the last five rules of the reduced table.
asc_rules.tail(n=5)

Unnamed: 0,antecedents,consequents,confidence,lift
16193,(Q24E),"(Q18A, Q14A, Q15A, Q03A, Q12A, Q_1623)",0.63221,1.049086
16194,(Q15A),"(Q18A, Q14A, Q24E, Q03A, Q12A, Q_1623)",0.491253,1.052517
16195,(Q03A),"(Q18A, Q14A, Q15A, Q24E, Q12A, Q_1623)",0.416133,1.003738
16196,(Q12A),"(Q18A, Q14A, Q15A, Q24E, Q03A, Q_1623)",0.471207,1.037643
16197,(Q_1623),"(Q18A, Q14A, Q15A, Q24E, Q03A, Q12A)",0.42776,1.006787


#### Filtro para pegar as regras que envolvem rendimento

In [11]:
# Collect the distinct values of the NT_GER column as a set, so rule
# consequents can be tested against them with set operations.
notas = set(np.unique(df['NT_GER'].values))

In [12]:
# Keep only the rules whose consequent contains at least one of the NT_GER
# labels collected in `notas`.
#
# The selection is done with a single boolean mask instead of inserting one
# column per matching rule and transposing afterwards. This:
#   * preserves the original row labels and column order,
#   * keeps `confidence` and `lift` as floats (the transpose turned every
#     column into `object`),
#   * avoids the repeated column insertions, which get slower as the frame
#     grows,
#   * does not depend on `asc_rules` having a default 0..n-1 index.
# `.astype(bool)` keeps the mask boolean even when there are no rules at all,
# so an empty input yields an empty frame that still has the four columns.
has_grade = asc_rules['consequents'].map(
    lambda items: not notas.isdisjoint(items)
).astype(bool)

df_new = asc_rules.loc[has_grade].copy()

#### Regras Geradas

In [13]:
# Display the rules that were kept by the filter on the consequents.
df_new

Unnamed: 0,antecedents,consequents,confidence,lift
0,(Q01A),(25_50),0.555456,0.975783
3,(Q03A),(25_50),0.567821,0.997505
5,(Q12A),(25_50),0.570492,1.0022
6,(Q14A),(25_50),0.582911,1.02401
9,(Q15A),(25_50),0.567747,0.997374
11,(Q18A),(25_50),0.567568,0.99706
12,(Q_1623),(25_50),0.569806,1.00099
201,"(Q01A, Q03A)",(25_50),0.553626,0.972568
203,(Q01A),"(25_50, Q03A)",0.545394,0.97364
205,(Q03A),"(Q01A, 25_50)",0.427707,0.995314
