<a href="https://colab.research.google.com/github/matoslc/teste_colabo/blob/main/Notebooks/a_priori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importação do Arquivo




In [1]:
import pandas as pd
from pandas import read_csv
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
#Realiza a leitura do csv contendo uma amostra reduzida dos dados do dataset titanic
dataset = read_csv('https://telescopeinstorage.blob.core.windows.net/datasets/titanic-apriori.csv', sep=';' , engine='python')
dataset.head()

Unnamed: 0,Class,Sex,Age,Survived
0,3rd,Male,Child,No
1,3rd,Male,Child,No
2,3rd,Male,Child,No
3,3rd,Male,Child,No
4,3rd,Male,Child,No


In [None]:
#Obtêm a quatidade de linhas e colunas
qtdlinhas = dataset.shape[0]
qtdcols = dataset.shape[1]

In [None]:
print(qtdlinhas)
print(qtdcols)

2201
4


In [None]:
#Converte o dataset em uma lista de transacoes
transacoes = []
for i in range(0, qtdlinhas):
    linhaTransacao = []
    for j in range(0, qtdcols):        
        linhaTransacao.append(str(dataset.values[i,j]))
    
    transacoes.append(linhaTransacao)
print(transacoes)

[['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd', 'Male', 'Child', 'No'], ['3rd',

In [None]:
te = TransactionEncoder()

#Coloca em memória as trasações e interpreta a quantidade de colunas que serão geradas durante o processamento
te.fit(transacoes)

#O objeto TransactionEncoder faz a conversão das transações em uma matriz binária onde cada linha da matriz representa uma transação
matriz_transacoes = te.transform(transacoes)

In [None]:
print(te.columns_)

['1st', '2nd', '3rd', 'Adult', 'Child', 'Crew', 'Female', 'Male', 'No', 'Yes']


In [None]:
#Cria um dataframe auxiliar com a matriz binária (passo te.transform(transacoes)) de transações e as colunas obtidas (passo te.fit(transacoes))
dfAuxiliar = pd.DataFrame(matriz_transacoes, columns=te.columns_)
dfAuxiliar.head()

Unnamed: 0,1st,2nd,3rd,Adult,Child,Crew,Female,Male,No,Yes
0,False,False,True,False,True,False,False,True,True,False
1,False,False,True,False,True,False,False,True,True,False
2,False,False,True,False,True,False,False,True,True,False
3,False,False,True,False,True,False,False,True,True,False
4,False,False,True,False,True,False,False,True,True,False


In [None]:
#Obtêm os itemsets mais frequentes com um suporte mínimo igual a 0.01. O paramêtro use_colnames significa que vamos usar os nomes das colunas do DataFrame dfAuxiliar 
#para construir as regras de Associação
itemsets_freq = apriori(dfAuxiliar, min_support=0.005, use_colnames=True)

#Algumas métricas:
#- support(A->C) = support(A+C) [aka 'support'], range: [0, 1]
#- confidence(A->C) = support(A+C) / support(A), range: [0, 1]
#- lift(A->C) = confidence(A->C) / support(C), range: [0, inf]
#- leverage(A->C) = support(A->C) - support(A)*support(C), range: [-1, 1]
#- conviction = [1 - support(C)] / [1 - confidence(A->C)],

In [None]:
#Obtêm as regras de associação a partir dos itemsets mais frequêntes
regras = association_rules(itemsets_freq, metric="confidence", min_threshold=0.4)

In [None]:
#Ordena as Regras por confiança
regrasOrdenadas = regras.sort_values('confidence' , ascending=False)

In [None]:
regrasOrdenadas

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
40,"(1st, No)",(Adult),0.055429,0.950477,0.055429,1.000000,1.052103,0.002745,inf
141,"(1st, Male, No)",(Adult),0.053612,0.950477,0.053612,1.000000,1.052103,0.002655,inf
212,"(Crew, Yes, Male)",(Adult),0.087233,0.950477,0.087233,1.000000,1.052103,0.004320,inf
162,"(Female, 2nd, Child)",(Yes),0.005906,0.323035,0.005906,1.000000,3.095640,0.003998,inf
97,"(Crew, No)",(Adult),0.305770,0.950477,0.305770,1.000000,1.052103,0.015143,inf
...,...,...,...,...,...,...,...,...,...
21,(Child),(Female),0.049523,0.213539,0.020445,0.412844,1.933340,0.009870,1.339441
96,(Adult),"(Crew, Male)",0.950477,0.391640,0.391640,0.412046,1.052103,0.019395,1.034706
46,"(Female, Yes)",(1st),0.156293,0.147660,0.064062,0.409884,2.775859,0.040984,1.444359
139,"(Female, Yes)","(1st, Adult)",0.156293,0.144934,0.063607,0.406977,2.808012,0.040955,1.441876


In [None]:
#mantém apenas as colunas que vamos utilizar 
regrasOrdenadas = regrasOrdenadas[['antecedents', 'consequents', 'support', 'confidence']]
regrasOrdenadas

Unnamed: 0,antecedents,consequents,support,confidence
40,"(1st, No)",(Adult),0.055429,1.000000
141,"(1st, Male, No)",(Adult),0.053612,1.000000
212,"(Crew, Yes, Male)",(Adult),0.087233,1.000000
162,"(Female, 2nd, Child)",(Yes),0.005906,1.000000
97,"(Crew, No)",(Adult),0.305770,1.000000
...,...,...,...,...
21,(Child),(Female),0.020445,0.412844
96,(Adult),"(Crew, Male)",0.391640,0.412046
46,"(Female, Yes)",(1st),0.064062,0.409884
139,"(Female, Yes)","(1st, Adult)",0.063607,0.406977


In [None]:
#Analise apenas da coluna Survived
regras_sobrevivetes =  regrasOrdenadas[regrasOrdenadas['consequents'] == {'Yes'}]
#OU
subset_sobrevivou = {'Yes'}
regras_sobrevivetes =  regrasOrdenadas[  regrasOrdenadas['consequents'].apply(lambda x: subset_sobrevivou.issubset(x))]

In [None]:
regras_naoSobrevivetes =  regrasOrdenadas[regrasOrdenadas['consequents'] == {'No'}]

subset_Mulheres = {'Female'}
regras_mulheres = regrasOrdenadas[  regrasOrdenadas['antecedents'].apply(lambda x: subset_Mulheres.issubset(x))]
print(regras_mulheres)

               antecedents   consequents   support  confidence
162   (Female, 2nd, Child)         (Yes)  0.005906    1.000000
197    (Female, Crew, Yes)       (Adult)  0.009087    1.000000
90          (Female, Crew)       (Adult)  0.010450    1.000000
185    (Female, Child, No)         (3rd)  0.007724    1.000000
147      (Female, 2nd, No)       (Adult)  0.005906    1.000000
34           (1st, Female)       (Adult)  0.065425    0.993103
132     (1st, Female, Yes)       (Adult)  0.063607    0.992908
44           (1st, Female)         (Yes)  0.064062    0.972414
133   (1st, Female, Adult)         (Yes)  0.063607    0.972222
136          (1st, Female)  (Yes, Adult)  0.063607    0.965517
104          (Female, Yes)       (Adult)  0.143571    0.918605
15                (Female)       (Adult)  0.193094    0.904255
50           (Female, 2nd)       (Adult)  0.042254    0.877358
61           (Female, 2nd)         (Yes)  0.042254    0.877358
198  (Female, Crew, Adult)         (Yes)  0.009087    0