# Aprendizagem por Regras

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as srn
import plotly
import pickle
from yellowbrick.classifier import ConfusionMatrix
from sklearn.metrics import classification_report

In [9]:
#!pip install Orange3
import Orange

# Base Risco de Crédito

## Pré-processamento

In [13]:
# Load the credit-risk rules dataset from CSV into an Orange Table and display it.
# The path is relative to the notebook's working directory.
df_risco = Orange.data.Table('../Machine Learning e Data Science com Python de A à Z/Bases de dados/risco_credito_regras.csv')
df_risco

[[ruim, alta, nenhuma, 0_15 | alto],
 [desconhecida, alta, nenhuma, 15_35 | alto],
 [desconhecida, baixa, nenhuma, 15_35 | moderado],
 [desconhecida, baixa, nenhuma, acima_35 | alto],
 [desconhecida, baixa, nenhuma, acima_35 | baixo],
 ...
]

In [14]:
# Show the table's domain (its feature columns and class variable).
df_risco.domain

[historia, divida, garantias, renda | risco]

## CN2

In [15]:
# Build a CN2 rule learner with default settings and fit it on the full
# credit-risk table; calling the learner on a table returns the fitted model.
cn2 = Orange.classification.rules.CN2Learner()
regras_risco = cn2(df_risco)

In [16]:
# Print every rule held by the fitted CN2 model, one per line, in list order.
for regra in regras_risco.rule_list:
    print(regra)

IF renda==0_15 THEN risco=alto 
IF historia==boa AND divida!=alta THEN risco=baixo 
IF historia==boa AND garantias!=nenhuma THEN risco=baixo 
IF historia==boa AND renda!=15_35 THEN risco=baixo 
IF historia==boa THEN risco=moderado 
IF divida==alta THEN risco=alto 
IF historia!=desconhecida THEN risco=moderado 
IF garantias==adequada THEN risco=baixo 
IF renda==15_35 THEN risco=moderado 
IF historia==desconhecida THEN risco=alto 
IF TRUE THEN risco=alto 


Teste para:
- história: boa
- dívida: alta
- garantias: nenhuma
- renda > 35

e

- história: ruim
- dívida: alta
- garantias: adequada
- renda < 15

In [17]:
# Two hand-built records, with values in the order: historia, divida, garantias, renda.
registro_historia_boa = ['boa', 'alta', 'nenhuma', 'acima_35']
registro_historia_ruim = ['ruim', 'alta', 'adequada', '0_15']

# Classify both records with the fitted rule model and display the result.
previsoes = regras_risco([registro_historia_boa, registro_historia_ruim])
previsoes

array([1, 0], dtype=int64)

In [20]:
# List the class labels; the loop in a later cell indexes into this tuple with
# the predicted values to recover the label names.
df_risco.domain.class_var.values

('alto', 'baixo', 'moderado')

Registros foram classificados como:
- 1 --> 'baixo'
- 0 --> 'alto'

In [22]:
# Translate each predicted class index into its label and print it.
rotulos_risco = df_risco.domain.class_var.values
for indice_classe in previsoes:
    print(rotulos_risco[indice_classe])

baixo
alto


# Base Credit Data

## Pré-processamento

In [23]:
# Load the credit dataset from CSV into an Orange Table and display it.
# The path is relative to the notebook's working directory.
df_credit = Orange.data.Table('../Machine Learning e Data Science com Python de A à Z/Bases de dados/credit_data_regras.csv')
df_credit

[[66155.9, 59.017, 8106.53 | 0],
 [34415.2, 48.1172, 6564.75 | 0],
 [57317.2, 63.108, 8020.95 | 0],
 [42709.5, 45.752, 6103.64 | 0],
 [66952.7, 18.5843, 8770.1 | 1],
 ...
]

In [24]:
# Show the table's domain (its feature columns and class variable).
df_credit.domain

[income, age, loan | default]

In [26]:
# Randomly split the credit table into a 25% sample and the remaining rows.
# The result is a 2-tuple: index 0 is the 25% sample, index 1 is the rest.
# A fixed random_state makes the split (and everything trained on it) repeat
# exactly on Restart & Run All; without it every run draws a different split.
SEED = 42
base_dividida = Orange.evaluation.testing.sample(df_credit, n=0.25, random_state=SEED)
base_dividida

([[32423.8, 63.19, 2961.95 | 0],
  [47586.2, 42.275, 3343.06 | 0],
  [27218.6, 55.171, 4145 | 0],
  [20155.8, 41.9224, 3489.96 | 0],
  [68338.1, 34.3345, 12840.7 | 1],
  ...
 ],
 [[60113.3, 40.7743, 8253.38 | 0],
  [50879, 44.9651, 3257.01 | 0],
  [45677.9, 51.6931, 2966.25 | 0],
  [33546.3, 54.6985, 5347.3 | 0],
  [50414.3, 56.194, 2468.17 | 0],
  ...
 ])

In [28]:
# Unpack the split: the first element becomes the test set and the second
# the training set, then report how many rows landed in each.
credit_test, credit_train = base_dividida

print('train:', len(credit_train), 'e test:', len(credit_test))

train: 1500 e test: 500


## CN2

In [29]:
# Build a fresh CN2 rule learner (rebinding `cn2`) and fit it on the training
# portion of the credit data only.
cn2 = Orange.classification.rules.CN2Learner()
regras_credit = cn2(credit_train)

In [34]:
# Report how many rules were induced, leave a blank line, then list each rule.
print(f'{len(regras_credit.rule_list)} regras', end='\n\n')
for regra in regras_credit.rule_list:
    print(regra)

23 regras

IF age>=34.9966813726351 THEN default=0 
IF income>=69395.1164768077 THEN default=1 
IF loan<=2639.7101263171 AND income>=21448.8279935904 THEN default=0 
IF loan>=7708.31562497011 AND age>=18.1760434475727 THEN default=1 
IF income>=58132.4712652713 THEN default=0 
IF loan>=6342.56790924236 THEN default=1 
IF income>=50527.5841732509 THEN default=0 
IF loan>=5416.3577980789005 AND income>=50289.66474812481 THEN default=1 
IF loan>=5416.3577980789005 AND income>=49640.00470237809 THEN default=0 
IF loan>=5416.3577980789005 THEN default=1 
IF income>=31722.7309499867 AND income>=40496.2558229454 THEN default=0 
IF loan>=4265.17370408717 AND age>=22.983635846059798 THEN default=1 
IF loan<=2639.7101263171 AND loan>=2639.7101263171 THEN default=1 
IF loan<=2690.76813397945 AND age>=21.989767443854397 THEN default=0 
IF loan>=4625.19337762744 THEN default=1 
IF income>=31722.7309499867 THEN default=0 
IF loan>=3105.4430213977303 THEN default=1 
IF income>=24857.6948815025 THEN d

In [37]:
# Evaluate the already-fitted CN2 model on the held-out test table.
# The lambda stands in for a learner: it ignores its argument and returns the
# existing `regras_credit` model, so nothing is retrained here.
# NOTE(review): the lambda parameter is named `testdata`, but a learner is
# presumably called with the training table - confirm against the installed
# Orange version.
# NOTE(review): this rebinds `previsoes`, which earlier held an array of
# class indices; from here on it is an evaluation Results object.
previsoes = Orange.evaluation.testing.TestOnTestData(credit_train, credit_test,[lambda testdata: regras_credit])
previsoes

<Orange.evaluation.testing.Results at 0x212ea7c93a0>

In [40]:
# Classification accuracy of the CN2 rules, computed from the stored Results.
Orange.evaluation.CA(previsoes)

array([0.982])

## Classificador base - Majority learner

In [41]:
# Re-display the full credit table before building the baseline classifier.
df_credit

[[66155.9, 59.017, 8106.53 | 0],
 [34415.2, 48.1172, 6564.75 | 0],
 [57317.2, 63.108, 8020.95 | 0],
 [42709.5, 45.752, 6103.64 | 0],
 [66952.7, 18.5843, 8770.1 | 1],
 ...
]

In [42]:
# Re-display the domain (feature columns and class variable) of the credit table.
df_credit.domain

[income, age, loan | default]

In [43]:
# Baseline: a MajorityLearner evaluated with df_credit as BOTH the training
# and the test table, so the score is not a held-out estimate.
# This rebinds `previsoes` to the baseline's Results.
majority = Orange.classification.MajorityLearner()
previsoes = Orange.evaluation.testing.TestOnTestData(df_credit, df_credit, [majority])

In [44]:
# Classification accuracy of the majority-class baseline on the credit table.
Orange.evaluation.CA(previsoes)

array([0.8585])

In [53]:
from collections import Counter

# Count how many rows of the credit table fall into each class, keyed by the
# string form of the class value.
rotulos_default = (str(registro.get_class()) for registro in df_credit)
Counter(rotulos_default)

Counter({'0': 1717, '1': 283})

In [54]:
# Share of the majority class: 1717 rows of class '0' out of 1717 + 283 = 2000
# rows in total (counts taken from the Counter cell). It should equal the
# accuracy reported for the majority baseline.
1717/2000

0.8585

# Base Census

## Majority Learner

In [55]:
# Load the census dataset from CSV into an Orange Table and display it.
# The path is relative to the notebook's working directory.
df_census = Orange.data.Table('../Machine Learning e Data Science com Python de A à Z/Bases de dados/census_regras.csv')
df_census

[[39, State-gov, 77516, Bachelors, 13, ... | <=50K],
 [50, Self-emp-not-inc, 83311, Bachelors, 13, ... | <=50K],
 [38, Private, 215646, HS-grad, 9, ... | <=50K],
 [53, Private, 234721, 11th, 7, ... | <=50K],
 [28, Private, 338409, Bachelors, 13, ... | <=50K],
 ...
]

In [56]:
# Show the table's domain (its feature columns and class variable).
df_census.domain

[age, workclass, final-weight, education, education-num, marital-status, occupation, relationship, race, sex, capital-gain, capital-loos, hour-per-week, native-country | income]

In [57]:
# Baseline for the census data: a MajorityLearner evaluated with df_census as
# BOTH the training and the test table (not a held-out estimate).
# This rebinds `majority` and `previsoes`.
majority = Orange.classification.MajorityLearner()
previsoes = Orange.evaluation.testing.TestOnTestData(df_census, df_census, [majority])

In [58]:
# Classification accuracy of the majority-class baseline on the census table.
# The module is `Orange.evaluation`; the previous spelling `evalution` raised
# AttributeError and left this cell without a result.
Orange.evaluation.CA(previsoes)

Algoritmos da aula:
- OneR : Weka explorer
- Prism
- CN2 : Orange
- Majority Learner