<a href="https://colab.research.google.com/github/tmoura/softexIA/blob/main/Avaliando_Hipoteses_Metricas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Carregamento do Dataset

In [None]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# Load the breast-cancer dataset shipped with scikit-learn.
data = load_breast_cancer()

# Build one table holding every feature column plus the class label.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Separate the feature matrix (X) from the label vector (y).
X = df.drop(columns='target')
y = df['target']

display(X)

### Normalização de todas as colunas

In [None]:
from sklearn import preprocessing
import numpy as np

# Min-max scale every feature column.
# NOTE(review): the scaler is fitted on all rows here, before any train/test
# split is made further down, so held-out rows influence the scaling bounds.
x2 = X.values  # returns a NumPy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x2)

# Rebuild the frame with the original column labels and index; without them
# the scaled table would be shown with anonymous 0..n-1 column headers.
X = pd.DataFrame(x_scaled, columns=X.columns, index=X.index)

display(X)

In [53]:
import numpy as np

# Convert the features and the labels to NumPy arrays
X, y = np.array(X), np.array(y)

### 10-*Folds*

In [54]:
from sklearn.model_selection import StratifiedKFold

folds = 10
kf = StratifiedKFold(n_splits=folds)

# Materialise the (train indices, test indices) pair of every fold once.
splits = list(kf.split(X, y))

## 10 datasets: position i of each list holds the data of fold i
X_train = [X[train_index] for train_index, _ in splits]
y_train = [y[train_index] for train_index, _ in splits]

X_test = [X[test_index] for _, test_index in splits]
y_test = [y[test_index] for _, test_index in splits]

### Treinar Modelos

In [55]:
from sklearn import metrics
import numpy as np
from sklearn.tree import DecisionTreeClassifier

# Train and score one decision tree per fold, collecting the accuracies.
results = []

for i in range(folds):
    model = DecisionTreeClassifier(criterion="entropy")
    model.fit(X_train[i], y_train[i])

    # Predictions for the rows held out in fold i
    result = model.predict(X_test[i])

    # scikit-learn metrics take (y_true, y_pred), in that order
    acc = metrics.accuracy_score(y_test[i], result)

    results.append(acc)


print(results)
# Mean accuracy over all folds, rounded to a whole-number percentage
show = round(np.mean(results) * 100)
print("{}%".format(show))

[0.9298245614035088, 0.8421052631578947, 0.9649122807017544, 0.8947368421052632, 0.9824561403508771, 0.9473684210526315, 0.9473684210526315, 0.9122807017543859, 0.9473684210526315, 0.9285714285714286]
93%


### Outra opção

### usando a função ***cross_val_score***

In [56]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import numpy as np

# Same k-fold evaluation, delegated to cross_val_score.
folds = 10
model = DecisionTreeClassifier(criterion="entropy")

# One score per fold
scores = cross_val_score(estimator=model, X=X, y=y, cv=folds)

print(scores)
# Mean score over the folds, rounded to a whole-number percentage
show = round(scores.mean() * 100)
print(f"{show}%")

[0.92982456 0.89473684 0.94736842 0.9122807  0.98245614 0.94736842
 0.92982456 0.96491228 0.96491228 0.89285714]
94%


### Métricas de Avaliação

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Single hold-out split: 80% train and 20% test, stratified on the labels.
# From here on X_train / X_test / y_train / y_test are plain arrays again,
# replacing the per-fold lists built earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=None, stratify=y
)

model = DecisionTreeClassifier(criterion="entropy")
model.fit(X_train, y_train)

# Predicted labels for the held-out rows
result = model.predict(X_test)

# scikit-learn metrics take (y_true, y_pred), in that order
acc = metrics.accuracy_score(y_test, result)
show = round(acc * 100)
print("{}%".format(show))

In [None]:
### Confusion matrix:

from sklearn.metrics import confusion_matrix

# confusion_matrix expects (y_true, y_pred): rows are the true classes and
# columns the predicted classes. Passing them the other way round prints the
# transposed matrix.
print(confusion_matrix(y_test, result))

In [None]:
# Unpack the binary confusion matrix. The (tn, fp, fn, tp) order is only valid
# when the call is confusion_matrix(y_true, y_pred); with the arguments
# swapped, fp and fn trade places and precision/recall come out exchanged.
tn, fp, fn, tp = confusion_matrix(y_test, result).ravel()

# Share of predicted positives that are truly positive
precision = tp / (tp + fp)
# Share of true positives that were found
recall = tp / (tp + fn)
# Share of true negatives that were found
specificity = tn / (tn + fp)
# Harmonic mean of precision and recall
f1_score = (2 * (precision * recall)) / (precision + recall)

print("Precision: {}%".format(round(precision * 100)))
print("Recall: {}%".format(round(recall * 100)))
print("Specificity: {}%".format(round(specificity * 100)))
print("F1-Score {}%".format(round(f1_score * 100)))

In [None]:
from sklearn.metrics import classification_report

# classification_report takes (y_true, y_pred); with the arguments swapped the
# per-class precision and recall columns are exchanged and the support column
# counts predictions instead of true labels.
print(classification_report(y_test, result))

In [None]:
# roc_auc_score takes (y_true, y_score): the ground-truth labels come first.
# `result` holds hard class predictions, so this is the AUC of a single
# operating point; model.predict_proba(X_test)[:, 1] could be passed instead
# to score the ranking of predicted probabilities.
roc = metrics.roc_auc_score(y_test, result)

print("ROC AUC: {}%".format(round(roc * 100)))