In [40]:
import  numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB

# 1. Carregando data.csv
data = pd.read_csv('Data_CSV/Data_Alzheimer_Diagnosis.csv')

# Breast_cancer_data_Suwal_2018.csv
# Data_Alzheimer_Diagnosis.csv
# data_Breast_Cancer_Wisconsin.csv
# data_Mammography.csv

from sklearn.impute import SimpleImputer

# Initialize the imputer
imputer = SimpleImputer(strategy='mean')

# Fit and transform the data
data_filled = imputer.fit_transform(data)
data_filled = pd.DataFrame(data_filled, columns=data.columns)

data = data_filled

# Supondo que a última coluna seja a classe
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values


print(X.shape)
print(y.shape)

# Converta para DataFrame para facilitar a seleção de características
df = pd.DataFrame(X)
corr_matrix = df.corr().abs()

# Encontre características altamente correlacionadas
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]  # Limite

# Dividir os dados
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.2, random_state=42)

# Remova as mesmas características de ambos os conjuntos (treinamento e teste)
X_train_reduced = X_train.drop(to_drop, axis=1)
X_test_reduced = X_test.drop(to_drop, axis=1)

print(f"Shape of X_train_reduced: {X_train_reduced.shape}")
print(f"Shape of X_test_reduced: {X_test_reduced.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of y_test: {y_test.shape}")

(820, 0)
(820,)
Shape of X_train_reduced: (656, 0)
Shape of X_test_reduced: (164, 0)
Shape of y_train: (656,)
Shape of y_test: (164,)


In [41]:
from sklearn.preprocessing import StandardScaler  # Adicione essa linha para importar StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA


# Normalizar os dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

#PCA analysis
pca = PCA(n_components=0.95)  # Preserve 95% of the variance
X_train_scaled = pca.fit_transform(X_train_scaled)
X_test_scaled = pca.transform(X_test_scaled)

# Inicializar o Bayesiano
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_scaled, y_train)
nb_predictions = nb_classifier.predict(X_test_scaled)

# Inicializar o LDA com shrinkage para reduzir a colinearidade
lda_classifier = LinearDiscriminantAnalysis(solver='eigen', shrinkage='auto')
lda_classifier.fit(X_train_scaled, y_train)
lda_predictions = lda_classifier.predict(X_test_scaled)

# Quadrado
qda_classifier = QuadraticDiscriminantAnalysis(reg_param=0.1)
qda_classifier.fit(X_train_scaled, y_train)
qda_predictions = qda_classifier.predict(X_test_scaled)

ValueError: at least one array or dtype is required

In [38]:
# 4. Avaliação dos modelos
#Função para avaliar cada modelo

def evaluate_with_confusion_matrix(name, y_true, y_pred):
        cm = confusion_matrix(y_true, y_pred)
        print(f'\n{name} Classifier - Confusion Matrix:')
        print(cm)

    # Acurácia por classe
        class_accuracies = cm.diagonal() / cm.sum(axis = 1)
        print(f'Acurácia por classe:{class_accuracies}')

    # Acurácia geral
        overall_accuracy = cm.diagonal().sum() / cm.sum()
        print(f'Acurácia geral: {overall_accuracy:.4f}')
        print(classification_report(y_true, y_pred))

evaluate_with_confusion_matrix('Naive Bayes',y_test, nb_predictions)
evaluate_with_confusion_matrix('Linear Discriminant Analysis', y_test, lda_predictions)
evaluate_with_confusion_matrix('Quadratic Discriminant Analysis', y_test, qda_predictions)


Naive Bayes Classifier - Confusion Matrix:
[[88  7]
 [ 2 43]]
Acurácia por classe:[0.92631579 0.95555556]
Acurácia geral: 0.9357
              precision    recall  f1-score   support

         0.0       0.98      0.93      0.95        95
         1.0       0.86      0.96      0.91        45

    accuracy                           0.94       140
   macro avg       0.92      0.94      0.93       140
weighted avg       0.94      0.94      0.94       140


Linear Discriminant Analysis Classifier - Confusion Matrix:
[[94  1]
 [ 3 42]]
Acurácia por classe:[0.98947368 0.93333333]
Acurácia geral: 0.9714
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98        95
         1.0       0.98      0.93      0.95        45

    accuracy                           0.97       140
   macro avg       0.97      0.96      0.97       140
weighted avg       0.97      0.97      0.97       140


Quadratic Discriminant Analysis Classifier - Confusion Matrix:
[[91 

In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Carregar os dados do arquivo CSV
df = data

# Supondo que a última coluna seja a variável alvo (classe)
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Converter rótulos para valores numéricos, se necessário
if y.dtype == 'object':
    y = LabelEncoder().fit_transform(y)

# Criar o modelo Discriminante Linear
model = LinearDiscriminantAnalysis()
print('\n\n\n\n Linear')
# Definir o número de repetições do Monte-Carlo Cross-Validation
n_repeats = 100  # Número de iterações
test_size = 0.2  # 10% para teste

# Armazenar as acurácias
scores = []

# Loop para realizar as divisões aleatórias
for _ in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None, stratify=y)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

# Exibir os resultados
scores = np.array(scores)
print('Acurácia por repetição:', scores)
print('Média da acurácia:', scores.mean())
print('Desvio padrão da acurácia:', scores.std())

model = LinearDiscriminantAnalysis()
print('\n\n\n\n Quadrático')
# Definir o número de repetições do Monte-Carlo Cross-Validation
n_repeats = 100  # Número de iterações
test_size = 0.1  # 10% para teste

# Armazenar as acurácias
scores = []

# Loop para realizar as divisões aleatórias
for _ in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None, stratify=y)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

# Exibir os resultados
scores = np.array(scores)
print('Acurácia por repetição:', scores)
print('Média da acurácia:', scores.mean())
print('Desvio padrão da acurácia:', scores.std())



model = GaussianNB()
print('\n\n\n\n Naive Bayes')
# Definir o número de repetições do Monte-Carlo Cross-Validation
n_repeats = 100  # Número de iterações
test_size = 0.1  # 10% para teste

# Armazenar as acurácias
scores = []

# Loop para realizar as divisões aleatórias
for _ in range(n_repeats):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=None, stratify=y)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

# Exibir os resultados
scores = np.array(scores)
print('Acurácia por repetição:', scores)
print('Média da acurácia:', scores.mean())
print('Desvio padrão da acurácia:', scores.std())







 Linear
Acurácia por repetição: [0.97857143 0.96428571 0.97142857 0.98571429 0.93571429 0.95714286
 0.96428571 0.97142857 0.96428571 0.96428571 0.97142857 0.94285714
 0.92857143 0.96428571 0.96428571 0.95714286 0.95       0.97857143
 0.91428571 0.94285714 0.97142857 0.98571429 0.95       0.95714286
 0.96428571 0.95       0.93571429 0.95       0.94285714 0.97142857
 0.97142857 0.96428571 0.97857143 0.95714286 0.9        0.97142857
 0.96428571 0.95714286 0.94285714 0.97142857 0.94285714 0.99285714
 0.97142857 0.97142857 0.97142857 0.97857143 0.93571429 0.95714286
 0.97857143 0.95       0.95       0.96428571 0.97857143 0.93571429
 0.96428571 0.97857143 0.95714286 0.96428571 0.95714286 0.96428571
 0.92857143 0.94285714 0.92857143 0.96428571 0.95       0.97142857
 0.92857143 0.97857143 0.96428571 0.99285714 0.92857143 0.97142857
 0.95714286 0.93571429 0.97857143 0.98571429 0.95       0.96428571
 0.97142857 0.97142857 0.97142857 0.96428571 0.95714286 0.97857143
 0.95714286 0.93571429 0.9