In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd

diabetes_data_preprocessed = pd.read_csv('../data/preprocessed_dataset.csv')
# Assuming 'diabetes_data_preprocessed' is your preprocessed dataset
# and 'diabetes' is the target variable
X = diabetes_data_preprocessed.drop('diabetes', axis=1)
y = diabetes_data_preprocessed['diabetes']

# Splitting the dataset into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Initialize the Decision Tree classifier with default parameters
mlp_clf = MLPClassifier(max_iter=1000)

# Train the classifier on the training data
mlp_clf.fit(X_train, y_train)

# Predict on the test set
y_pred = mlp_clf.predict(X_test)

scores = cross_val_score(mlp_clf, X_train, y_train, cv=5)

# Print the cross-validation scores
print("Cross-validation scores:", scores)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Print the metrics
print(f"Accuracy (MLP): {accuracy}")
print(f"Precision (MLP): {precision}")
print(f"Recall (MLP): {recall}")
print(f"F1 Score (MLP): {f1}")
print(f"ROC AUC Score (MLP): {roc_auc}")

Cross-validation scores: [0.9709375 0.9716875 0.9720625 0.9720625 0.971125 ]
Accuracy (MLP): 0.9718
Precision (MLP): 0.9872231686541738
Recall (MLP): 0.6785714285714286
F1 Score (MLP): 0.8043025676613463
ROC AUC Score (MLP): 0.8388756989784762
