In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
data = pd.read_csv('/content/drive/MyDrive/diabetes_binary_5050split_health_indicators_BRFSS2015.csv')

In [21]:
# 1. Separate features (X) and target (y)
X = data.drop("Diabetes_binary", axis=1)
y = data["Diabetes_binary"]

# 2. Stratified train-test split (70/30)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 3. Build a pipeline
mlp_pipeline = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(10,), activation='relu',
                  solver='adam', random_state=1, max_iter=300)
)

# 4. Cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(mlp_pipeline, X, y, cv=cv, scoring="accuracy")

print("Cross-validation scores:", cv_scores)
print("Mean CV accuracy: %.3f ± %.3f" % (cv_scores.mean(), cv_scores.std()))

# 5. Fit and test accuracy
mlp_pipeline.fit(X_train, y_train)
y_pred = mlp_pipeline.predict(X_test)
print("Test Accuracy: %.3f" % accuracy_score(y_test, y_pred))

Cross-validation scores: [0.75344791 0.75465026 0.75180365 0.75116707 0.7548451 ]
Mean CV accuracy: 0.753 ± 0.001
Test Accuracy: 0.752
