In [24]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error, mean_absolute_error, r2_score
import time

In [25]:
def getData():
  # Load the dataset
  url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
  names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
  dataset = pd.read_csv(url, names=names)

  # Replace zeros with NaN and then fill with the mean
  dataset[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']] = dataset[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']].replace(0, np.NaN)
  dataset.fillna(dataset.mean(), inplace=True)

  # Split the dataset into features and target
  X = dataset.drop('Outcome', axis=1)
  y = dataset['Outcome']

  # Standardize the features
  scaler = StandardScaler()
  X = scaler.fit_transform(X)

  return X, y

In [29]:
def getScores(y_test, y_pred):
    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    sensitivity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    ppv = cm[1, 1] / (cm[0, 1] + cm[1, 1])
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    mdae = np.median(np.abs(y_test - y_pred))
    r2 = r2_score(y_test, y_pred)

    return accuracy, sensitivity, specificity, ppv, rmse, mae, mdae, r2

In [49]:
def get_table(table):

    # Create a pandas DataFrame
    df = pd.DataFrame(table).T

    # Use pandas styling for a nice table display
    styled_df = df.style.set_table_styles(
        [{'selector': 'thead th',
          'props': [('background-color', '#4CAF50'),
                    ('color', 'white'),
                    ('font-weight', 'bold')]},
        {'selector': 'tbody tr:nth-child(odd)',
          'props': [('background-color', '#f2f2f2')]},
        {'selector': 'tbody tr:nth-child(even)',
          'props': [('background-color', '#ffffff')]},
          {'selector': 'td', 'props': [('text-align', 'left')]}]
    )

    # Display the table
    return(styled_df.hide(axis="index"))

In [55]:
X, y = getData()
dataset_sizes = [0.2, 0.4, 0.6, 0.8, 1.0]
results = []
results.append(['Size', 'Training Time', 'Testing Time', 'Accuracy', 'Sensitivity', 'Specificity', 'PPV', 'RMSE', 'MAE', 'MdAE', 'R2 Score'])

mlp = MLPClassifier(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=42)

for size in dataset_sizes:
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=(0.7*size), random_state=42)

    # training time
    start_time = time.time()
    mlp.fit(X_train, y_train)
    training_time = time.time() - start_time

    # testing time
    start_time = time.time()
    y_pred = mlp.predict(X_test)
    testing_time = time.time() - start_time

    accuracy, sensitivity, specificity, ppv, rmse, mae, mdae, r2 = getScores(y_test, y_pred)

    results.append([str(size), training_time, testing_time, accuracy, sensitivity, specificity, ppv, rmse, mae, mdae, r2])
    print(f"\nResults for {int(size * 100)}% of the dataset:")
    print("----------------------------------------------------")
    print(f"Training Time: {training_time:.4f} seconds")
    print(f"Testing Time: {testing_time:.4f} seconds")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Sensitivity (Recall): {sensitivity:.4f}")
    print(f"Specificity: {specificity:.4f}")
    print(f"Positive Predictive Value (PPV): {ppv:.4f}")
    print(f"R^2 Score: {r2:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Median Absolute Error (MdAE): {mdae:.4f}")
    print("----------------------------------------------------")


get_table(results)




Results for 20% of the dataset:
----------------------------------------------------
Training Time: 0.4971 seconds
Testing Time: 0.0014 seconds
Accuracy: 0.7383
Sensitivity (Recall): 0.6579
Specificity: 0.7806
Positive Predictive Value (PPV): 0.6122
R^2 Score: -0.1583
Root Mean Squared Error (RMSE): 0.5116
Mean Absolute Error (MAE): 0.2617
Median Absolute Error (MdAE): 0.0000
----------------------------------------------------

Results for 40% of the dataset:
----------------------------------------------------
Training Time: 0.7442 seconds
Testing Time: 0.0005 seconds
Accuracy: 0.7215
Sensitivity (Recall): 0.5677
Specificity: 0.8033
Positive Predictive Value (PPV): 0.6056
R^2 Score: -0.2287
Root Mean Squared Error (RMSE): 0.5277
Mean Absolute Error (MAE): 0.2785
Median Absolute Error (MdAE): 0.0000
----------------------------------------------------





Results for 60% of the dataset:
----------------------------------------------------
Training Time: 1.0101 seconds
Testing Time: 0.0013 seconds
Accuracy: 0.7309
Sensitivity (Recall): 0.5871
Specificity: 0.8076
Positive Predictive Value (PPV): 0.6190
R^2 Score: -0.1866
Root Mean Squared Error (RMSE): 0.5187
Mean Absolute Error (MAE): 0.2691
Median Absolute Error (MdAE): 0.0000
----------------------------------------------------





Results for 80% of the dataset:
----------------------------------------------------
Training Time: 1.4704 seconds
Testing Time: 0.0014 seconds
Accuracy: 0.7604
Sensitivity (Recall): 0.7105
Specificity: 0.7857
Positive Predictive Value (PPV): 0.6279
R^2 Score: -0.0721
Root Mean Squared Error (RMSE): 0.4895
Mean Absolute Error (MAE): 0.2396
Median Absolute Error (MdAE): 0.0000
----------------------------------------------------

Results for 100% of the dataset:
----------------------------------------------------
Training Time: 1.5002 seconds
Testing Time: 0.0013 seconds
Accuracy: 0.7273
Sensitivity (Recall): 0.6375
Specificity: 0.7748
Positive Predictive Value (PPV): 0.6000
R^2 Score: -0.2047
Root Mean Squared Error (RMSE): 0.5222
Mean Absolute Error (MAE): 0.2727
Median Absolute Error (MdAE): 0.0000
----------------------------------------------------




0,1,2,3,4,5
Size,0.2,0.4,0.6,0.8,1.0
Training Time,0.497074,0.744173,1.010077,1.470372,1.500184
Testing Time,0.001431,0.000492,0.00135,0.001366,0.001276
Accuracy,0.738275,0.721519,0.730942,0.760355,0.727273
Sensitivity,0.657895,0.567708,0.587097,0.710526,0.6375
Specificity,0.7806,0.803324,0.80756,0.785714,0.774834
PPV,0.612245,0.605556,0.619048,0.627907,0.6
RMSE,0.51159,0.527713,0.518708,0.489535,0.522233
MAE,0.261725,0.278481,0.269058,0.239645,0.272727
MdAE,0.0,0.0,0.0,0.0,0.0
