In [42]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [43]:
def load_and_preprocess_data(filepath) :
    # Loading the dataset
    df = pd.read_csv(filepath)
    
    # Dropping the column GarbageValues
    df.drop(["GarbageValues"], axis = 'columns', inplace = True)
    
    # Dropping rows where 'Outcome' label has null value
    df.dropna(subset = ["Outcome"], inplace = True)
    
    # Filling null values in 'Pregnancies' column with 0.0
    df["Pregnancies"].fillna(0.0, inplace = True)
    
    # Filling null values in all other columns to the mean of the column
    df.fillna(df.mean(), inplace = True)
    
    # Splitting data into feature set and label set
    features = df.loc[:, df.columns != "Outcome"]
    outcome = df.loc[:, df.columns == "Outcome"]
    
    return (features, outcome)
#load_and_preprocess_data("modified_diabetes.csv")

In [44]:
# Split the data into training and testing sets
def split_and_standardize(X, y) :
    # First splitting the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 5)
    
    # Now normalising the FEATURES of train and test set
    scaler = StandardScaler()
    scaled_train_features = scaler.fit_transform(X_train)
    scaled_test_features = scaler.fit_transform(X_test)
    
    return (scaled_train_features, scaled_test_features, y_train, y_test)

In [57]:
def create_model(X_train, y_train) :
    model_1 = MLPClassifier(hidden_layer_sizes = (58, 68, 67), 
                          activation = 'logistic', 
                          solver = 'adam', 
                          learning_rate = 'adaptive',
                          learning_rate_init = 0.001,
                          random_state = 42)
    
    model_2 = MLPClassifier(hidden_layer_sizes = (60, 60, 60), 
                          activation = 'relu', 
                          solver = 'sgd',
                          learning_rate = 'constant', 
                          learning_rate_init = 0.002,
                          random_state = 12)
    
    model_1.fit(X_train, y_train)
    model_2.fit(X_train, y_train)
    
    return (model_1, model_2)

In [58]:
def predict_and_evaluate(model, X_test, y_test) :
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_pred, y_test)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    conf_mat = confusion_matrix(y_test, y_pred)
    
    return (accuracy, precision, recall, f1, conf_mat)

In [59]:
features, outcome = load_and_preprocess_data("./modified_diabetes.csv")
X_train, X_test, y_train, y_test = split_and_standardize(features, outcome)
model_1, model_2 = create_model(X_train, y_train)

a1, p1, r1, f1_1, c1 = predict_and_evaluate(model_1, X_test, y_test)
print("MODEL 1")
print("accuracy ", a1)
print("precision ", p1)
print("recall", r1)
print("f1 score ", f1_1)
print("conf ", c1)

print()

a2, p2, r2, f1_2, c2 = predict_and_evaluate(model_2, X_test, y_test)
print("MODEL 2")
print("accuracy ", a2)
print("precision ", p2)
print("recall", r2)
print("f1 score ", f1_2)
print("conf ", c2)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Pregnancies"].fillna(0.0, inplace = True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


MODEL 1
accuracy  0.7587719298245614
precision  0.75
recall 0.5730337078651685
f1 score  0.6496815286624203
conf  [[122  17]
 [ 38  51]]

MODEL 2
accuracy  0.7543859649122807
precision  0.7538461538461538
recall 0.550561797752809
f1 score  0.6363636363636364
conf  [[123  16]
 [ 40  49]]


