In [53]:
# basic modules
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt 
# ml modules
from sklearn.metrics import (precision_score, recall_score, f1_score, classification_report)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost.sklearn import XGBClassifier
from imblearn.under_sampling import RandomUnderSampler

In [39]:
# Read the maternal health risk table from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='data/maternal_health_risk_new.csv')
df.head(n=5)

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk


In [40]:
# Undersample with a fixed seed; sampling_strategy='majority' targets the
# largest class only, so the other classes are left untouched.
sampler = RandomUnderSampler(sampling_strategy= 'majority', random_state= 42)

# Hand the sampler the predictors and the label separately, instead of passing
# the whole frame (label column included) as the feature matrix.
features = df.drop(columns=['RiskLevel'])
features_resampled, target_resampled = sampler.fit_resample(features, df['RiskLevel'])

# Rebuild one frame with RiskLevel as the last column, as before.
# NOTE(review): `df` is overwritten here, so re-running this cell resamples the
# already-resampled frame again — re-run the loading cell first.
# NOTE(review): resampling happens before any train/test split, so held-out rows
# are drawn from the resampled data too — confirm this is intended.
df = features_resampled.assign(RiskLevel=target_resampled)
df['RiskLevel'].value_counts()

RiskLevel
high risk    112
low risk     106
mid risk     106
Name: count, dtype: int64

In [41]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

# Partition the columns by dtype: object-typed columns are treated as
# categorical, everything else as numeric.
cat_features = df.select_dtypes(include= 'O').columns
num_features = df.select_dtypes(exclude='O').columns

oh_encoder = OneHotEncoder()
num_transformer = StandardScaler()

# Only the numeric scaling step is active; the one-hot step is left disabled.
preprocessor = ColumnTransformer(
    transformers=[
        ("StandardScaler", num_transformer, num_features)
        # ("OneHotEncoder", oh_encoder, cat_features)
    ]
)

In [42]:
# Separate the predictors from the RiskLevel target.
feature_frame = df.drop(columns='RiskLevel')
y = df['RiskLevel']

# Fit the preprocessor on the predictors and keep the transformed matrix as X.
# Note: this fit sees every row of `df`.
X = preprocessor.fit_transform(feature_frame)

X.shape

(324, 6)

In [43]:
from sklearn.model_selection import train_test_split

# Split the *untransformed* predictors so the preprocessor can be fitted on the
# training rows only. Fitting the scaler on all rows before splitting lets
# test-set statistics (mean/variance) leak into the training features.
raw_features = df.drop(columns=['RiskLevel'])
raw_train, raw_test, y_train, y_test = train_test_split(
    raw_features, y, test_size = 0.2, random_state = 42
)

# Learn the scaling parameters from the training rows, then apply those same
# parameters to the held-out rows.
X_train = preprocessor.fit_transform(raw_train)
X_test = preprocessor.transform(raw_test)
X_train.shape, X_test.shape

((259, 6), (65, 6))

In [50]:
def model_evaluator(true, pred):
    """Score predictions with macro-averaged precision, recall and F1.

    Parameters
    ----------
    true : ground-truth labels.
    pred : predicted labels, in the same order as ``true``.

    Returns
    -------
    tuple
        ``(precision, recall, f1)``, each computed with ``average='macro'``.
    """
    scorers = (precision_score, recall_score, f1_score)
    return tuple(scorer(true, pred, average= 'macro') for scorer in scorers)

In [54]:
# Candidate classifiers keyed by the name printed in the report.
# The estimators that draw random numbers get a fixed random_state so a fresh
# run reproduces the same scores (the sampler and the split already use 42).
models = {

    "Logistic Regression": LogisticRegression(class_weight= 'balanced'),
    "Naive Bayes": GaussianNB(),
    "K nearest Classifier": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(class_weight= 'balanced', random_state= 42),
    "Random Forest Classifier": RandomForestClassifier(class_weight= 'balanced', random_state= 42),
    "Gradient Boosting Classifier": GradientBoostingClassifier(random_state= 42),
    # "XGBoost Classifier": XGBClassifier(),
    
}

# Parallel lists of test-set results, one entry per model, in dict order.
model_list = []
precision_list = []
recall_list = []
f1_list = []

# Iterate name/estimator pairs directly instead of rebuilding key and value
# lists on every pass.
for model_name, model in models.items():
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    model_train_precision, model_train_recall, model_train_f1 = model_evaluator(y_train, y_train_pred)
    model_test_precision, model_test_recall, model_test_f1 = model_evaluator(y_test, y_test_pred)


    print(model_name)
    model_list.append(model_name)


    print('Model performance for Training set')
    print("- Precision: {:.4f}".format(model_train_precision))
    print("- Recall: {:.4f}".format(model_train_recall))
    print("- F1 Score {:.4f}".format(model_train_f1))
    print(classification_report(y_train, y_train_pred))

    print('----------------------------------')
    
    print('Model performance for Test set')
    print("- Precision: {:.4f}".format(model_test_precision))
    print("- Recall: {:.4f}".format(model_test_recall))
    print("- F1 score: {:.4f}".format(model_test_f1))
    print(classification_report(y_test, y_test_pred))
    # Only the test-set scores are collected for later comparison.
    precision_list.append(model_test_precision)
    recall_list.append(model_test_recall)
    f1_list.append(model_test_f1)
    
    print('='*35)
    print('\n')

Logistic Regression
Model performance for Training set
- Precision: 0.5738
- Recall: 0.5799
- F1 Score 0.5749
              precision    recall  f1-score   support

   high risk       0.69      0.69      0.69        86
    low risk       0.58      0.66      0.62        86
    mid risk       0.46      0.39      0.42        87

    accuracy                           0.58       259
   macro avg       0.57      0.58      0.57       259
weighted avg       0.57      0.58      0.57       259

----------------------------------
Model performance for Test set
- Precision: 0.5387
- Recall: 0.5561
- F1 score: 0.5446
              precision    recall  f1-score   support

   high risk       0.81      0.81      0.81        26
    low risk       0.54      0.65      0.59        20
    mid risk       0.27      0.21      0.24        19

    accuracy                           0.58        65
   macro avg       0.54      0.56      0.54        65
weighted avg       0.57      0.58      0.57        65



Naiv