In [162]:
#Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


In [6]:
# Defining the variables and their possible ranges
counties = ['Nairobi', 'Nakuru', 'Mombasa', 'Machakos', 'Makueni',
            'Wajir', 'Kilifi', 'Lamu', 'Vihiga', 'Busia']
num_observations = 1000

In [146]:
# Simulating data for each variable
data = pd.DataFrame({
    'County': np.random.choice(counties, num_observations * len(counties)),
    'Age': np.random.randint(15, 46, num_observations * len(counties)),
    'Education': np.random.choice(['Primary', 'Secondary', 'Tertiary'], num_observations * len(counties)),
    'Number_of_Antenatal_Visits': np.random.randint(1, 11, num_observations * len(counties)),
    'Maternal_BMI': np.random.normal(25, 5, num_observations * len(counties)), 
    'Distance_to_Healthcare': np.random.uniform(1, 20, num_observations * len(counties)), 
    'Gestational_Age': np.random.randint(30, 42, num_observations * len(counties)),
    'Socioeconomic_Status': np.random.choice(['Low', 'Middle', 'High'], num_observations * len(counties)),
    'Marital_Status': np.random.choice(['Single', 'Married', 'Separated','Divorced'], num_observations * len(counties)),
    'Geographic_Location': np.random.choice(['Rural', 'Urban'], num_observations * len(counties)),
    'Nutritional_Status': np.random.choice(['Low', 'High'], num_observations * len(counties)),
    'Pre-existing_Health_Conditions': np.random.choice(['Hypertention', 'HIV/AIDs', 'Diabetes','None'], num_observations * len(counties)),
    'Social_Support': np.random.choice(['Yes', 'No'], num_observations * len(counties)),
    'Quality_of_Care': np.random.choice(['Poor', 'Good'], num_observations * len(counties)),
    'Maternal_Mental_Health': np.random.choice(['Stress', 'Depression','Anxiety','None'], num_observations * len(counties)),
    'Mortality': np.random.choice(['Yes', 'No'], p=[0.05, 0.95], size=10000) 
    
})

In [None]:
# Spliting the data into 10 separate DataFrames, one for each county
data_per_county = [data[data['County'] == county] for county in counties]
data_per_county


In [167]:
# Displaying the simulated data
data.head(10)

Unnamed: 0,County,Age,Education,Number_of_Antenatal_Visits,Maternal_BMI,Distance_to_Healthcare,Gestational_Age,Socioeconomic_Status,Marital_Status,Geographic_Location,Nutritional_Status,Pre-existing_Health_Conditions,Social_Support,Quality_of_Care,Maternal_Mental_Health,Mortality
0,Makueni,37,Secondary,1,19.177773,17.310127,32,Low,Married,Urban,Low,HIV/AIDs,No,Poor,Depression,No
1,Nakuru,37,Primary,4,29.589634,8.772844,35,Low,Married,Rural,Low,,No,Good,Depression,No
2,Nakuru,30,Secondary,2,33.616054,15.5931,34,Low,Divorced,Urban,High,Hypertention,No,Poor,Anxiety,No
3,Wajir,37,Primary,1,26.298736,2.177217,37,Low,Separated,Urban,Low,Hypertention,No,Poor,,No
4,Lamu,33,Secondary,6,22.671269,3.762094,33,Low,Divorced,Urban,High,HIV/AIDs,No,Good,Anxiety,No
5,Busia,42,Tertiary,1,19.36346,15.366818,35,Middle,Divorced,Urban,Low,HIV/AIDs,Yes,Poor,,No
6,Busia,42,Tertiary,5,21.533391,2.15646,31,Low,Separated,Rural,High,,No,Poor,Stress,No
7,Nairobi,19,Tertiary,9,29.428111,7.535894,41,Low,Separated,Rural,Low,,No,Good,Depression,No
8,Busia,27,Primary,4,27.513125,5.745111,31,High,Married,Rural,High,Hypertention,No,Poor,Stress,No
9,Vihiga,41,Tertiary,7,21.374562,9.730592,32,Middle,Single,Urban,Low,HIV/AIDs,No,Poor,,No


In [150]:
#Data Preprocessing
data.dropna(inplace= True)


In [151]:
# Encoding categorical variables
categorical_features = ['Education', 'Socioeconomic_Status', 'Marital_Status', 'Geographic_Location', 'Nutritional_Status', 
                        'Pre-existing_Health_Conditions', 'Social_Support', 'Quality_of_Care', 'Maternal_Mental_Health']
numerical_features = ['Age', 'Number_of_Antenatal_Visits', 'Maternal_BMI', 'Distance_to_Healthcare', 'Gestational_Age']

In [152]:
#Defining preprocessing steps for numerical and categorical features
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder()
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),  
        ('cat', categorical_transformer, categorical_features) 
    ])

In [153]:
#Splitting the data into training and testing sets
X = data.drop('Mortality', axis=1)  
y = data['Mortality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [154]:
#Initializing and training a logistic regression model within a pipeline
model = Pipeline(steps = [('preprocessor',preprocessor),('classifier',LogisticRegression())])
model.fit(X_train,y_train)

In [155]:

y_train.value_counts()

Mortality
No     7603
Yes     397
Name: count, dtype: int64

In [164]:
# Making predictions on the test set
y_pred = model.predict(X_test)


In [163]:

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the logistic regression model: {accuracy}')

Accuracy of the logistic regression model: 0.943


In [161]:
#Feature Importance Analysis
# Get feature importances from the logistic regression model
feature_importances = pd.DataFrame(model.named_steps['classifier'].coef_[0], index=preprocessor.get_feature_names_out(X.columns), columns=['Coefficient'])
feature_importances = feature_importances.sort_values(by='Coefficient', ascending=False)

# Display feature importances
print("Feature Importances:")
print(feature_importances)


Feature Importances:
                                                  Coefficient
cat__Pre-existing_Health_Conditions_HIV/AIDs         0.224603
cat__Nutritional_Status_High                         0.210090
cat__Marital_Status_Divorced                         0.168309
cat__Maternal_Mental_Health_Depression               0.163194
cat__Socioeconomic_Status_Middle                     0.150819
cat__Education_Secondary                             0.147095
cat__Maternal_Mental_Health_Stress                   0.082834
cat__Pre-existing_Health_Conditions_None             0.072872
cat__Social_Support_Yes                              0.064882
cat__Quality_of_Care_Poor                            0.026134
cat__Geographic_Location_Rural                       0.015990
num__Age                                             0.015016
cat__Education_Primary                               0.006926
num__Distance_to_Healthcare                          0.000032
cat__Geographic_Location_Urban                   

                                                   Recommendations

Based on the feature importances derived from the logistic regression model, here are some recommendations for improving maternal healthcare services in the Kenyan counties:

Prioritize Maternal Health Programs for HIV/AIDs Patients:

Given the positive coefficient for "Pre-existing Health Conditions - HIV/AIDs", it is crucial to prioritize maternal health programs that specifically cater to pregnant women with HIV/AIDs to improve their health outcomes and reduce maternal mortality.

Focus on Nutritional Support:

Since "Nutritional Status - High" has a significant positive impact, emphasizing nutritional support and interventions for pregnant women to maintain a healthy diet can positively influence maternal health outcomes.

Support Maternal Mental Health:

Addressing maternal mental health issues, such as depression and stress (as indicated by positive coefficients), through mental health support programs and interventions can help reduce the risk of maternal mortality.

Enhance Socioeconomic Support:

Improving socioeconomic status and providing support to women from middle-income backgrounds can have a positive impact on maternal health outcomes, as indicated by the positive coefficient for "Socioeconomic Status - Middle".

Increase Access to Antenatal Care:

The negative coefficient for "Number of Antenatal Visits" suggests that increasing the number of antenatal visits for pregnant women is crucial to improving maternal health outcomes and reducing mortality rates.


Enhance Maternal Healthcare Quality:

Addressing the quality of care provided during pregnancy and childbirth, especially for cases where the quality is poor ("Quality of Care - Poor"), is essential for improving maternal health outcomes.