In [1]:
import pickle
import numpy as np
import pandas as pd

In [2]:
# Manually defined feature columns based on the training process
trained_model_feature_columns = [
    'prev_csection', 'total_births_main',
    'race_cat_White', 'race_cat_Black or African American', 'race_cat_More than one race',
    'race_cat_Asian', 'race_cat_Native Hawaiian or Other Pacific Islander', 'bmi_cat_Normal 18.5-24.9',
    'bmi_cat_Underweight <18.5', 'bmi_cat_Obesity II 35.0-39.9', 'bmi_cat_Not Reported',
    'bmi_cat_Overweight 25.0-29.9', 'age_cat_45-49 years', 'age_cat_40-44 years',
    'age_cat_25-29 years', 'age_cat_35-39 years', 'age_cat_30-34 years', 'age_cat_20-24 years',
    'bmi_cat_Obesity I 30.0-34.9', 'bmi_cat_Unknown or Not Stated', 'age_cat_50 years and over',
    'age_cat_Under 15 years', 'state_main_Wisconsin', 'state_main_Louisiana', 'state_main_Colorado',
    'state_main_Utah', 'state_main_Kentucky', 'state_main_South Carolina', 'state_main_Nebraska',
    'state_main_Mississippi', 'state_main_Arkansas', 'state_main_Tennessee', 'state_main_Massachusetts',
    'state_main_Kansas', 'state_main_Arizona', 'state_main_Virginia', 'state_main_Minnesota',
    'state_main_Iowa', 'state_main_Nevada', 'state_main_Idaho', 'state_main_Connecticut',
    'state_main_Maryland', 'state_main_Washington', 'state_main_Oregon', 'state_main_Hawaii',
    'state_main_North Carolina', 'state_main_New Mexico', 'state_main_Indiana', 'state_main_Maine',
    'state_main_Ohio', 'state_main_Montana', 'state_main_Oklahoma', 'state_main_Delaware',
    'state_main_Michigan', 'state_main_New Jersey', 'state_main_New Hampshire', 'state_main_South Dakota',
    'state_main_Pennsylvania', 'state_main_West Virginia', 'state_main_Rhode Island',
    'state_main_Missouri', 'state_main_Georgia', 'state_main_Illinois', 'state_main_Vermont',
    'state_main_North Dakota', 'state_main_New York', 'state_main_Alaska', 'state_main_Florida',
    'state_main_District of Columbia', 'state_main_Texas', 'state_main_Wyoming', 'state_main_California'
]


In [6]:
# Load the trained models
with open('random_forest_model.pkl', 'rb') as f:
    rf_model = pickle.load(f)

In [4]:
def preprocess_input(state, age_cat, bmi_cat, race_cat, prev_csection, total_births_main):
    # Create a DataFrame for the input
    input_df = pd.DataFrame({
        'prev_csection': [prev_csection],
        'total_births_main': [total_births_main],
        'state_main': [state],
        'age_cat': [age_cat],
        'bmi_cat': [bmi_cat],
        'race_cat': [race_cat]
    })
    
    # One-hot encode the input DataFrame
    input_df_encoded = pd.get_dummies(input_df, columns=['state_main', 'age_cat', 'bmi_cat', 'race_cat'])
    
    # Align the input DataFrame with the training feature columns, filling missing columns with 0
    input_df_encoded = input_df_encoded.reindex(columns=trained_model_feature_columns, fill_value=0)
    
    return input_df_encoded

In [5]:
# Chatbot function
def chatbot():
    print("Welcome to the Pregnancy Complication Risk Prediction Chatbot!")
    print("Please answer the following questions:")
    
    # Get user input
    state = input("Please enter the state you reside in: ")
    age_cat = input("Please enter your age group ('Under 15 years', '20-24 years', '25-29 years', '30-34 years', '35-39 years', '40-44 years', '45-49 years', '50 years and over'): ")
    bmi_cat = input("Please enter your BMI category ('Underweight <18.5', 'Normal 18.5-24.9', 'Overweight 25.0-29.9', 'Obesity I 30.0-34.9', 'Obesity II 35.0-39.9'): ")
    race_cat = input("Please enter your race category ('White', 'Black or African American', 'Asian', 'Native Hawaiian or Other Pacific Islander', 'More than one race'): ")
    prev_csection = int(input("Have you had a previous C-section? (1 for Yes, 0 for No): "))
    total_births_main = int(input("Please enter the total number of births: "))
    
    # Preprocess the input data
    input_data = preprocess_input(state, age_cat, bmi_cat, race_cat, prev_csection, total_births_main)
    
    # Make predictions using the Random Forest model
    rf_prediction = rf_model.predict(input_data)
    rf_probability = rf_model.predict_proba(input_data)
    
    # Output the predictions
    print("\nPrediction Results:")
    print("Random Forest Model Prediction:")
    print(f"  - Any Complication: {'High Risk' if rf_prediction[0] == 1 else 'Low Risk'} (Probability: {rf_probability[0][1]:.2f})")
    
    
    print("\nThank you for using the Pregnancy Complication Risk Prediction Chatbot!")

# Run the chatbot
chatbot()

Welcome to the Pregnancy Complication Risk Prediction Chatbot!
Please answer the following questions:


Please enter the state you reside in:  Nevada
Please enter your age group ('Under 15 years', '20-24 years', '25-29 years', '30-34 years', '35-39 years', '40-44 years', '45-49 years', '50 years and over'):  20-24 years
Please enter your BMI category ('Underweight <18.5', 'Normal 18.5-24.9', 'Overweight 25.0-29.9', 'Obesity I 30.0-34.9', 'Obesity II 35.0-39.9'):  Overweight 25.0-29.9
Please enter your race category ('White', 'Black or African American', 'Asian', 'Native Hawaiian or Other Pacific Islander', 'More than one race'):  Black or African American
Have you had a previous C-section? (1 for Yes, 0 for No):  0
Please enter the total number of births:  0



Prediction Results:
Random Forest Model Prediction:
  - Any Complication: High Risk (Probability: 0.55)

Thank you for using the Pregnancy Complication Risk Prediction Chatbot!


Feature names must be in the same order as they were in fit.

Feature names must be in the same order as they were in fit.

