In [43]:
import pandas as pd
import numpy as np
import pickle

In [44]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

In [45]:
from sklearn.linear_model import LogisticRegression
model=LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [46]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier

In [47]:
from sklearn.metrics import accuracy_score

In [48]:
df=pd.read_csv('C:/Pregnancy_risk_Prediction-master/finalData.csv')

SEPARATE FEATURES AND TARGET


In [49]:
target_column = 'Risk Level'
x = df.drop(columns=[target_column])
y = df[target_column]

ENCODE THE TARGET VARIABLE


In [50]:
encoder = LabelEncoder()
y = encoder.fit_transform(y)

HANDLE MISSING VALUES

In [51]:
imputer = SimpleImputer(strategy='mean')
x = imputer.fit_transform(x)


SPLIT THE DATA

In [52]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [53]:
#with open('trained_model.pkl', 'wb') as file:
 #  pickle.dump(model,file)
#print(model)


STANDARDIZE THE FEATURES

In [54]:
feature_names = ['Age', 'Systolic BP', 'Diastolic BP', 'BS', 'Body Temp', 'Heart Rate']


x_train = pd.DataFrame(x_train, columns=feature_names)
x_test = pd.DataFrame(x_test, columns=feature_names)


In [55]:
#x_train = x_train.values 
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
pickle.dump(scaler, open("C:/Pregnancy_risk_Prediction-master/scaler.pkl", "wb"))

print("✅ StandardScaler has been saved successfully!")


✅ StandardScaler has been saved successfully!


MODEL USED

In [56]:
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Classifier": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "HistGradientBoosting": HistGradientBoostingClassifier(),
}

EVALUATING EACH MODEL

In [57]:
results = {}
for model_name, model in models.items():
    model.fit(x_train, y_train)               # Train the model
    y_pred = model.predict(x_test)            # Predict on the test set
    accuracy = accuracy_score(y_test, y_pred)    # Evaluate the model
    results[model_name] = accuracy

ACCURACY

In [58]:
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2f}")

Logistic Regression: 0.88
K-Nearest Neighbors: 0.90
Support Vector Classifier: 0.92
Decision Tree: 0.89
Random Forest: 0.91
HistGradientBoosting: 0.93


In [59]:
best_model_name = max(results, key=results.get)
best_model = models[best_model_name]
print(f"\nBest Model: {best_model_name} with accuracy {results[best_model_name]:.2f}")



Best Model: HistGradientBoosting with accuracy 0.93


In [60]:
with open('C:/Pregnancy_risk_Prediction-master/trained_model.pkl', 'wb') as file:
 pickle.dump(best_model,file)
print(best_model)


HistGradientBoostingClassifier()


In [61]:
input_data = pd.DataFrame([[22, 120, 70, 7.01, 98, 76]], columns=feature_names)
std_data = scaler.transform(input_data)
prediction = best_model.predict(std_data)

In [None]:
with open('C:/Pregnancy_risk_Prediction-master/label_encoder.pkl', 'wb') as file:
         pickle.dump(encoder, file) 


In [63]:
predicted_label = encoder.inverse_transform(prediction)
print("Prediction (decoded):", predicted_label)

Prediction (decoded): ['Low']


In [64]:
y_train_series = pd.Series(y_train)
print(y_train_series.value_counts())


1    571
0    379
2     14
Name: count, dtype: int64
