In [None]:
#Fix Code
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Step 1: Load the dataset
# The CSV is read straight from its hosted URL; pass a local path to
# read_csv instead if you keep your own copy of the file.
data = pd.read_csv(
    'https://raw.githubusercontent.com/margaretham/MartenalCare/main/Maternal%20Health%20Risk%20Data%20Set%20(1).csv'
)

# Preview the leading rows to see which columns the dataset provides
print("First few rows of the dataset:", data.head(), sep="\n")

# Step 2: Data Preprocessing

# Report how many values are missing per column before any imputation
print("\nMissing values in the dataset before handling:")
print(data.isnull().sum())

# Handle missing values
# Numerical columns: impute with the per-column median. Selecting on the
# generic 'number' dtype also covers numeric widths other than 64-bit.
numerical_cols = data.select_dtypes(include='number').columns
data[numerical_cols] = data[numerical_cols].fillna(data[numerical_cols].median())

# Categorical columns: impute with the mode (most frequent value).
# Every non-numeric column is treated as categorical, so text columns are
# covered whichever dtype pandas uses to store them.
categorical_cols = data.select_dtypes(exclude='number').columns
for col in categorical_cols:
    most_frequent = data[col].mode()
    # mode() is empty when the whole column is missing; nothing to fill with.
    if most_frequent.empty:
        continue
    # Assign the result back instead of calling fillna(..., inplace=True) on
    # data[col]: the chained in-place form operates on a temporary object and
    # triggers a pandas FutureWarning (it stops updating `data` in pandas 3.0).
    data[col] = data[col].fillna(most_frequent[0])

# Check again after filling missing values
print("\nMissing values in the dataset after handling:")
print(data.isnull().sum())

# Encode 'RiskLevel' (the target variable) to numeric values.
# LabelEncoder numbers the classes in sorted order of their string labels
# (e.g. 'high risk' sorts before 'low risk'), NOT in order of severity.
# Always translate codes back through label_encoder.classes_ /
# label_encoder.inverse_transform rather than assuming a fixed mapping.
label_encoder = LabelEncoder()
data['RiskLevel'] = label_encoder.fit_transform(data['RiskLevel'])

# Feature columns (everything except 'RiskLevel') and the encoded target
X = data.drop(columns=['RiskLevel'])
y = data['RiskLevel']

# Split into train and test sets (80% train, 20% test).
# stratify=y keeps the class proportions the same in both partitions, so the
# test metrics are not skewed by an unlucky draw of one risk class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 3: Feature Scaling

# Standardize features (important for neural networks). The scaler is fitted
# on the training partition only and then re-used on the test partition so no
# test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# The LSTM layers expect 3-D input (samples, timesteps, features); each row
# is presented as a sequence of length 1 by inserting a timestep axis.
X_train_scaled = X_train_scaled[:, np.newaxis, :]
X_test_scaled = X_test_scaled[:, np.newaxis, :]

# Step 4: Build the Model

# Size the output layer from the fitted encoder instead of hard-coding it,
# so the network stays in sync with the labels actually present in the data.
num_classes = len(label_encoder.classes_)

# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` argument on the first LSTM, which recent Keras versions warn
# against for Sequential models.
model = Sequential([
    Input(shape=X_train_scaled.shape[1:]),  # (timesteps, features)
    LSTM(64, return_sequences=True, activation='relu'),  # passes the full sequence to the next LSTM
    LSTM(32, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax'),  # one probability per risk class
])

# Step 5: Compile and Train the Model

# The targets are integer class codes, hence the sparse cross-entropy loss.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])


def _decay_after_warmup(epoch, lr):
    """Keep the learning rate for epochs 0-10, then shrink it by 10% per epoch."""
    return lr * 0.9 if epoch > 10 else lr


# Early stopping (restoring the best weights seen) and learning rate scheduler
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_scheduler = LearningRateScheduler(_decay_after_warmup)

# Train the model.
# Validation data is carved out of the TRAINING set via validation_split
# rather than re-using the test set: early stopping and best-weight
# restoration pick the model based on val_loss, so validating on the test set
# would leak it into model selection and make the later test metrics
# optimistic. Keras takes the validation slice from the end of the arrays
# before shuffling; the rows were already shuffled by train_test_split.
history = model.fit(
    X_train_scaled,
    np.asarray(y_train),  # plain array so the split does not depend on the Series index
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop, lr_scheduler],
)

# Step 6: Predict with the model

# Class probabilities for every held-out sample, one row per sample
y_pred = model.predict(X_test_scaled)

# The predicted class code is the column with the highest probability
y_pred_classes = y_pred.argmax(axis=1)

# Step 7: Evaluate Model Performance

# Per-class precision/recall/F1, labelled with the original class names
report = classification_report(
    y_test,
    y_pred_classes,
    target_names=label_encoder.classes_,
)
print("\nClassification Report:", report, sep="\n")

# Step 8: Predict on new input (example: User Input)


def _read_float(prompt):
    """Prompt repeatedly until the user enters a finite number; return it as float."""
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        # float() also accepts 'nan' / 'inf', which are meaningless here.
        if np.isfinite(value):
            return value
        print("Please enter a finite number.")


# Ask for user input
print("\nPlease enter the following details:")

# Collect the features in the same order as the training columns
age = _read_float("Age: ")
systolic_bp = _read_float("Systolic Blood Pressure (SystolicBP): ")
diastolic_bp = _read_float("Diastolic Blood Pressure (DiastolicBP): ")
bs = _read_float("Blood Sugar Level (BS): ")
body_temp = _read_float("Body Temperature (BodyTemp °F): ")
heart_rate = _read_float("Heart Rate (HeartRate): ")

# Create a 2D array for input features (single sample)
input_data = np.array([[age, systolic_bp, diastolic_bp, bs, body_temp, heart_rate]])

# Step 9: Scale the user input using the same scaler used for the training data.
# The scaler was fitted on a DataFrame, so label the sample with the fitted
# column names; handing it a bare array makes scikit-learn warn that the
# input has no feature names.
fitted_names = getattr(scaler, "feature_names_in_", None)
if fitted_names is not None:
    input_frame = pd.DataFrame(input_data, columns=fitted_names)
else:
    input_frame = input_data
input_data_scaled = np.asarray(scaler.transform(input_frame))

# Add the timestep axis the LSTM expects: (samples, time_steps=1, features)
input_data_scaled = input_data_scaled[:, np.newaxis, :]

# Step 10: Predict the class (RiskLevel) for the input data
risk_level_pred = model.predict(input_data_scaled)

# Convert prediction from probabilities to a class code
predicted_class = np.argmax(risk_level_pred, axis=1)

# Step 11: Display the predicted risk level, mapped back to its original label
predicted_risk = label_encoder.inverse_transform(predicted_class)
print(f"\nPredicted Risk Level: {predicted_risk[0]}")

First few rows of the dataset:
   Age  SystolicBP  DiastolicBP    BS  BodyTemp  HeartRate  RiskLevel
0   25         130           80  15.0      98.0         86  high risk
1   35         140           90  13.0      98.0         70  high risk
2   29          90           70   8.0     100.0         80  high risk
3   30         140           85   7.0      98.0         70  high risk
4   35         120           60   6.1      98.0         76   low risk

Missing values in the dataset before handling:
Age            0
SystolicBP     0
DiastolicBP    0
BS             0
BodyTemp       0
HeartRate      0
RiskLevel      0
dtype: int64

Missing values in the dataset after handling:
Age            0
SystolicBP     0
DiastolicBP    0
BS             0
BodyTemp       0
HeartRate      0
RiskLevel      0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mode()[0], inplace=True)
  super().__init__(**kwargs)


Epoch 1/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 29ms/step - accuracy: 0.4039 - loss: 1.0950 - val_accuracy: 0.4286 - val_loss: 1.0809 - learning_rate: 0.0010
Epoch 2/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5055 - loss: 1.0716 - val_accuracy: 0.5419 - val_loss: 1.0298 - learning_rate: 0.0010
Epoch 3/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5715 - loss: 0.9918 - val_accuracy: 0.5517 - val_loss: 0.9034 - learning_rate: 0.0010
Epoch 4/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5931 - loss: 0.8731 - val_accuracy: 0.5911 - val_loss: 0.8113 - learning_rate: 0.0010
Epoch 5/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6408 - loss: 0.8025 - val_accuracy: 0.6158 - val_loss: 0.7644 - learning_rate: 0.0010
Epoch 6/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms

