In [None]:
# ================================
# CTG Predictive Model Prototype
# With Manual User Input
# ================================

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# ---------------------------------------
# STEP 1: Synthetic Dataset Generation
# ---------------------------------------

# Seed NumPy's global RNG so that every np.random draw below (the baseline
# heart rates and the categorical samplers) yields the same synthetic dataset
# on each run.
np.random.seed(42)

def sample_variability(n):
    """Draw ``n`` variability categories from NumPy's global RNG.

    Sampling weights: absent 0.05, low 0.20, normal 0.70, excessive 0.05.
    Returns a NumPy array of ``n`` category strings.
    """
    categories = ['absent', 'low', 'normal', 'excessive']
    weights = [0.05, 0.2, 0.7, 0.05]
    return np.random.choice(categories, size=n, p=weights)

def sample_acceleration(n):
    """Draw ``n`` acceleration flags from NumPy's global RNG.

    Sampling weights: present 0.65, absent 0.35.
    Returns a NumPy array of ``n`` category strings.
    """
    categories = ['present', 'absent']
    weights = [0.65, 0.35]
    return np.random.choice(categories, size=n, p=weights)

def sample_decel(n):
    """Draw ``n`` deceleration types from NumPy's global RNG.

    Sampling weights: none 0.60, variable 0.20, early 0.05, late 0.12,
    prolonged 0.03. Returns a NumPy array of ``n`` category strings.
    """
    categories = ['none', 'variable', 'early', 'late', 'prolonged']
    weights = [0.6, 0.2, 0.05, 0.12, 0.03]
    return np.random.choice(categories, size=n, p=weights)

def label_from_rules(lb, var, ac, decel):
    """Classify one synthetic CTG record with a simple rule set.

    Args:
        lb: Baseline fetal heart rate (numeric).
        var: Variability category ('absent', 'low', 'normal', 'excessive').
        ac: Acceleration flag ('present' or 'absent').
        decel: Deceleration type ('none', 'variable', 'early', 'late',
            'prolonged').

    Returns:
        'Pathologic', 'Suspect' or 'Normal'. Rules are checked in order and
        the first match wins:

        1. Baseline below 110 or above 170            -> Pathologic
        2. Late or prolonged decelerations            -> Pathologic
        3. Absent variability and no accelerations    -> Pathologic
        4. Baseline above 160 with reduced variability -> Pathologic
        5. Reduced variability, no accelerations, or
           variable decelerations                     -> Suspect
        6. Anything else                              -> Normal
    """
    reduced_variability = var in ('absent', 'low')

    if lb < 110 or lb > 170:
        return 'Pathologic'
    if decel in ('late', 'prolonged'):
        return 'Pathologic'
    if var == 'absent' and ac == 'absent':
        return 'Pathologic'
    if lb > 160 and reduced_variability:
        return 'Pathologic'
    # Absent variability is at least as concerning as low variability, so it
    # must never fall through to 'Normal' (previously only 'low' was checked
    # here, letting absent variability with accelerations be labelled Normal).
    if reduced_variability or ac == 'absent' or decel == 'variable':
        return 'Suspect'
    return 'Normal'

# Generate synthetic samples.
# The draw order (baseline, variability, acceleration, deceleration) is kept
# fixed: each draw consumes the seeded global NumPy RNG, so reordering the
# calls would produce a different dataset.
n = 5000
# Baseline heart rate: normal(mean=140, sd=12), cast to integer.
lbs = np.random.normal(loc=140, scale=12, size=n).astype(int)
vars_ = sample_variability(n)
acs = sample_acceleration(n)
decels = sample_decel(n)

# Label every record by walking the four feature arrays in lockstep.
labels = [
    label_from_rules(lb, var, ac, decel)
    for lb, var, ac, decel in zip(lbs, vars_, acs, decels)
]

df = pd.DataFrame(
    {
        'LB': lbs,
        'Variability': vars_,
        'Acceleration': acs,
        'Deceleration': decels,
        'Label': labels,
    }
)

# ---------------------------------------
# STEP 2: Model Training
# ---------------------------------------

# Feature matrix (numeric baseline + three categorical columns) and target.
cat_features = ['Variability', 'Acceleration', 'Deceleration']
X = df[['LB'] + cat_features]
y = df['Label']

# Hold out 20% of the rows for evaluation, stratified on the label so each
# class keeps its proportion in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# One-hot encode the categorical columns; the remaining column (LB) is passed
# through unchanged. Categories unseen at fit time are ignored when encoding.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features),
    ],
    remainder='passthrough',
)

# Preprocessing and classifier live in one pipeline so that raw feature
# frames can be fed to fit/predict directly.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(n_estimators=200, random_state=42)),
    ]
)
pipeline.fit(X_train, y_train)

# ---------------------------------------
# STEP 3: Evaluation
# ---------------------------------------

# Evaluate on the held-out test split.
# The same label order is passed to both the classification report and the
# confusion matrix so that their rows line up; without it the report sorts
# labels alphabetically (Normal, Pathologic, Suspect) while the matrix uses
# Normal, Suspect, Pathologic.
label_order = ['Normal', 'Suspect', 'Pathologic']

print("\n--- Model Evaluation ---")
y_pred = pipeline.predict(X_test)
print("\nClassification Report:\n",
      classification_report(y_test, y_pred, labels=label_order))
print("Confusion Matrix (rows=true, cols=pred):\n",
      confusion_matrix(y_test, y_pred, labels=label_order))

# ---------------------------------------
# STEP 4: Save Model
# ---------------------------------------

# Persist the whole fitted pipeline (preprocessor + classifier) with joblib,
# so a later load can predict straight from a raw feature DataFrame.
# The path is relative, so the file lands in the current working directory;
# model_filename is also the default model_path of predict_ctg below.
model_filename = "ctg_random_forest.pkl"
joblib.dump(pipeline, model_filename)
print(f"\n✅ Model saved to {model_filename}")

# ---------------------------------------
# STEP 5: User Manual Input
# ---------------------------------------

# Categorical values used to generate the training data, keyed by feature.
_CTG_ALLOWED_VALUES = {
    'Variability': ('absent', 'low', 'normal', 'excessive'),
    'Acceleration': ('present', 'absent'),
    'Deceleration': ('none', 'variable', 'early', 'late', 'prolonged'),
}


def predict_ctg(LB, Variability, Acceleration, Deceleration, model_path=model_filename):
    """Load the saved model and classify one CTG record.

    Args:
        LB: Baseline fetal heart rate; converted with ``int()``.
        Variability: One of 'absent', 'low', 'normal', 'excessive'.
        Acceleration: 'present' or 'absent'.
        Deceleration: One of 'none', 'variable', 'early', 'late', 'prolonged'.
        model_path: Path of the joblib file holding the fitted pipeline.

    Categorical arguments are matched case-insensitively and surrounding
    whitespace is ignored.

    Returns:
        ``(pred, prob_dict)`` where ``pred`` is the predicted label and
        ``prob_dict`` maps each class label to its predicted probability,
        rounded to 3 decimals.

    Raises:
        ValueError: If ``LB`` cannot be converted to an integer or a
            categorical argument is not one of the allowed values.
    """
    features = {'LB': int(LB)}

    # Validate before loading the model. The pipeline's encoder ignores
    # unknown categories, so a typo such as "nomal" would otherwise be
    # encoded as all zeros and still produce a prediction.
    supplied = (
        ('Variability', Variability),
        ('Acceleration', Acceleration),
        ('Deceleration', Deceleration),
    )
    for name, raw in supplied:
        value = raw.strip().lower()
        allowed = _CTG_ALLOWED_VALUES[name]
        if value not in allowed:
            raise ValueError(
                f"{name} must be one of {', '.join(allowed)}; got {raw!r}"
            )
        features[name] = value

    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; only pass a model_path that points to a trusted file.
    model = joblib.load(model_path)

    input_df = pd.DataFrame([features])
    pred = model.predict(input_df)[0]
    probs = model.predict_proba(input_df)[0]
    prob_dict = {
        label: round(float(prob), 3)
        for label, prob in zip(model.classes_, probs)
    }
    return pred, prob_dict

# ---------------------------------------
# STEP 6: Console Prompt for Another User
# ---------------------------------------

def _read_choice(prompt):
    """Prompt for one answer and return it trimmed and lower-cased."""
    return input(prompt).strip().lower()


print("\n--- Manual CTG Prediction ---")
print("Please enter the following values:")

# Top-level boundary of the script: any failure while reading the answers or
# predicting is reported on the console instead of raising.
try:
    # The baseline must parse as an integer.
    lb_input = int(input("Enter Baseline Fetal Heart Rate (LB): "))
    var_input = _read_choice("Enter Variability (absent / low / normal / excessive): ")
    ac_input = _read_choice("Is Acceleration present or absent? ")
    decel_input = _read_choice("Enter Deceleration (none / variable / early / late / prolonged): ")

    pred, probs = predict_ctg(lb_input, var_input, ac_input, decel_input)
    print("\n--- Prediction Result ---")
    print(f"Prediction: {pred}")
    print("Probabilities:", probs)
except Exception as exc:
    print("Error in input:", exc)


--- Model Evaluation ---

Classification Report:
               precision    recall  f1-score   support

      Normal       1.00      1.00      1.00       330
  Pathologic       0.99      0.98      0.99       187
     Suspect       0.99      1.00      1.00       483

    accuracy                           1.00      1000
   macro avg       1.00      0.99      1.00      1000
weighted avg       1.00      1.00      1.00      1000

Confusion Matrix (rows=true, cols=pred):
 [[330   0   0]
 [  0 482   1]
 [  0   3 184]]

✅ Model saved to ctg_random_forest.pkl

--- Manual CTG Prediction ---
Please enter the following values:
