# In [1]:
import pickle
from pathlib import Path

import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Training script: load the CSV, label-encode object columns, split, scale,
# fit an XGBoost classifier, and pickle everything needed to reuse it.

# Input CSV and output artifact locations, kept in one place.
DATA_PATH = Path(
    r"D:\Stroke_prediction_project\preprocessed_stroke_data.csv"
)
MODEL_PATH = Path(r"D:\Stroke_prediction_project\models\xgboost.pkl")

# Load dataset
df = pd.read_csv(DATA_PATH)

# Separate features and target ("Stroke" is the label column)
X = df.drop("Stroke", axis=1)
y = df["Stroke"]

# Encode categorical (object-dtype) columns as integer codes.
# One encoder per column is kept so the same mapping can be re-applied later.
encoders = {}
for col in X.select_dtypes(include=["object"]).columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    encoders[col] = le   # store encoder for later use

# Train-test split (80/20, fixed seed for a reproducible partition)
# NOTE(review): the split is not stratified; if "Stroke" is imbalanced,
# consider passing stratify=y -- confirm against the class distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features. The scaler is fitted on the training split only and
# is applied to every column of X, including the label-encoded ones.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train XGBoost model (no deprecated params)
model = xgb.XGBClassifier(eval_metric="logloss")
model.fit(X_train_scaled, y_train)

# Save model, scaler, feature names, and encoders as a single tuple, in that
# order. The output directory is created first so a fresh checkout without a
# "models" folder does not fail with FileNotFoundError.
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
with open(MODEL_PATH, "wb") as f:
    pickle.dump((model, scaler, X.columns.tolist(), encoders), f)
