In [5]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv(r"healthcare-dataset-stroke-data.csv")

# Drop 'id' column
df.drop(columns=['id'], inplace=True)

# Fill missing values in 'bmi'
df['bmi'].fillna(df['bmi'].median(), inplace=True)

# Encode categorical columns
categorical_cols = df.select_dtypes(include='object').columns
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Features and target
X = df.drop(columns=['stroke'])
y = df['stroke']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create LightGBM datasets
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Set parameters
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'verbosity': -1
}

# Train model
model = lgb.train(
    params,
    train_data,
    valid_sets=[test_data],
    num_boost_round=100,
    callbacks=[lgb.early_stopping(stopping_rounds=10)]
)

# Predict and evaluate
y_pred_prob = model.predict(X_test, num_iteration=model.best_iteration)
y_pred_label = (y_pred_prob >= 0.5).astype(int)

# Print results
print("Accuracy:", accuracy_score(y_test, y_pred_label))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred_label))


Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.160772
Accuracy: 0.9500978473581213

Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       972
           1       0.33      0.02      0.04        50

    accuracy                           0.95      1022
   macro avg       0.64      0.51      0.51      1022
weighted avg       0.92      0.95      0.93      1022



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['bmi'].fillna(df['bmi'].median(), inplace=True)
