In [None]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE


# Load the heart-disease table and separate the target column from the features.
data = pd.read_csv('/content/drive/MyDrive/heart.csv')
y = data['HeartDisease']
X = data.drop(columns='HeartDisease')

# Columns treated as categorical. Each gets its own LabelEncoder, which maps
# the distinct values of the column to integer codes.
categorical_cols = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Fit one encoder per column on the raw values first ...
label_encoders = {col: LabelEncoder().fit(X[col]) for col in categorical_cols}

# ... then replace each column with its integer codes and persist the fitted
# encoder to 'le_<column>.joblib' so the same mapping can be reloaded later.
for col, le in label_encoders.items():
    X[col] = le.transform(X[col])
    joblib.dump(le, f'le_{col}.joblib')


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Oversample the minority class with SMOTE. Only the training portion is
# resampled, so the held-out rows stay untouched.
# NOTE(review): SMOTE runs here on unscaled features that include the
# integer-encoded categorical columns, so synthetic rows may carry fractional
# category codes -- confirm this is acceptable (SMOTENC is the usual alternative).
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)


# Standardise features. The scaler learns its statistics from the resampled
# training data only and is then applied unchanged to the test data.
scaler = StandardScaler().fit(X_train_res)
X_train_scaled = scaler.transform(X_train_res)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler so inference can apply the identical transform.
joblib.dump(scaler, 'scaler.joblib')


# Wrap the scaled, resampled training matrix and its labels for LightGBM.
train_data = lgb.Dataset(data=X_train_scaled, label=y_train_res)

# Booster configuration for a binary classifier.
params = {
    'objective': 'binary',          # binary classification
    'metric': 'binary_logloss',     # metric evaluated on the validation sets
    'boosting_type': 'gbdt',        # gradient-boosted decision trees
    'num_leaves': 31,               # cap on leaves per tree
    'learning_rate': 0.05,          # shrinkage applied to each boosting step
    'feature_fraction': 0.9,        # fraction of features sampled per tree
    'bagging_fraction': 0.8,        # fraction of rows sampled when bagging
    'bagging_freq': 5,              # re-bag every 5 iterations
    'verbose': 0,
    'is_unbalance': True,           # ask LightGBM to compensate for class imbalance
}

# Train for 100 boosting rounds.
# NOTE: the only validation set is the training data itself, so any metric
# tracked during training is a training-set metric, not a held-out estimate.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=100,
    valid_sets=[train_data],
)

# Persist the trained booster next to the encoders and scaler.
joblib.dump(model, 'lgbm_model.joblib')

# Score the held-out rows. With the 'binary' objective the booster returns,
# for each row, the predicted probability of the positive class (label 1).
y_pred = model.predict(X_test_scaled)

# Convert probabilities to hard labels: probability above 0.5 -> class 1.
# (The comparison must be '>'; using '<' assigns every row the opposite label
# and drives accuracy far below chance on an otherwise well-fitted model.)
y_pred_class = [1 if p > 0.5 else 0 for p in y_pred]

# Rows of the confusion matrix are true labels, columns are predicted labels.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_class))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_class))


Confusion Matrix:
[[10 67]
 [93 14]]

Classification Report:
              precision    recall  f1-score   support

           0       0.10      0.13      0.11        77
           1       0.17      0.13      0.15       107

    accuracy                           0.13       184
   macro avg       0.13      0.13      0.13       184
weighted avg       0.14      0.13      0.13       184

Risk scores saved to 'risk_scores.csv'.


In [None]:
# Describe the sample by feature name rather than by position, so the values
# cannot silently land in the wrong column when they are passed to the scaler
# and the model (both expect the column order of the training frame `X`).
# NOTE(review): a RestingBP of 14.0 looks like a possible typo -- confirm.
patient = {
    'Sex': 1,             # encoded
    'Age': 58.0,
    'ChestPainType': 2,   # encoded
    'RestingBP': 14.0,
    'Cholesterol': 90.0,
    'FastingBS': 0,
    'RestingECG': 1,      # encoded
    'MaxHR': 60.0,
    'ExerciseAngina': 0,  # encoded
    'Oldpeak': 1.5,
    'ST_Slope': 0,        # encoded
}

# Fail loudly if the sample does not provide every feature used in training.
missing_features = [name for name in X.columns if name not in patient]
if missing_features:
    raise KeyError(f"input is missing feature(s): {missing_features}")

# Feature vector arranged in exactly the order the model was trained on.
input_data = [patient[name] for name in X.columns]

# Apply the training-time scaling; passing a named one-row frame lets
# scikit-learn check the feature names against those seen during fit.
scaled_input = scaler.transform(pd.DataFrame([input_data], columns=X.columns))

# Predicted probability of the positive class, thresholded at 0.5.
prediction = model.predict(scaled_input)
y_pred_class = [1 if p > 0.5 else 0 for p in prediction]

print("Prediction:", y_pred_class)

# The raw probability doubles as a continuous risk score.
risk_score = prediction[0]
print(risk_score)

Prediction: [1]
0.5485027036631479


