In [2]:
# ---------------------------------------------
# Step 1: Import required libraries
# ---------------------------------------------
import pandas as pd
import pickle

# ---------------------------------------------
# Step 2: Load the processed dataset
# ---------------------------------------------
# Read the processed feature table (it already carries engineered columns
# such as comorbidity_sum) into a DataFrame named `data`.
dataset_path = 'data/processed/phase3_features_with_riskscore.csv'
data = pd.read_csv(dataset_path)

# Sanity report: confirm the load, then show dimensions and the leading rows.
print("✅ Loaded phase3_features_with_riskscore.csv successfully!")
print("Shape of dataset:", data.shape)
print("First few rows:\n", data.head())

# ---------------------------------------------
# Step 3: Load the trained Voting Classifier model
# ---------------------------------------------
# Deserialize the fitted model from disk; the `with` block closes the file
# handle as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this artifact must only ever come from a trusted source.
with open('models/voting_classifier_model.pkl', 'rb') as model_file:
    voting_model = pickle.load(model_file)

print("✅ Voting Classifier model loaded successfully!")

# ---------------------------------------------
# Step 4: Predict Risk Labels and Risk Probabilities
# ---------------------------------------------
# EventCKD35 is removed before prediction so it is not passed to the model
# as an input column (presumably it is the outcome label - confirm against
# the training notebook).
data_for_prediction = data.drop(columns=['EventCKD35'])

# If the fitted model recorded the column names it was trained on, select
# exactly those columns in that order. This keeps prediction working when the
# CSV gains extra columns or its column order changes; a missing feature
# surfaces as a KeyError naming the absent column(s). Models that do not
# expose `feature_names_in_` receive the frame unchanged, as before.
expected_features = getattr(voting_model, 'feature_names_in_', None)
if expected_features is not None:
    data_for_prediction = data_for_prediction[list(expected_features)]

# predict_proba returns one column per class; column index 1 is kept as the
# risk probability.
# NOTE(review): this assumes index 1 corresponds to EventCKD35 == 1 -
# verify against voting_model.classes_.
risk_probabilities = voting_model.predict_proba(data_for_prediction)[:, 1]
risk_labels = voting_model.predict(data_for_prediction)

# Step 5: Add predictions to the dataset
# The predictions are attached to the original frame (which still contains
# EventCKD35), one value per row.
data['Risk_Probability'] = risk_probabilities
data['Risk_Label'] = risk_labels

print("✅ Predictions added successfully!")
print(data[['Risk_Probability', 'Risk_Label']].head())

# Step 6: Save
# Write the DataFrame to CSV without the row index.
output_path = 'data/processed/final_dataset_for_powerbi.csv'
data.to_csv(output_path, index=False)
print("🎯 Final dataset 'final_dataset_for_powerbi.csv' saved successfully!")

✅ Loaded phase3_features_with_riskscore.csv successfully!
Shape of dataset: (491, 9)
First few rows:
    eGFRBaseline  CreatinineBaseline  AgeBaseline  HistoryDiabetes  DMmeds  \
0          93.3                59.0           64                0       0   
1         105.8                52.0           52                0       0   
2          99.8                57.0           56                0       0   
3          90.3                65.0           58                0       0   
4          79.7                70.0           63                1       1   

   HistoryCHD  ACEIARB  EventCKD35  comorbidity_sum  
0           0        0           0                0  
1           0        0           0                0  
2           0        0           0                0  
3           0        0           0                0  
4           0        1           0                3  
✅ Voting Classifier model loaded successfully!
✅ Predictions added successfully!
   Risk_Probability  Risk_Labe