In [None]:
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, classification_report

# Validation script: score a preprocessed diabetes dataset with a previously
# trained Random Forest model, print metrics when ground-truth labels are
# present, and write the per-row predictions to a CSV file.

# Load the preprocessed dataset for validation
file_path = "indian_diabetes_preprocessed.csv"
df_new = pd.read_csv(file_path)

# Load the trained Random Forest model.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
rf_model = joblib.load("random_forest_model.pkl")

# Feature columns, in the order they are handed to the model.
# NOTE(review): assumed to mirror the columns/order used at training time --
# confirm against the training script.
expected_columns = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'Age', 'Pregnancies']

# Fail early, with an actionable message, if the file lacks required features
# (the column selection below would otherwise raise a less specific KeyError).
missing_columns = [col for col in expected_columns if col not in df_new.columns]
if missing_columns:
    raise KeyError(
        f"{file_path} is missing required feature column(s): {missing_columns}"
    )

# Keep the ground-truth labels when the file provides them
y_true = df_new['Outcome'] if 'Outcome' in df_new.columns else None

# Selecting by list yields a new frame holding only the model features
# (so 'Outcome' is excluded) in the expected order.
df_new_X = df_new[expected_columns]

# Make predictions
y_pred = rf_model.predict(df_new_X)

# predict_proba columns follow the order of rf_model.classes_, so locate the
# positive class (label 1) explicitly rather than assuming it sits at column
# index 1. Fall back to index 1 when the label cannot be found, which keeps
# the previous behaviour for models with other label encodings.
class_labels = list(getattr(rf_model, "classes_", []))
positive_col = class_labels.index(1) if 1 in class_labels else 1
y_pred_proba = rf_model.predict_proba(df_new_X)[:, positive_col]

# Store predictions in the dataframe
df_new["Predicted_Outcome"] = y_pred
df_new["Diabetes_Probability"] = y_pred_proba

# Evaluate performance if labels exist
if y_true is not None:
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))

# Save predictions to a new CSV file
df_new.to_csv("indian_diabetes_predictions.csv", index=False)

print("Validation complete. Predictions saved in 'indian_diabetes_predictions.csv'.")