In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import random

# Load the dataset
# The raw-string prefix keeps the backslashes of the Windows path literal.
file_path = r"C:\Users\Arin\Downloads\diet_recommendations_dataset.csv"

# Read the CSV and discard Patient_ID in one step: the identifier is not
# used as a predictor, so it is removed before any encoding happens.
df = pd.read_csv(file_path).drop(columns="Patient_ID")

# Encode categorical variables
# Every column listed here (the target "Diet_Recommendation" included) is
# replaced in `df` by the integer codes produced by its own LabelEncoder.
categorical_columns = [
    "Gender",
    "Disease_Type",
    "Severity",
    "Physical_Activity_Level",
    "Dietary_Restrictions",
    "Allergies",
    "Preferred_Cuisine",
    "Diet_Recommendation",
]

# One dedicated encoder per column, kept in a dict keyed by column name so the
# integer codes can be mapped back to the original labels later on.
label_encoders = {name: LabelEncoder() for name in categorical_columns}

# Note: the encoders are fitted on the full frame, i.e. before the
# train/test split performed further down.
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Split features and target
# y is the encoded diet label; X is every remaining column of df.
y = df["Diet_Recommendation"]
X = df.drop(columns=["Diet_Recommendation"])

# Split into training and test sets
# 80/20 hold-out; stratifying on y keeps the class proportions equal in both
# parts, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardize numerical features
# Only these columns are rescaled; the label-encoded categorical columns are
# passed to the models unchanged.
numerical_columns = ["Age", "Weight_kg", "Height_cm", "BMI", "Daily_Caloric_Intake",
                     "Cholesterol_mg/dL", "Blood_Pressure_mmHg", "Glucose_mg/dL",
                     "Weekly_Exercise_Hours", "Adherence_to_Diet_Plan", "Dietary_Nutrient_Imbalance_Score"]

# X_train / X_test are derived from X by train_test_split. Take explicit copies
# before assigning columns so the writes below never target a frame that pandas
# still tracks as a slice of X (which, depending on the pandas / scikit-learn
# versions, raises SettingWithCopyWarning) and so X itself is never affected.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same mean/std to the
# test rows, so no statistics of the held-out data leak into training.
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])

# Train models
# Both estimators are fitted on the same training frame and labels.
# `.fit()` returns the estimator itself, so construction and training chain.

# 100-tree random forest with a fixed seed for repeatable results.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Logistic regression with the iteration cap raised to 500.
lr_model = LogisticRegression(max_iter=500).fit(X_train, y_train)

# Make predictions
# Encoded class predictions for every held-out row, one array per model.
rf_pred = rf_model.predict(X_test)
lr_pred = lr_model.predict(X_test)

# Evaluate models
# The same three-line summary is printed for each model; the leading "\n" in
# the second heading separates the two reports in the output.
# NOTE(review): the saved run reports 1.00 accuracy for both models - worth
# confirming that no feature directly encodes the target.
model_predictions = (
    ("Random Forest Model", rf_pred),
    ("\nLogistic Regression Model", lr_pred),
)
for heading, predictions in model_predictions:
    print(heading)
    print(f"Accuracy: {accuracy_score(y_test, predictions):.2f}")
    print("Classification Report:\n", classification_report(y_test, predictions))

# Predict on a random patient
# A dedicated generator seeded with 42 (the seed used for the split and the
# forest) makes the pick repeatable under Restart & Run All without touching
# the global `random` state.
random_index = random.Random(42).randint(0, len(X_test) - 1)

# Double brackets keep the selection as a one-row DataFrame. The models were
# fitted on a DataFrame, so handing them a bare ndarray would drop the column
# names (scikit-learn warns "X does not have valid feature names") and would
# upcast the mixed-dtype row to a single dtype.
random_patient = X_test.iloc[[random_index]]

# Each predict call returns a length-1 array; take its only element.
rf_prediction = rf_model.predict(random_patient)[0]
lr_prediction = lr_model.predict(random_patient)[0]

# Decode predictions back to categorical labels
# Both codes are decoded in one call with the encoder fitted on the target.
diet_encoder = label_encoders["Diet_Recommendation"]
rf_pred_label, lr_pred_label = diet_encoder.inverse_transform([rf_prediction, lr_prediction])

print("\nRandom Patient Prediction:")
print(f"Random Forest Prediction: {rf_pred_label}")
print(f"Logistic Regression Prediction: {lr_pred_label}")


Random Forest Model
Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        85
           1       1.00      1.00      1.00        52
           2       1.00      1.00      1.00        63

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Logistic Regression Model
Accuracy: 1.00
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        85
           1       1.00      1.00      1.00        52
           2       1.00      1.00      1.00        63

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200


Random Patient Prediction:
Random Forest Prediction: Low_Sodium
Logistic Regression Prediction: Low_Sodium


