In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error, r2_score, precision_recall_fscore_support

# Load the raw dataset from disk.
# `data` keeps the frame exactly as it was read; `df` is an independent working
# copy, so the in-place label encoding applied to `df` further down cannot
# leak back into the raw frame.
data = pd.read_csv('dataset.csv')
df = data.copy()

# Columns fed to the models, and the columns the models must predict
input_features = ['Age', 'Height', 'Weight', 'Gender', 'BMI', 'Fitness Level', 'Medical History']
target_features = ['Diet Recommended', 'Exercise', 'BMR', 'Calories']

# Label-encode every non-numeric column, features and targets alike.
# The fitted encoder of each encoded column is kept in `label_encoders` so new
# inputs can be encoded and class predictions decoded later.
# The check is "not numeric" rather than "dtype == object" so that text stored
# with a `string` or `category` dtype is encoded too instead of reaching the
# models as raw strings.
label_encoders = {}
for col in input_features + target_features:
    if not pd.api.types.is_numeric_dtype(df[col]):
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Build the feature matrix and the multi-column target frame
X, y = df[input_features], df[target_features]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Hyperparameters that limit tree growth to control overfitting
max_depth = 5  # Maximum depth of each tree
min_samples_split = 10  # Minimum number of samples required to split an internal node
n_estimators = 100  # Number of trees in each forest

# Per-target results, keyed by target column name
models = {}
predictions = {}
reports = {}  # classification targets only
metrics = {}

# Settings shared by the regressors and the classifiers
forest_params = dict(
    random_state=42,
    n_estimators=n_estimators,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
)

# Train one Random Forest per target variable
for target in target_features:
    # A target is a classification task exactly when it was label-encoded
    # (i.e. it has an entry in `label_encoders`). Testing the column dtype
    # instead is unreliable: encoded class labels are integers too, so an
    # int64 check can send categorical targets down the regression path.
    if target in label_encoders:
        # Classification task
        clf = RandomForestClassifier(**forest_params)
        clf.fit(X_train, y_train[target])
        models[target] = clf
        predictions[target] = clf.predict(X_test)

        # Classes that are never predicted have an undefined precision;
        # zero_division=0 reports them as 0 explicitly instead of emitting an
        # UndefinedMetricWarning for every metric call.
        accuracy = accuracy_score(y_test[target], predictions[target])
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_test[target], predictions[target], average='weighted', zero_division=0
        )
        metrics[target] = {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1
        }
        reports[target] = classification_report(y_test[target], predictions[target], zero_division=0)
        print(f"Accuracy for {target}: {accuracy}\n")
        print(f"Classification Report for {target}:\n{reports[target]}\n")
    else:
        # Regression task (numeric target that was not label-encoded)
        reg = RandomForestRegressor(**forest_params)
        reg.fit(X_train, y_train[target])
        models[target] = reg
        predictions[target] = reg.predict(X_test)

        # Regression metrics on the held-out rows
        mse = mean_squared_error(y_test[target], predictions[target])
        r2 = r2_score(y_test[target], predictions[target])
        metrics[target] = {
            "MSE": mse,
            "R2 Score": r2
        }
        print(f"MSE for {target}: {mse}")
        print(f"R2 Score for {target}: {r2}\n")

# Optional: map encoded class ids back to their original labels.
# Only targets that have a fitted encoder are decoded, and the numeric
# targets BMR / Calories are always left untouched.
decodable_targets = [
    name
    for name in target_features
    if name in label_encoders and name not in ['BMR', 'Calories']
]
for target in decodable_targets:
    y_test[target] = label_encoders[target].inverse_transform(y_test[target])
    predictions[target] = label_encoders[target].inverse_transform(predictions[target])

# Show the held-out predictions, one column per target
print("Predictions:\n", pd.DataFrame(predictions))

# Show the evaluation metrics, one row per target
print("Metrics:\n", pd.DataFrame(metrics).T)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy for Diet Recommended: 0.9597992916174735

Classification Report for Diet Recommended:
              precision    recall  f1-score   support

           0       1.00      0.86      0.93       160
           1       0.00      0.00      0.00        54
           2       0.00      0.00      0.00        87
           3       1.00      0.55      0.71       145
           4       0.88      1.00      0.94      2720
           5       1.00      1.00      1.00       242
           6       1.00      1.00      1.00       234
           7       1.00      0.47      0.63       129
           8       1.00      0.97      0.98        91
           9       1.00      1.00      1.00       131
          10       1.00      0.81      0.90       159
          11       1.00      1.00      1.00       222
          12       1.00      0.91      0.95        77
          13       1.00      0.38      0.55       144
          14       1.00      0.95      0.97       164
          15       1.00      1.00      1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Accuracy for Exercise: 0.880047225501771

Classification Report for Exercise:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        49
           1       1.00      0.15      0.26       249
           2       1.00      0.13      0.23       160
           3       1.00      0.79      0.88       145
           4       0.89      0.17      0.28       232
           5       1.00      0.16      0.28       242
           6       1.00      0.09      0.16       164
           7       0.00      0.00      0.00        77
           8       1.00      0.54      0.70       222
           9       1.00      0.09      0.17       142
          10       0.00      0.00      0.00        71
          11       1.00      0.12      0.22       298
          12       1.00      0.83      0.91       151
          13       0.90      1.00      0.95      2891
          14       0.92      1.00      0.96      2121
          15       0.79      1.00      0.88      1782
   

In [8]:
import pickle

# Save each model to its own pickle so targets can be reloaded independently
for target in target_features:
    with open(f'model_{target}.pkl', 'wb') as f:
        pickle.dump(models[target], f)

# Save the fitted label encoders as well. The models were trained on encoded
# values, so encoding new inputs and decoding class predictions requires these
# exact encoders; without them the model pickles are only usable while this
# kernel session is still alive.
with open('label_encoders.pkl', 'wb') as f:
    pickle.dump(label_encoders, f)


In [9]:
import pickle
import pandas as pd

# Load the pickled model for each target variable.
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# pickles written by this notebook or another trusted source.
models = {}
for target in target_features:
    with open(f'model_{target}.pkl', 'rb') as f:
        models[target] = pickle.load(f)

# One-row frame describing the person to generate recommendations for
input_data = pd.DataFrame({
    'Age': [58],
    'Height': [4.9],
    'Weight': [51],
    'Gender': ['M'],
    'BMI': [22.98],
    'Fitness Level': ['Normal Weight'],
    'Medical History': ['Hypercholesterolaemia']
})

# Put the columns in the order used for training (the models were fitted on
# df[input_features]) instead of relying on the dict literal above matching it.
# A missing column raises KeyError here; .copy() gives an independent frame so
# the column assignments below do not trigger chained-assignment warnings.
input_data = input_data[input_features].copy()

# Encode the categorical inputs with the encoders fitted during training
for col in input_features:
    if col in label_encoders:
        input_data[col] = label_encoders[col].transform(input_data[col])

# Predict every target from the encoded row
predictions = {}
for target in target_features:
    predictions[target] = models[target].predict(input_data)

# Decode class ids back to their original labels for the classification targets
for target in target_features:
    if target in label_encoders:
        predictions[target] = label_encoders[target].inverse_transform(predictions[target].astype(int))

# Display the single prediction made for each target
for target, prediction in predictions.items():
    print(f"Prediction for {target}: {prediction[0]}")


Prediction for Diet Recommended: Heartwise Nourish
Prediction for Exercise: Running
Prediction for BMR: 1199.1998853733478
Prediction for Calories: 1831.0777466742404
