- user inputs to calories
- dividing calories into breakfast, lunch, dinner calories
- cluster food dataset into breakfast, lunch, dinner labels
- dividing combined_dataset into breakfast, lunch, dinner
- train model with calories(step2) input and the nutrients output
- breakfast, lunch, dinner calories to nutrients.
- nutrients to breakfast, lunch, dinner food items prediction.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Load dataset
file_path = "human_input_to_calories_dataset.csv"
data = pd.read_csv(file_path)

# Check dataset
print(data.head())

# Encode categorical variable 'gender'
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])

# Features and target
features = ['age', 'weight(kg)', 'height(m)', 'gender', 'BMI', 'BMR', 'activity_level']
target = 'calories_to_maintain_weight'

X = data[features].values
y = data[target].values

# Scale features and target
scaler = StandardScaler()
X = scaler.fit_transform(X)

y_mean = y.mean()
y_std = y.std()
y = (y - y_mean) / y_std  # Normalize target

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the model
class CaloriesModel(nn.Module):
    def __init__(self):
        super(CaloriesModel, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(7, 128),  # Matches the 7 features
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.fc(x)

model = CaloriesModel()

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Learning rate scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch).squeeze()  # Ensure output is 1D for MSE loss
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    scheduler.step()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
total_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch).squeeze()
        loss = criterion(outputs, y_batch)
        total_loss += loss.item()

print(f"Test Loss: {total_loss/len(test_loader):.4f}")

# Predict on new data
def predict(inputs):
    inputs = scaler.transform([inputs])  # Ensure input matches training features
    inputs_tensor = torch.tensor(inputs, dtype=torch.float32)
    with torch.no_grad():
        prediction = model(inputs_tensor).item()
    return prediction * y_std + y_mean  # Reverse normalization

# Example prediction (must include all 7 features)
new_input = [25, 90, 1.8, 1, 27.78, 1800, 1.5]  # age, weight, height, gender, BMI, BMR, activity_level
predicted_calories = predict(new_input)
print(f"Predicted Calories to Maintain Weight: {predicted_calories:.2f}")

   Unnamed: 0  age  weight(kg)  height(m) gender        BMI      BMR  \
0           1    2   16.097862   0.932025      F  18.531612  958.584   
1           2    4   14.619374   0.916687      F  17.397496  932.383   
2           4    4   17.899918   0.997288      F  17.997414  977.578   
3           6    3   13.532880   1.022786      F  12.936609  944.689   
4           8    4   17.039484   1.053977      M  15.338909  799.229   

   activity_level  calories_to_maintain_weight  BMI_tags  Label  
0             1.2                    1150.3008         8     13  
1             1.7                    1585.0511         7      1  
2             1.9                    1857.3982         7      4  
3             1.9                    1794.9091         7      3  
4             1.9                    1518.5351         7      0  
Epoch 1/20, Loss: 0.5521
Epoch 2/20, Loss: 0.0170
Epoch 3/20, Loss: 0.0026
Epoch 4/20, Loss: 0.0013
Epoch 5/20, Loss: 0.0008
Epoch 6/20, Loss: 0.0006
Epoch 7/20, Loss: 0.0

In [8]:
# dividing calories into breakfast, lunch, dinner calories

def divide_calories(total_calories):
    # Define meal distribution percentages
    breakfast_ratio = 0.25
    lunch_ratio = 0.31
    dinner_ratio = 0.35

    # Calculate calories for each meal
    breakfast_calories = total_calories * breakfast_ratio
    lunch_calories = total_calories * lunch_ratio
    dinner_calories = total_calories * dinner_ratio

    return breakfast_calories, lunch_calories, dinner_calories

# Get meal-wise calorie distribution
breakfast, lunch, dinner = divide_calories(predicted_calories)

# Display results
print(f"Total Calories Required: {predicted_calories:.2f} kcal")
print(f"Breakfast: {breakfast:.2f} kcal")
print(f"Lunch: {lunch:.2f} kcal")
print(f"Dinner: {dinner:.2f} kcal")


Total Calories Required: 2732.14 kcal
Breakfast: 683.04 kcal
Lunch: 846.96 kcal
Dinner: 956.25 kcal


In [9]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import os

# Function to process each dataset
def process_food_dataset(file_path):
    # Load the dataset
    food_df = pd.read_csv(file_path)

    # Preprocessing: Select numeric columns for clustering
    numeric_columns = [
        "Caloric Value", "Fat", "Saturated Fats", "Monounsaturated Fats", "Polyunsaturated Fats",
        "Carbohydrates", "Sugars", "Protein", "Dietary Fiber", "Cholesterol", "Sodium", "Water",
        "Vitamin A", "Vitamin B1", "Vitamin B11", "Vitamin B12", "Vitamin B2", "Vitamin B3",
        "Vitamin B5", "Vitamin B6", "Vitamin C", "Vitamin D", "Vitamin E", "Vitamin K",
        "Calcium", "Copper", "Iron", "Magnesium", "Manganese", "Phosphorus", "Potassium",
        "Selenium", "Zinc", "Nutrition Density"
    ]

    # Drop rows with missing values in numeric columns
    food_df.dropna(subset=numeric_columns, inplace=True)

    # Keep a copy of the original numeric data for mapping
    food_df_original = food_df[numeric_columns].copy()

    # Normalize numeric data
    scaler = StandardScaler()
    food_df[numeric_columns] = scaler.fit_transform(food_df[numeric_columns])

    # Perform clustering on scaled data
    kmeans = KMeans(n_clusters=3, random_state=42)
    food_df['Cluster'] = kmeans.fit_predict(food_df[numeric_columns])

    # Compute mean of original values per cluster
    cluster_means_original = food_df_original.groupby(food_df['Cluster']).mean()

    # Map clusters to meal types based on nutritional criteria
    # Dinner: Lowest 'Caloric Value' (light calories)
    dinner_cluster = cluster_means_original['Caloric Value'].idxmin()
    remaining_clusters = cluster_means_original.drop(dinner_cluster)
    
    # Breakfast: Highest 'Carbohydrates' among remaining (high carbs)
    breakfast_cluster = remaining_clusters['Carbohydrates'].idxmax()
    
    # Lunch: The remaining cluster (balanced macros)
    lunch_cluster = remaining_clusters.drop(breakfast_cluster).index[0]

    # Create mapping dictionary
    cluster_mapping = {
        breakfast_cluster: 'Breakfast',
        lunch_cluster: 'Lunch',
        dinner_cluster: 'Dinner'
    }

    # Apply the mapping
    food_df['type'] = food_df['Cluster'].map(cluster_mapping)

    # Print characteristics to verify the mapping
    print(f"\nProcessing file: {os.path.basename(file_path)}")
    for meal in ['Breakfast', 'Lunch', 'Dinner']:
        cluster = [k for k, v in cluster_mapping.items() if v == meal][0]
        print(f"{meal} characteristics:")
        print(cluster_means_original.loc[cluster][['Caloric Value', 'Carbohydrates', 'Protein', 'Fat']].to_string())
        print()

    # Optional supervised learning (if pre-labeled data exists)
    if "type" in food_df.columns:
        labeled_df = food_df[food_df['type'].notnull()]
    else:
        labeled_df = pd.DataFrame(columns=numeric_columns + ["type"])

    # Train a RandomForestClassifier if labeled data is available
    if not labeled_df.empty:
        X = labeled_df[numeric_columns]
        y = labeled_df['type']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        clf = RandomForestClassifier(random_state=42)
        clf.fit(X_train, y_train)
        # Predict 'type' for all data
        food_df['type'] = clf.predict(food_df[numeric_columns])

    return food_df  # Return DataFrame with all columns and 'type'

# List of file paths to the datasets (replace with your actual paths)
file_paths = [
    "FOOD-DATA-GROUP1.csv",
    "FOOD-DATA-GROUP2.csv",
    "FOOD-DATA-GROUP3.csv",
    "FOOD-DATA-GROUP4.csv",
    "FOOD-DATA-GROUP5.csv"
]

# Process all datasets and combine the results
combined_df = pd.DataFrame()

for file_path in file_paths:
    processed_df = process_food_dataset(file_path)
    combined_df = pd.concat([combined_df, processed_df], ignore_index=True)

# Save the combined dataset
combined_df.to_csv("combined_food_dataset_with_details.csv", index=False)

# Display the first few rows of the combined dataset
print("\nCombined dataset preview:")
print(combined_df.head())


Processing file: FOOD-DATA-GROUP1.csv
Breakfast characteristics:
Caloric Value    402.033113
Carbohydrates     33.584768
Protein           20.687417
Fat               20.476821

Lunch characteristics:
Caloric Value    597.564103
Carbohydrates     13.107692
Protein           68.294872
Fat               28.523077

Dinner characteristics:
Caloric Value    129.565097
Carbohydrates      8.680856
Protein           12.080163
Fat                4.787202


Processing file: FOOD-DATA-GROUP2.csv
Breakfast characteristics:
Caloric Value    1363.222222
Carbohydrates     138.154333
Protein            12.622222
Fat                57.066667

Lunch characteristics:
Caloric Value    533.744186
Carbohydrates     94.269767
Protein           20.062791
Fat               10.358140

Dinner characteristics:
Caloric Value    168.797753
Carbohydrates     25.667045
Protein            2.682648
Fat                4.057633


Processing file: FOOD-DATA-GROUP3.csv
Breakfast characteristics:
Caloric Value    1643.1428

In [10]:
# dividing combined_dataset into breakfast, lunch, dinner

# Load dataset
data = pd.read_csv("combined_food_dataset_with_details.csv")  # Replace with actual file path


# Split dataset based on 'type' field
breakfast_data = data[data['type'] == 'Breakfast'].reset_index(drop=True)
lunch_data = data[data['type'] == 'Lunch'].reset_index(drop=True)
dinner_data = data[data['type'] == 'Dinner'].reset_index(drop=True)

print(breakfast_data)

# Save datasets
breakfast_data.to_csv("breakfast_data.csv", index=False)
lunch_data.to_csv("lunch_data.csv", index=False)
dinner_data.to_csv("dinner_data.csv", index=False)

print("Datasets split and saved successfully.")


     Unnamed: 0.1  Unnamed: 0                          food  Caloric Value  \
0              11          11             port salut cheese       1.143608   
1              21          21                asadero cheese       0.827113   
2              23          23  provolone cheese reduced fat       0.364928   
3              24          24              roquefort cheese       0.385023   
4              25          25           queso blanco cheese       0.646258   
..            ...         ...                           ...            ...   
213           140         140                  tomato juice      -0.585036   
214           141         141               vegetable juice      -0.524106   
215           205         205        pistachio nuts roasted       0.080125   
216           228         228                   bran flakes      -0.128055   
217           270         270                  adobo fresco       3.007233   

          Fat  Saturated Fats  Monounsaturated Fats  Polyunsatu

In [11]:
# train model with calories(step2) input and the nutrients output

# Prepare data for model
def cal_to_nut(data):
  features = ['Caloric Value']
  targets = [col for col in data.columns if col not in ['Caloric Value', 'type', 'food', "Unnamed: 0.1", "Unnamed: 0", "Cluster"]]

  X = data[features].values
  y = data[targets].values

  scaler_X = StandardScaler()
  scaler_y = StandardScaler()
  X = scaler_X.fit_transform(X)
  y = scaler_y.fit_transform(y)

  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
  X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
  y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
  y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

  train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
  test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

  train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
  test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

  # Define the model
  class NutritionModel(nn.Module):
      def __init__(self, input_size, output_size):
          super(NutritionModel, self).__init__()
          self.fc = nn.Sequential(
              nn.Linear(input_size, 128),
              nn.ReLU(),
              nn.Linear(128, 64),
              nn.ReLU(),
              nn.Linear(64, output_size)
          )

      def forward(self, x):
          return self.fc(x)

  model = NutritionModel(input_size=1, output_size=len(targets))

  # Loss and optimizer
  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=0.001)

  # Training loop
  epochs = 20
  for epoch in range(epochs):
      model.train()
      epoch_loss = 0
      for X_batch, y_batch in train_loader:
          optimizer.zero_grad()
          outputs = model(X_batch)
          loss = criterion(outputs, y_batch)
          loss.backward()
          optimizer.step()
          epoch_loss += loss.item()
      print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader):.4f}")



  # Evaluation
  # model.eval()
  # total_loss = 0
  # with torch.no_grad():
  #     for X_batch, y_batch in test_loader:
  #         outputs = model(X_batch)
  #         loss = criterion(outputs, y_batch)
  #         total_loss += loss.item()
  # print(f"Test Loss: {total_loss/len(test_loader):.4f}")

  return model, scaler_X, scaler_y, targets


In [12]:
# breakfast, lunch, dinner calories to nutrients.

def predict_nutrients(caloric_value, model, scaler_X, scaler_y):
      caloric_value = scaler_X.transform([[caloric_value]])
      caloric_value_tensor = torch.tensor(caloric_value, dtype=torch.float32)
      with torch.no_grad():
          prediction = model(caloric_value_tensor).numpy()
      return scaler_y.inverse_transform(prediction)

# Example test
# sample_calories = 500  # Example calorie input
# predicted_nutrients = predict_nutrients(sample_calories)
# print("Predicted Nutrients:")
# for i, col in enumerate(targets):
#     print(f"{col}: {predicted_nutrients[0][i]:.2f}")

In [13]:
breakfast_data = pd.read_csv("breakfast_data.csv")
lunch_data = pd.read_csv("lunch_data.csv")
dinner_data = pd.read_csv("dinner_data.csv")

bk_model, scaler_X1, scaler_y1, targets1 = cal_to_nut(breakfast_data)
lu_model, scaler_X2, scaler_y2, targets2 = cal_to_nut(lunch_data)
di_model, scaler_X3, scaler_y3, targets3 = cal_to_nut(dinner_data)

Epoch 1/20, Loss: 0.9396
Epoch 2/20, Loss: 0.8935
Epoch 3/20, Loss: 0.9088
Epoch 4/20, Loss: 0.9122
Epoch 5/20, Loss: 0.8700
Epoch 6/20, Loss: 0.8154
Epoch 7/20, Loss: 0.8580
Epoch 8/20, Loss: 0.8353
Epoch 9/20, Loss: 0.8300
Epoch 10/20, Loss: 0.7679
Epoch 11/20, Loss: 0.7688
Epoch 12/20, Loss: 0.7901
Epoch 13/20, Loss: 0.7798
Epoch 14/20, Loss: 0.7564
Epoch 15/20, Loss: 0.7623
Epoch 16/20, Loss: 0.7576
Epoch 17/20, Loss: 0.7600
Epoch 18/20, Loss: 0.8246
Epoch 19/20, Loss: 0.8388
Epoch 20/20, Loss: 0.7894
Epoch 1/20, Loss: 0.9099
Epoch 2/20, Loss: 0.8972
Epoch 3/20, Loss: 0.7958
Epoch 4/20, Loss: 0.7799
Epoch 5/20, Loss: 0.7341
Epoch 6/20, Loss: 0.7137
Epoch 7/20, Loss: 0.7238
Epoch 8/20, Loss: 0.7019
Epoch 9/20, Loss: 0.6931
Epoch 10/20, Loss: 0.7561
Epoch 11/20, Loss: 0.7318
Epoch 12/20, Loss: 0.7387
Epoch 13/20, Loss: 0.6761
Epoch 14/20, Loss: 0.6805
Epoch 15/20, Loss: 0.6650
Epoch 16/20, Loss: 0.6501
Epoch 17/20, Loss: 0.6829
Epoch 18/20, Loss: 0.7255
Epoch 19/20, Loss: 0.7791
Epoc

In [14]:
# breakfast, lunch, dinner calories to nutrients.
breakfast_calories, lunch_calories, dinner_calories = divide_calories(predicted_calories)

predicted_bk = predict_nutrients(breakfast_calories, bk_model, scaler_X1, scaler_y1)
predicted_lu = predict_nutrients(lunch_calories, bk_model, scaler_X2, scaler_y2)
predicted_di = predict_nutrients(dinner_calories, bk_model, scaler_X3, scaler_y3)

# Store predictions
breakfast_nutrients = {col: predicted_bk[0][i] for i, col in enumerate(targets1)}
lunch_nutrients = {col: predicted_lu[0][i] for i, col in enumerate(targets2)}
dinner_nutrients = {col: predicted_di[0][i] for i, col in enumerate(targets3)}

# Print predictions
print("Breakfast Nutrients:")
for col, value in breakfast_nutrients.items():
    print(f"{col}: {value:.2f}")

print("Lunch Nutrients:")
for col, value in lunch_nutrients.items():
    print(f"{col}: {value:.2f}")

print("Dinner Nutrients:")
for col, value in dinner_nutrients.items():
    print(f"{col}: {value:.2f}")

Breakfast Nutrients:
Fat: 564.81
Saturated Fats: 547.62
Monounsaturated Fats: 707.01
Polyunsaturated Fats: 733.55
Carbohydrates: -109.57
Sugars: 95.05
Protein: 379.76
Dietary Fiber: -164.63
Cholesterol: 71.35
Sodium: 225.32
Water: 400.45
Vitamin A: 8.84
Vitamin B1: 148.94
Vitamin B11: -44.00
Vitamin B12: 53.34
Vitamin B2: 342.65
Vitamin B3: 357.42
Vitamin B5: 188.79
Vitamin B6: 290.79
Vitamin C: -12.55
Vitamin D: 40.49
Vitamin E: 383.14
Vitamin K: 67.54
Calcium: 153.66
Copper: -9.72
Iron: 106.51
Magnesium: 162.93
Manganese: -34.99
Phosphorus: 498.89
Potassium: 369.87
Selenium: -3.52
Zinc: 385.88
Nutrition Density: 378.00
Lunch Nutrients:
Fat: 725.57
Saturated Fats: 721.08
Monounsaturated Fats: 777.48
Polyunsaturated Fats: 970.58
Carbohydrates: -111.57
Sugars: 81.04
Protein: 440.27
Dietary Fiber: -137.77
Cholesterol: 100.48
Sodium: 154.32
Water: 323.24
Vitamin A: 12.78
Vitamin B1: 126.40
Vitamin B11: -54.11
Vitamin B12: 54.70
Vitamin B2: 178.52
Vitamin B3: 452.26
Vitamin B5: 189.40
Vita

In [15]:
# import pandas as pd
# import numpy as np
# import random

# # Load datasets
# breakfast_data = pd.read_csv("/content/breakfast_data.csv")
# lunch_data = pd.read_csv("/content/lunch_data.csv")
# dinner_data = pd.read_csv("/content/dinner_data.csv")

# # Nutrient columns
# all_nutrients = [
#     "Caloric Value", "Fat", "Saturated Fats", "Monounsaturated Fats", "Polyunsaturated Fats",
#     "Carbohydrates", "Sugars", "Protein", "Dietary Fiber", "Cholesterol", "Sodium", "Water",
#     "Vitamin A", "Vitamin B1", "Vitamin B11", "Vitamin B12", "Vitamin B2", "Vitamin B3",
#     "Vitamin B5", "Vitamin B6", "Vitamin C", "Vitamin D", "Vitamin E", "Vitamin K",
#     "Calcium", "Copper", "Iron", "Magnesium", "Manganese", "Phosphorus", "Potassium",
#     "Selenium", "Zinc", "Nutrition Density"
# ]

# # Function to scale nutrients for a single recipe
# def scale_nutrients(recipe_row, scale_factor):
#     return {nutrient: recipe_row[nutrient] * scale_factor for nutrient in all_nutrients}

# # Function to calculate total nutrients with scaling
# def calculate_total_nutrients(recipes_df, target_calories):
#     total_calories = recipes_df["Caloric Value"].sum()
#     if total_calories == 0:
#         return None

#     scale_factor = target_calories / total_calories
#     total_nutrients = {nutrient: 0 for nutrient in all_nutrients}

#     for _, row in recipes_df.iterrows():
#         scaled = scale_nutrients(row, scale_factor)
#         for nutrient, value in scaled.items():
#             total_nutrients[nutrient] += value

#     return total_nutrients

# # Function to check if all nutrients are within ±30% of targets
# def is_within_30_percent(total_nutrients, target_nutrients):
#     for nutrient in all_nutrients:
#         total_value = total_nutrients.get(nutrient, 0)
#         target_value = target_nutrients.get(nutrient, 0)
#         if target_value == 0:  # Avoid division by zero
#             if total_value != 0:
#                 return False  # If target is 0, total must be 0
#             continue
#         diff_percent = (total_value - target_value) / target_value * 100
#         if diff_percent < -30 or diff_percent > 30:
#             return False
#     return True

# # Function to select recipes with nutrient constraints
# def select_recipes(meal_data, target_nutrients, meal_type, previous_selections=None):
#     if previous_selections is None:
#         previous_selections = set()

#     available_indices = [i for i in meal_data.index if i not in previous_selections]
#     if len(available_indices) < 3:
#         print(f"Not enough unique {meal_type} recipes available.")
#         return None, None

#     max_attempts = 1000  # Increase attempts for stricter constraints
#     best_recipes = None
#     best_total = None
#     best_max_diff = float('inf')

#     for _ in range(max_attempts):
#         selected_indices = random.sample(available_indices, 3)
#         selected_recipes = meal_data.loc[selected_indices]

#         target_calories = target_nutrients["Caloric Value"]
#         total_nutrients = calculate_total_nutrients(selected_recipes, target_calories)

#         if total_nutrients is None:
#             continue

#         # Check if within ±30% for all nutrients
#         if is_within_30_percent(total_nutrients, target_nutrients):
#             print(f"Selected {meal_type} recipes successfully within ±30% after {_+1} attempts.")
#             return selected_recipes, total_nutrients

#         # Track the best combination
#         max_diff = max(
#             [abs((total_nutrients.get(n, 0) - target_nutrients.get(n, 0)) / target_nutrients.get(n, 1) * 100)
#              for n in all_nutrients if target_nutrients.get(n, 0) != 0],
#             default=0
#         )
#         if max_diff < best_max_diff:
#             best_max_diff = max_diff
#             best_recipes = selected_recipes
#             best_total = total_nutrients

#     print(f"Could not find {meal_type} recipes within ±30% after {max_attempts} attempts.")
#     print(f"Best attempt max nutrient difference: {best_max_diff:.1f}%")
#     return best_recipes, best_total  # Return best attempt if no perfect match

# # Previous selections
# previous_selections = {'Breakfast': set(), 'Lunch': set(), 'Dinner': set()}

# # Select recipes ensuring ±30% for all nutrients
# breakfast_recipes, breakfast_total = select_recipes(
#     breakfast_data, breakfast_nutrients, "Breakfast", previous_selections['Breakfast']
# )
# if breakfast_recipes is not None:
#     previous_selections['Breakfast'].update(breakfast_recipes.index)

# lunch_recipes, lunch_total = select_recipes(
#     lunch_data, lunch_nutrients, "Lunch", previous_selections['Lunch']
# )
# if lunch_recipes is not None:
#     previous_selections['Lunch'].update(lunch_recipes.index)

# dinner_recipes, dinner_total = select_recipes(
#     dinner_data, dinner_nutrients, "Dinner", previous_selections['Dinner']
# )
# if dinner_recipes is not None:
#     previous_selections['Dinner'].update(dinner_recipes.index)

# # Display results
# def display_meal_plan(meal_recipes, meal_total, meal_type, target_nutrients):
#     if meal_recipes is not None:
#         print(f"\n{meal_type} Plan:")
#         print("Selected Recipes (Scaled to Match Target Calories):")
#         total_raw_calories = meal_recipes["Caloric Value"].sum()
#         scale_factor = target_nutrients["Caloric Value"] / total_raw_calories if total_raw_calories != 0 else 1

#         for idx, row in meal_recipes.iterrows():
#             scaled_calories = row["Caloric Value"] * scale_factor
#             print(f"- {row['food']} (Original: {row['Caloric Value']:.2f} kcal, Scaled: {scaled_calories:.2f} kcal)")

#         print(f"\nTotal {meal_type} Nutrients (Scaled):")
#         for nutrient in all_nutrients:
#             value = meal_total.get(nutrient, 0)
#             target = target_nutrients.get(nutrient, 0)
#             diff_percent = (value - target) / target * 100 if target != 0 else (0 if value == 0 else float('inf'))
#             print(f"{nutrient}: {value:.2f} (Target: {target:.2f}, Diff: {diff_percent:.1f}%)")

# # Display the meal plan
# display_meal_plan(breakfast_recipes, breakfast_total, "Breakfast", breakfast_nutrients)
# display_meal_plan(lunch_recipes, lunch_total, "Lunch", lunch_nutrients)
# display_meal_plan(dinner_recipes, dinner_total, "Dinner", dinner_nutrients)

# # Daily total
# if all([breakfast_total, lunch_total, dinner_total]):
#     daily_total = {
#         nutrient: breakfast_total.get(nutrient, 0) + lunch_total.get(nutrient, 0) + dinner_total.get(nutrient, 0)
#         for nutrient in all_nutrients
#     }
#     daily_target = {
#         nutrient: breakfast_nutrients.get(nutrient, 0) + lunch_nutrients.get(nutrient, 0) + dinner_nutrients.get(nutrient, 0)
#         for nutrient in all_nutrients
#     }

#     print("\nDaily Total Nutrients:")
#     for nutrient, value in daily_total.items():
#         target = daily_target.get(nutrient, 0)
#         diff_percent = (value - target) / target * 100 if target != 0 else (0 if value == 0 else float('inf'))
#         print(f"{nutrient}: {value:.2f} (Target: {target:.2f}, Diff: {diff_percent:.1f}%)")

In [16]:
# import pandas as pd
# import random
# import numpy as np

# def select_meal_recipes(meal_df, target_nutrients, max_attempts=1000, tolerance=0.05):
#     """
#     Selects 3 recipes from meal_df such that their summed nutrients are within ±tolerance of target_nutrients.
#     If no valid combination is found, the tolerance is gradually increased until a valid combination is found.
#     Returns the recipes or None if no valid combination is found after max_attempts.
#     """
#     # Extract relevant nutrient columns
#     nutrient_cols = list(target_nutrients.keys())

#     # Ensure meal_df has required columns
#     missing_cols = [col for col in nutrient_cols if col not in meal_df.columns]
#     if missing_cols:
#         raise ValueError(f"Meal data missing required columns: {missing_cols}")

#     # Calculate acceptable nutrient ranges
#     lower_bounds = {nutrient: value * (1 - tolerance) for nutrient, value in target_nutrients.items()}
#     upper_bounds = {nutrient: value * (1 + tolerance) for nutrient, value in target_nutrients.items()}

#     # Handle zero targets to avoid division by zero or impossible ranges
#     for nutrient in target_nutrients:
#         if target_nutrients[nutrient] == 0:
#             lower_bounds[nutrient] = -float('inf')  # Allow any non-negative sum
#             upper_bounds[nutrient] = 0.05  # Small tolerance to avoid strict zero

#     # Attempt to find valid combinations
#     for attempt in range(max_attempts):
#         # Randomly select 3 distinct recipes
#         sample = meal_df.sample(n=3, replace=False)
#         # Calculate total nutrients
#         total_nutrients = sample[nutrient_cols].sum()
#         # Check if all nutrients are within bounds
#         valid = True
#         for nut in nutrient_cols:
#             total = total_nutrients[nut]
#             # Handle cases where lower bound is negative (due to zero target)
#             if lower_bounds[nut] < 0:
#                 if total > upper_bounds[nut]:
#                     valid = False
#                     break
#             else:
#                 if not (lower_bounds[nut] <= total <= upper_bounds[nut]):
#                     valid = False
#                     break
#         if valid:
#             return sample
#         # If no valid combination found, increase tolerance slightly
#         if attempt == max_attempts - 1:
#             tolerance += 0.05  # Increase tolerance by 5%
#             lower_bounds = {nutrient: value * (1 - tolerance) for nutrient, value in target_nutrients.items()}
#             upper_bounds = {nutrient: value * (1 + tolerance) for nutrient, value in target_nutrients.items()}
#             print(f"Increasing tolerance to {tolerance * 100:.0f}% for {meal_df['type'].iloc[0]} recipes.")

#     # If no valid combination found after max_attempts
#     return None

# # Example usage after predicting nutrients for each meal
# # Assuming breakfast_data, lunch_data, dinner_data are loaded DataFrames
# # and breakfast_nutrients, lunch_nutrients, dinner_nutrients are dictionaries

# # Select Breakfast Recipes
# breakfast_recipes = select_meal_recipes(breakfast_data, breakfast_nutrients)
# # Select Lunch Recipes
# lunch_recipes = select_meal_recipes(lunch_data, lunch_nutrients)
# # Select Dinner Recipes
# dinner_recipes = select_meal_recipes(dinner_data, dinner_nutrients)

# # Function to display selected recipes
# def display_recipes(recipes, meal_name):
#     if recipes is None:
#         print(f"No valid {meal_name} recipes found within tolerance.")
#         return
#     print(f"\n{meal_name} Recipes:")
#     print(recipes[['food', 'Caloric Value'] + list(breakfast_nutrients.keys())[1:]])

# # Display results
# display_recipes(breakfast_recipes, "Breakfast")
# display_recipes(lunch_recipes, "Lunch")
# display_recipes(dinner_recipes, "Dinner")

In [17]:
# import pandas as pd
# import random
# import numpy as np
# from sklearn.metrics import mean_absolute_percentage_error

# def select_meal_recipes(meal_df, target_nutrients, max_attempts=1000, base_tolerance=0.05):
#     """
#     Selects 1-3 recipes that best match target nutrients, prioritizing:
#     1. Calories within ±10%
#     2. Macronutrients within ±15%
#     3. Micronutrients within ±25%
#     """
#     # Define priority groups and tolerances
#     nutrient_priority = {
#         'Caloric Value': 0.10,
#         'Protein': 0.15,
#         'Carbohydrates': 0.15,
#         'Fat': 0.15,
#         'default': 0.25
#     }

#     # Create scoring function
#     def calculate_score(actual, target):
#         scores = []
#         for nut in target.index:
#             tolerance = nutrient_priority.get(nut, nutrient_priority['default'])
#             if target[nut] == 0:
#                 if actual[nut] == 0:
#                     error = 0
#                 else:
#                     error = 1
#             else:
#                 error = abs(actual[nut] - target[nut]) / target[nut]
#             scores.append(min(error / tolerance, 1.0))  # Normalize to 0-1
#         return np.mean(scores)

#     best_score = float('inf')
#     best_combination = None
#     target_series = pd.Series(target_nutrients)

#     for _ in range(max_attempts):
#         # Randomly choose how many recipes to use (1-3)
#         num_recipes = random.choices([1, 2, 3], weights=[0.2, 0.3, 0.5])[0]

#         try:
#             sample = meal_df.sample(n=num_recipes, replace=False)
#         except ValueError:
#             continue  # Not enough unique recipes

#         total = sample[target_nutrients.keys()].sum()
#         score = calculate_score(total, target_series)

#         # Track best combination
#         if score < best_score:
#             best_score = score
#             best_combination = sample
#             if best_score < 0.1:  # Good enough match
#                 break

#     # Verify calorie match as absolute requirement
#     if best_combination is not None:
#         total_cal = best_combination['Caloric Value'].sum()
#         if not (0.9 * target_nutrients['Caloric Value'] <= total_cal <= 1.1 * target_nutrients['Caloric Value']):
#             return None  # Fail-safe for calories

#     return best_combination

# # Example usage with fallback logic
# def get_meal_with_fallback(meal_data, target_nutrients, meal_name):
#     result = select_meal_recipes(meal_data, target_nutrients)

#     if result is None:
#         print(f"Warning: Couldn't find perfect {meal_name} match. Using closest available.")
#         # Find single recipe closest to 1/3 of target
#         scaled_target = {k: v/3 for k, v in target_nutrients.items()}
#         closest = meal_data.iloc[
#             (meal_data[['Caloric Value']] - scaled_target['Caloric Value']).abs().argsort()[:1]
#         ]
#         result = closest

#     return result

# # Select meals with fallback
# breakfast_recipes = get_meal_with_fallback(breakfast_data, breakfast_nutrients, "breakfast")
# lunch_recipes = get_meal_with_fallback(lunch_data, lunch_nutrients, "lunch")
# dinner_recipes = get_meal_with_fallback(dinner_data, dinner_nutrients, "dinner")

# # Display function with nutritional summary
# def display_recipes(recipes, meal_name):
#     if recipes is None:
#         print(f"No {meal_name} recipes found")
#         return

#     print(f"\n{meal_name.upper()} ({len(recipes)} recipes)")
#     print("Selected Dishes:", ", ".join(recipes['food'].tolist()))

#     totals = recipes[list(breakfast_nutrients.keys())].sum()
#     targets = pd.Series(breakfast_nutrients if meal_name == "breakfast" else
#                        lunch_nutrients if meal_name == "lunch" else dinner_nutrients)

#     print("\nNutritional Summary:")
#     for nut in totals.index:
#         target = targets[nut]
#         actual = totals[nut]
#         diff_pct = (actual - target)/target * 100 if target != 0 else 0
#         print(f"{nut}: {actual:.1f} ({diff_pct:+.1f}%)")

# # Display results
# display_recipes(breakfast_recipes, "breakfast")
# display_recipes(lunch_recipes, "lunch")
# display_recipes(dinner_recipes, "dinner")

Working (both grok and deepseek)


In [18]:
import pandas as pd
import random
import numpy as np

def select_meal_recipes(meal_df, target_calories, target_nutrients, max_attempts=1000, base_tolerance=0.05):
    """
    Selects 1-3 recipes that best match target calories and nutrients.
    """
    nutrient_priority = {
        'Protein': 0.15,
        'Carbohydrates': 0.15,
        'Fat': 0.15,
        'default': 0.25
    }

    def calculate_score(actual_cal, actual_nutrients, target_cal, target_nut):
        scores = []
        cal_error = abs(actual_cal - target_cal) / target_cal if target_cal != 0 else (0 if actual_cal == 0 else 1)
        scores.append(min(cal_error / 0.10, 1.0))
        for nut, value in target_nut.items():
            actual = actual_nutrients[nut]
            tolerance = nutrient_priority.get(nut, nutrient_priority['default'])
            error = abs(actual - value) / value if value != 0 else (0 if actual == 0 else 1)
            scores.append(min(error / tolerance, 1.0))
        return np.mean(scores)

    best_score = float('inf')
    best_combination = None
    nutrient_cols = list(target_nutrients.keys())

    required_cols = ['Caloric Value'] + nutrient_cols
    missing_cols = [col for col in required_cols if col not in meal_df.columns]
    if missing_cols:
        raise ValueError(f"Meal data missing columns: {missing_cols}")

    for _ in range(max_attempts):
        num_recipes = random.choices([1, 2, 3], weights=[0.2, 0.3, 0.5])[0]
        try:
            sample = meal_df.sample(n=num_recipes, replace=False)
        except ValueError:
            continue

        total_cal = sample['Caloric Value'].sum()
        total_nutrients = sample[nutrient_cols].sum()
        score = calculate_score(total_cal, total_nutrients, target_calories, target_nutrients)

        if score < best_score:
            best_score = score
            best_combination = sample
            if best_score < 0.1:
                break

    return best_combination

def get_meal_with_fallback(meal_data, target_calories, target_nutrients, meal_name):
    result = select_meal_recipes(meal_data, target_calories, target_nutrients)
    if result is None or len(result) == 0:
        print(f"Warning: No {meal_name} recipes found. Using fallback.")
        closest = meal_data.iloc[
            (meal_data['Caloric Value'] - target_calories).abs().argsort()[:1]
        ]
        result = closest
    return result

def display_recipes(recipes, meal_name, target_calories, target_nutrients):
    if recipes is None or len(recipes) == 0:
        print(f"No {meal_name} recipes found")
        return

    print(f"\n{meal_name.upper()} ({len(recipes)} recipes)")
    print("Selected Dishes:", ", ".join(recipes['food'].tolist()))

    # Calculate total nutrients from selected recipes
    nutrient_cols = list(target_nutrients.keys()) + ['Caloric Value']
    totals = recipes[nutrient_cols].sum()
    target_with_cal = {'Caloric Value': target_calories, **target_nutrients}

    print("\nNutritional Summary:")
    for nut in target_with_cal:
        actual_value = totals[nut]  # Scalar value from Series
        target_value = target_with_cal[nut]  # Scalar value from dict
        diff_pct = (actual_value - target_value) / target_value * 100 if target_value != 0 else (0 if actual_value == 0 else float('inf'))
        print(f"{nut}: {actual_value:.1f} (target: {target_value:.1f}, diff: {diff_pct:+.1f}%)")

# Assuming these variables are defined from your previous code
breakfast_data = pd.read_csv("breakfast_data.csv")
lunch_data = pd.read_csv("lunch_data.csv")
dinner_data = pd.read_csv("dinner_data.csv")

# Example nutrient dictionaries and calorie targets (replace with your actual values)
# These should come from your predict_nutrients and divide_calories functions
breakfast_calories = 500  # Example value
lunch_calories = 700
dinner_calories = 600
breakfast_nutrients = {'Protein': 20, 'Carbohydrates': 60, 'Fat': 15}  # Example
lunch_nutrients = {'Protein': 30, 'Carbohydrates': 80, 'Fat': 20}
dinner_nutrients = {'Protein': 25, 'Carbohydrates': 70, 'Fat': 18}

# Select meals
breakfast_recipes = get_meal_with_fallback(breakfast_data, breakfast_calories, breakfast_nutrients, "breakfast")
lunch_recipes = get_meal_with_fallback(lunch_data, lunch_calories, lunch_nutrients, "lunch")
dinner_recipes = get_meal_with_fallback(dinner_data, dinner_calories, dinner_nutrients, "dinner")

# Display results
display_recipes(breakfast_recipes, "breakfast", breakfast_calories, breakfast_nutrients)
display_recipes(lunch_recipes, "lunch", lunch_calories, lunch_nutrients)
display_recipes(dinner_recipes, "dinner", dinner_calories, dinner_nutrients)


BREAKFAST (3 recipes)
Selected Dishes: goose meat raw, ham cheese sandwich, cheese spread

Nutritional Summary:
Caloric Value: 11.7 (target: 500.0, diff: -97.7%)
Protein: 4.0 (target: 20.0, diff: -80.0%)
Carbohydrates: -0.0 (target: 60.0, diff: -100.0%)
Fat: 15.6 (target: 15.0, diff: +3.7%)

LUNCH (3 recipes)
Selected Dishes: nutmeg butter oil, greenland halibut cooked, double whopper with cheese burger king

Nutritional Summary:
Caloric Value: 15.0 (target: 700.0, diff: -97.9%)
Protein: 3.7 (target: 30.0, diff: -87.8%)
Carbohydrates: 0.4 (target: 80.0, diff: -99.5%)
Fat: 18.9 (target: 20.0, diff: -5.4%)

DINNER (3 recipes)
Selected Dishes: taro leaves cooked, pea soup, oat bran bagel

Nutritional Summary:
Caloric Value: -0.6 (target: 600.0, diff: -100.1%)
Protein: 0.1 (target: 25.0, diff: -99.5%)
Carbohydrates: 1.0 (target: 70.0, diff: -98.6%)
Fat: -1.1 (target: 18.0, diff: -106.3%)
