In [6]:
class NUTRITIONRecommender:
    def __init__(self, age, weight, dietary_preferences):
        """
        Initialize the user health data.
        :param age: int - The age of the user.
        :param weight: float - The weight of the user in kilograms.
        :param dietary_preferences: list - A list of dietary preferences (e.g., ['vegetarian', 'low-carb', etc.]).
        """
        self.age = age
        self.weight = weight
        self.dietary_preferences = dietary_preferences

    def suggest_meal_plan(self):
        """
        Suggest meal plan for breakfast, lunch, and dinner based on the user's age, weight, and dietary preferences.
        :return: dict - A dictionary containing suggested meals for breakfast, lunch, and dinner.
        """
        breakfast = self.recommend_breakfast()
        lunch = self.recommend_lunch()
        dinner = self.recommend_dinner()

        return {
            "breakfast": breakfast,
            "lunch": lunch,
            "dinner": dinner
        }

    def recommend_breakfast(self):
        """
        Recommend a breakfast based on user preferences.
        :return: str - Recommended breakfast.
        """
        if 'vegetarian' in self.dietary_preferences:
            return "Oatmeal with fruits and nuts"
        elif 'low-carb' in self.dietary_preferences:
            return "Scrambled eggs with avocado"
        else:
            return "Whole grain toast with peanut butter and a banana"

    def recommend_lunch(self):
        """
        Recommend a lunch based on user preferences.
        :return: str - Recommended lunch.
        """
        if 'vegetarian' in self.dietary_preferences:
            return "Quinoa salad with chickpeas and veggies"
        elif 'low-carb' in self.dietary_preferences:
            return "Grilled chicken salad with olive oil dressing"
        else:
            return "Turkey sandwich with whole wheat bread and a side salad"

    def recommend_dinner(self):
        """
        Recommend a dinner based on user preferences.
        :return: str - Recommended dinner.
        """
        if 'vegetarian' in self.dietary_preferences:
            return "Stir-fried tofu with vegetables and brown rice"
        elif 'low-carb' in self.dietary_preferences:
            return "Grilled salmon with steamed broccoli and cauliflower rice"
        else:
            return "Grilled chicken with quinoa and roasted vegetables"

# Example usage:
user = NUTRITIONRecommender(age=30, weight=70, dietary_preferences=['low-carb'])
meal_plan = user.suggest_meal_plan()

for meal, recommendation in meal_plan.items():
    print(f"{meal.capitalize()}: {recommendation}")


Breakfast: Scrambled eggs with avocado
Lunch: Grilled chicken salad with olive oil dressing
Dinner: Grilled salmon with steamed broccoli and cauliflower rice


In [4]:
import pandas as pd
import numpy as np

# Function to generate random dietary preferences
def generate_dietary_preferences():
    preferences = ['vegetarian', 'vegan', 'low-carb', 'high-protein', 'paleo', 'keto']
    return np.random.choice(preferences)

# Number of rows and columns
num_rows = 1000  # Number of users
num_cols = 100   # Number of columns

# Creating base columns
data = {
    'User_ID': np.arange(1, num_rows + 1),
    'Age': np.random.randint(18, 65, size=num_rows),
    'Weight': np.random.randint(50, 100, size=num_rows),  # in kilograms
    'Height': np.random.randint(150, 200, size=num_rows),  # in cm
    'Body_Fat_Percentage': np.round(np.random.uniform(10, 35, size=num_rows), 2),
    'Activity_Level': np.random.choice(['Sedentary', 'Active', 'Very Active'], size=num_rows),
    'Dietary_Preferences': [generate_dietary_preferences() for _ in range(num_rows)],
    'Recommended_Calories': np.random.randint(1500, 3000, size=num_rows)
}

# Add 92 more synthetic columns to make the total 100 columns
for i in range(9, num_cols + 1):
    column_name = f'Feature_{i}'  # Giving generic names to synthetic columns
    data[column_name] = np.random.random(size=num_rows)  # Random float values between 0 and 1

# Create DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('nutrition_data.csv', index=False)

# Show the first few rows of the dataset
print(df.head())



   User_ID  Age  Weight  Height  Body_Fat_Percentage Activity_Level  \
0        1   39      63     183                18.67      Sedentary   
1        2   26      90     192                26.46         Active   
2        3   50      98     178                25.32    Very Active   
3        4   32      58     183                11.48         Active   
4        5   22      52     164                17.46         Active   

  Dietary_Preferences  Recommended_Calories  Feature_9  Feature_10  ...  \
0        high-protein                  1939   0.794280    0.165424  ...   
1               vegan                  1912   0.379562    0.648580  ...   
2            low-carb                  2598   0.367203    0.648678  ...   
3        high-protein                  2156   0.845184    0.534210  ...   
4                keto                  1573   0.641516    0.509760  ...   

   Feature_91  Feature_92  Feature_93  Feature_94  Feature_95  Feature_96  \
0    0.797099    0.848429    0.235514    0.82

In [36]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Class to store and save the model details along with evaluation metrics
class ModelObject:
    def _init_(self, model_name, model, params, best_params, evaluation_metrics, version):
        self.model_name = model_name
        self.model = model
        self.params = params
        self.best_params = best_params
        self.evaluation_metrics = evaluation_metrics
        self.version = version

    def log_details(self):
        log_message = f"Model: {self.model_name} (Version: {self.version})\n"
        log_message += f"Initial Parameters: {self.params}\n"
        log_message += f"Best Parameters after tuning: {self.best_params}\n"
        log_message += f"Evaluation Metrics: {self.evaluation_metrics}\n"
        return log_message

    def save(self, save_path):
        joblib.dump(self, save_path)
        print(f"Model saved at: {save_path}")

# Base Class for Dataset Handling
class Dataset:
    def _init_(self):
        self.data = None
        self.target = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def load_data(self, file_path):
        # Load the dataset
        self.data = pd.read_csv(file_path)
        self.target = self.data['Calories Intake']  # Assuming this is your target column
        self.data.drop(columns=['Calories Intake', 'User ID'], inplace=True)  # Drop target and unnecessary columns

    def visualize_data(self):
        # Visualize data distributions
        plt.figure(figsize=(10, 6))
        sns.histplot(self.data, kde=True)
        plt.title('Distribution of Features')
        plt.show()

        # Convert categorical columns to numeric for correlation analysis
        if self.data.select_dtypes(include=['object']).shape[1] > 0:
            # Use one-hot encoding for categorical features
            data_numeric = pd.get_dummies(self.data, drop_first=True)
        else:
            data_numeric = self.data

        # Correlation heatmap
        plt.figure(figsize=(12, 8))
        sns.heatmap(data_numeric.corr(), annot=True, fmt=".2f", cmap='coolwarm', linewidths=0.5)
        plt.title('Correlation Heatmap')
        plt.show()

    def preprocess(self):
        # Identify categorical columns
        categorical_cols = self.data.select_dtypes(include=['object']).columns.tolist()
        numerical_cols = self.data.select_dtypes(exclude=['object']).columns.tolist()

        # Create a Column Transformer with OneHotEncoder for categorical features
        preprocessor = ColumnTransformer(
            transformers=[
                ('num', 'passthrough', numerical_cols),  # Keep numerical columns unchanged
                ('cat', OneHotEncoder(), categorical_cols)  # One-hot encode categorical columns
            ]
        )

        # Apply the transformations
        self.data = preprocessor.fit_transform(self.data)

        # Train-test split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.data, self.target, test_size=0.2, random_state=42
        )

# Base Class for Model Selection and Tuning
class ModelSelector:
    def _init_(self):
        self.models = {
            'RandomForest': RandomForestRegressor(),
        }
        self.best_model_object = None
        self.version = 1  # Versioning starts at 1

    def hyperparameter_tuning(self, model, param_grid, X_train, y_train):
        grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_, grid_search.best_params_

    def select_model(self, X_train, y_train, X_test, y_test):
        # Define parameter grids for each model
        param_grids = {
            'RandomForest': {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 15]},
        }

        best_score = float('inf')
        for model_name, model in self.models.items():
            print(f"Tuning {model_name}...")
            tuned_model, best_params = self.hyperparameter_tuning(model, param_grids[model_name], X_train, y_train)

            # Evaluate on test data
            y_pred = tuned_model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            evaluation_metrics = {
                "MSE": mse,
                "R2 Score": r2,
                "MAE": mae
            }

            print(f"{model_name} Test MSE: {mse}")

            # Save model object only if it is the best one
            if mse < best_score:
                best_score = mse
                self.best_model_object = ModelObject(
                    model_name=model_name,
                    model=tuned_model,
                    params=param_grids[model_name],
                    best_params=best_params,
                    evaluation_metrics=evaluation_metrics,
                    version=self.version
                )

        print(f"Best Model: {self.best_model_object.model_name}")
        return self.best_model_object

    def save_best_model(self):
        if self.best_model_object:
            # Create the model's versioned file name
            save_path = f"{self.best_model_object.model_name}_v{self.version}.pkl"
            self.best_model_object.save(save_path)
            self.version += 1  # Increment the version for the next save

# Main AutoML Pipeline
class AutoMLPipeline:
    def _init_(self, dataset_path):
        self.dataset = Dataset()
        self.model_selector = ModelSelector()
        self.dataset_path = dataset_path

    def run(self):
        # Load and preprocess data
        print("Loading and Preprocessing Data...")
        self.dataset.load_data(self.dataset_path)

        # Perform EDA and visualization
        self.dataset.visualize_data()

        self.dataset.preprocess()

        # Model Selection and Evaluation
        print("Selecting the best model...")
        best_model = self.model_selector.select_model(
            self.dataset.X_train, self.dataset.y_train,
            self.dataset.X_test, self.dataset.y_test
        )

        # Save the best model with versioning
        self.model_selector.save_best_model()



In [45]:
import joblib

class AutoMLPipeline:
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path
        # Initialize other necessary attributes here if needed

    def run(self):
        # Your logic for running the pipeline
        print(f"Running the pipeline on dataset: {self.dataset_path}")
        # Add your model training, evaluation, etc., here

if __name__ == "__main__":
    dataset_path = r'/content/nutrition_data.csv'
    pipeline = AutoMLPipeline(dataset_path)
    pipeline.run()

    # Load the best model for inspection
    model_file_path = 'RandomForest_v1.pkl'  # Change to the latest model file path if needed
    loaded_model_object = joblib.load(model_file_path)

    # Inspect the contents of the loaded model object
    print(f"Model Name: {loaded_model_object.model_name}")
    print(f"Version: {loaded_model_object.version}")
    print(f"Parameters: {loaded_model_object.params}")
    print(f"Best Parameters: {loaded_model_object.best_params}")
    print(f"Evaluation Metrics: {loaded_model_object.evaluation_metrics}")

    # To inspect the model itself, use the sklearn model's methods
    model = loaded_model_object.model
    print(f"Model: {model}")


Running the pipeline on dataset: /content/nutrition_data.csv


FileNotFoundError: [Errno 2] No such file or directory: 'RandomForest_v1.pkl'

In [10]:
pip install auto-sklearn


Collecting auto-sklearn
  Downloading auto-sklearn-0.15.0.tar.gz (6.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting scikit-learn<0.25.0,>=0.24.0 (from auto-sklearn)
  Downloading scikit-learn-0.24.2.tar.gz (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m69.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mPreparing metadata [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error o

In [12]:
pip install scikit-learn pandas numpy




In [17]:
pip install cython swig




In [19]:
pip install auto-sklearn
import autosklearn.classification


SyntaxError: invalid syntax (<ipython-input-19-f5828022c6e8>, line 1)

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split


# Load dataset
data = pd.read_csv('nutrition_data.csv')

# Preprocess the dataset
X = data[['Age', 'Weight']]
y = data['Recommended_Calories']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)






In [28]:
X_train,X_test,y_train,y_test



(     Age  Weight
 29    20      81
 535   60      62
 695   33      80
 557   29      52
 836   32      59
 ..   ...     ...
 106   42      84
 270   28      99
 860   52      54
 435   21      60
 102   24      70
 
 [800 rows x 2 columns],
      Age  Weight
 521   43      52
 737   34      69
 740   27      52
 660   33      82
 411   35      97
 ..   ...     ...
 408   22      62
 332   22      69
 208   37      82
 613   56      83
 78    22      86
 
 [200 rows x 2 columns],
 29     2436
 535    1769
 695    2368
 557    1576
 836    2002
        ... 
 106    1575
 270    1732
 860    2549
 435    2722
 102    1524
 Name: Recommended_Calories, Length: 800, dtype: int64,
 521    1949
 737    2522
 740    2632
 660    2775
 411    2975
        ... 
 408    2823
 332    1610
 208    2858
 613    2305
 78     2612
 Name: Recommended_Calories, Length: 200, dtype: int64)

In [30]:
from recommendations import get_recommendations


ModuleNotFoundError: No module named 'recommendations'

In [29]:
import unittest
from recommendations import get_recommendations
from train_model import train

class TestFunctions(unittest.TestCase):

    def test_get_recommendations(self):
        result = get_recommendations(user_id=1)
        self.assertIsInstance(result, list)

    def test_train_model(self):
        model = train(data="sample_data")
        self.assertIsNotNone(model)

if __name__ == '__main__':
    unittest.main()



ModuleNotFoundError: No module named 'recommendations'