In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, StandardScaler
from sklearn.metrics import hamming_loss, accuracy_score, classification_report
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
import joblib

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer, LabelEncoder
import numpy as np
import pandas as pd


def preprocess_for_model_with_drops(
    file_path,
    drop_diet_classes=None,
    drop_exercise_classes=None,
    test_size=0.2,
    random_state=42,
    diet_model_path="diet_model.pkl"
):
    """
    Preprocess data for training and testing models with the ability to drop specific target classes.

    Args:
        file_path (str): Path to the dataset.
        drop_diet_classes (list): List of diet classes to drop.
        drop_exercise_classes (list): List of exercise classes to drop.
        test_size (float): Proportion of the dataset to include in the test split.
        random_state (int): Random seed for reproducibility.
        diet_model_path (str): Path to save the trained diet model.

    Returns:
        tuple: X_train, y_diet_train, X_test, y_diet_test, 
               diet_model_path, mlb_diet (MultiLabelBinarizer), scaler (StandardScaler), 
               mlb_exercises (if exercises exist).
    """
    # Load data
    df = pd.read_excel(file_path)

    # Handle invalid numeric data
    numeric_cols = ["Age", "Height", "Weight", "BMI"]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(df[col].median())

    # Process categorical data
    categorical_cols = ["Sex", "Level", "Fitness Goal", "Fitness Type", "Hypertension", "Diabetes"]
    label_encoders = {}
    for col in categorical_cols:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))
        label_encoders[col] = le

    # Clean and split "Diet"
    df["Diet"] = (
        df["Diet"]
        .str.replace(r"[.,:(),;]", "", regex=True)
        .str.replace(r"\b(?:intake|and|or)\b", "", regex=True)
        .str.strip()
    )
    df["Diet_labels"] = df["Diet"].str.split()

    # Clean and split "Exercises" if present
    mlb_exercises = None
    if "Exercises" in df.columns:
        df["Exercises"] = (
            df["Exercises"]
            .str.replace(r"[.,:()]", "", regex=True)
            .str.replace(r"\b(?:and|or)\b", "", regex=True)
            .str.strip()
        )
        df["Exercise_labels"] = df["Exercises"].str.split()

        # Drop specific exercise classes if provided
        if drop_exercise_classes:
            df["Exercise_labels"] = df["Exercise_labels"].apply(
                lambda x: [item for item in x if item not in drop_exercise_classes]
            )

        # Initialize MultiLabelBinarizer for exercises
        mlb_exercises = MultiLabelBinarizer()
        y_exercises = mlb_exercises.fit_transform(df["Exercise_labels"])

    # Handle "chicken" variations
    df["Diet_labels"] = df["Diet_labels"].apply(
        lambda x: ["chicken" if item.lower() in ["chicken", "chickken"] else item for item in x]
    )

    # Drop specific diet classes if provided
    if drop_diet_classes:
        df["Diet_labels"] = df["Diet_labels"].apply(
            lambda x: [item for item in x if item not in drop_diet_classes]
        )

    # Initialize MultiLabelBinarizer for diet
    mlb_diet = MultiLabelBinarizer()
    y_diet = mlb_diet.fit_transform(df["Diet_labels"])

    # Combine numeric and categorical features
    X_numeric = df[numeric_cols]
    X_categorical = df[categorical_cols]

    # Standardize numeric features
    scaler = StandardScaler()
    X_numeric_scaled = scaler.fit_transform(X_numeric)

    # Combine scaled numeric and categorical features
    X = np.hstack([X_numeric_scaled, X_categorical.values])

    # Ensure `y_diet` matches the number of samples in `X`
    assert len(X) == len(y_diet), "X and y_diet must have the same number of samples."

    # Split data into training and testing sets
    X_train, X_test, y_diet_train, y_diet_test = train_test_split(
        X, y_diet, test_size=test_size, random_state=random_state
    )

    # Include exercise binarizer and labels if present
    if mlb_exercises is not None:
        y_exercises_train, y_exercises_test = train_test_split(
            y_exercises, test_size=test_size, random_state=random_state
        )
    return X_train,y_diet_train,X_test,y_diet_test,y_exercises_train,y_exercises_test,diet_model_path,mlb_diet,mlb_exercises,scaler,label_encoders
        



In [15]:
# Main execution
# Preprocess data
file_path = "data/excercise and diet/diet_gym.xlsx"
X_train,y_diet_train,X_test,y_diet_test,y_exercises_train,y_exercises_test,diet_model_path,mlb_diet,mlb_exercises,scaler,label_encoders = preprocess_for_model_with_drops(file_path,drop_diet_classes=[ "law",
    "coldpressed",
    "juiceand",
    "juicekale",
    "protein",
    "products",
    "spandwich",
    "standwish","Intake","Lettuce;", "Nuts;", "Onion;", "Seeds;", "Spandwich;", "Teff;", "chestnut;Protein", "legumes;", "Cattoge", "Icebetg", "Papper", "Standwish", "Spandwich;", "Law", "Diet"
]
)


In [16]:
import os
import json
def save_preprocessor(scaler,label_dict,path,mlb_diet,mlb_excercises):
    joblib.dump(scaler, os.path.join(path ,"scaler.pkl"))
    joblib.dump(mlb_diet, os.path.join(path ,"mlb_diet.pkl"))
    joblib.dump(mlb_excercises, os.path.join(path ,"mlb_exercis.pkl"))


    new_dict={}
    for name,model in label_dict.items():
        file_name=name+".pkl"
        joblib.dump(model,os.path.join(path,name+".pkl"))
        new_dict[name]=file_name
    with open(path+"/model_paths.json", "w") as f:
        json.dump(new_dict, f)
        
    return _



In [17]:
save_preprocessor(scaler,label_encoders,path="models/gym_diet/preprocessor",mlb_diet=mlb_diet,mlb_excercises=mlb_exercises)

''

In [18]:
def build_model(input_shape, output_shape):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(output_shape, activation='sigmoid')  # Sigmoid for multi-label output
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
def train_and_save_model(X_train, y_train, X_test, y_test, output_path, mlb, scaler):
    model = build_model(X_train.shape[1], y_train.shape[1])
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

    # Evaluate the model
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print("Hamming Loss:", hamming_loss(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred, target_names=mlb.classes_))

    # Save the model and preprocessing objects
    model.save(output_path)

    return model






In [19]:

    # Train diet model
diet_model_path = "models/gym_diet/neural_nets/diet_model.h5"
diet_model = train_and_save_model(X_train, y_diet_train, X_test, y_diet_test, diet_model_path, mlb_diet, scaler)



Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 6.7960e-04 - loss: 0.4402 - val_accuracy: 0.0000e+00 - val_loss: 0.1417
Epoch 2/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0141 - loss: 0.1507 - val_accuracy: 0.0000e+00 - val_loss: 0.0983
Epoch 3/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0112 - loss: 0.1150 - val_accuracy: 0.0000e+00 - val_loss: 0.0844
Epoch 4/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0057 - loss: 0.0964 - val_accuracy: 0.0000e+00 - val_loss: 0.0783
Epoch 5/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0033 - loss: 0.0875 - val_accuracy: 0.0000e+00 - val_loss: 0.0764
Epoch 6/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0028 - loss: 0.0828 - val_accuracy: 0.0000e+00 - val_loss: 0.0740
Epoch 7/20


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Classification Report:
                  precision    recall  f1-score   support

           Aloe       0.93      0.93      0.93       450
          Apple       0.97      0.97      0.97      2059
           Baru       0.98      0.98      0.98      2345
          Beech       0.96      0.97      0.97      1895
       Beetroot       0.95      0.95      0.95      1354
       Broccoli       0.93      0.93      0.93       450
         Capers       0.90      0.92      0.91       541
        Carrots       0.92      0.92      0.92       631
         Cheese       0.96      0.97      0.97      2018
   Cold-pressed       0.93      0.93      0.93       450
          Fruit       0.92      0.92      0.92       631
         Garlic       0.97      0.97      0.97      2059
          Green       0.94      0.96      0.95      1599
           Hemp       0.95      0.95      0.95      1193
        Iceberg       0.90      0.92      0.91       541
          Juice       0.99      1.00      1.00      2903
      

In [20]:
 # Train exercise model
exercise_model_path = "models/gym_diet/neural_nets/exercise_model.h5"
exercise_model = train_and_save_model(X_train, y_exercises_train, X_test, y_exercises_test, exercise_model_path, mlb_exercises, scaler)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.1048 - loss: 0.3986 - val_accuracy: 0.0522 - val_loss: 0.0333
Epoch 2/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1147 - loss: 0.0348 - val_accuracy: 0.0171 - val_loss: 0.0152
Epoch 3/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1550 - loss: 0.0145 - val_accuracy: 0.0069 - val_loss: 0.0132
Epoch 4/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1264 - loss: 0.0117 - val_accuracy: 0.0694 - val_loss: 0.0115
Epoch 5/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1474 - loss: 0.0099 - val_accuracy: 0.1096 - val_loss: 0.0113
Epoch 6/20
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.1382 - loss: 0.0097 - val_accuracy: 0.0994 - val_loss: 0.0126
Epoch 7/20
[1m292/292[0m [32m━━━━━━━



Hamming Loss: 0.0024845784784098698
Classification Report:
               precision    recall  f1-score   support

       Brisk       0.99      1.00      0.99       774
      Squats       1.00      1.00      1.00      1441
    Swimming       1.00      0.99      1.00       370
     Walking       1.00      0.99      1.00       370
        Yoga       1.00      0.99      1.00       370
       bench       1.00      1.00      1.00      1441
       brisk       1.00      1.00      1.00       333
     cycling       0.99      1.00      1.00      1107
     dancing       0.99      1.00      1.00      1107
   deadlifts       1.00      1.00      1.00      1441
    overhead       1.00      1.00      1.00      1441
     presses       1.00      1.00      1.00      1441
     running       0.99      1.00      0.99       774
    swimming       0.99      1.00      1.00      1107
     walking       0.99      1.00      1.00      1107
        yoga       1.00      1.00      1.00       725

   micro avg       1

In [5]:
def load_model_and_preprocessors(path,model_name):
    preprocessor_path=path+"/preprocessor/"
    model_path=path+"/neural_nets/{}_model.h5".format(model_name)
    model = load_model(model_path)
    scaler=joblib.load(preprocessor_path+"scaler.pkl")
    with open(preprocessor_path+"model_paths.json", "r") as file:
        label_dict = json.load(file) 
    for name,file_name in label_dict.items():
        label_dict[name]=joblib.load(preprocessor_path+file_name)
    mlb=joblib.load(preprocessor_path+"mlb_"+model_name+".pkl")

    #mlb = joblib.load(model_path + "_mlb.pkl")
    #scaler = joblib.load(model_path + "_scaler.pkl")
    return model, mlb, scaler,label_dict




def get_top_recommendations(predictions, mlb, top_n=3):
    recommended_items = []
    for pred in predictions:
        top_indices = np.argsort(pred)[-top_n:][::-1]  # Get indices of top N values
        top_items = [mlb.classes_[i] for i in top_indices if pred[i] > 0.5]  # Include only if above threshold
        recommended_items.append(top_items)
    return recommended_items
def predict_with_model(model, scaler, mlb, input_data,label_encoder, top_n=3):
    scaler_data=np.asarray([input_data[0][:4]])
    input_data_scaled = scaler.transform(scaler_data)
    label_data=input_data[0][4:]
    label_data_encoded=[]
    count=0
    for key,value in label_encoder.items():
        data_point=np.asarray([label_data[count]])
        label_data_encoded.append(value.transform(data_point))
        count=count+1
    label_data_encoded=[i[0] for i in label_data_encoded]
    ids=list(input_data_scaled[0])
    ids.extend(label_data_encoded)
    final_data=np.asarray(ids)
    final_data=final_data.reshape(1,10)

    predictions = model.predict(final_data)
    return get_top_recommendations(predictions, mlb, top_n)

In [8]:
 # Load and predict diet recommendations
path="models/gym_diet"
model_name="diet"
model, mlb, scaler,label_dict=load_model_and_preprocessors(path,model_name)
sample_input = np.asarray([[18,1.68,47.5,16.83,"Male","Underweight","Weight Gain","Muscular Fitness","No","No"]])
diet_recommendations = predict_with_model(model, scaler, mlb, sample_input, top_n=3,label_encoder=label_dict)
print("Diet recommendations for sample inputs:", diet_recommendations)

model_name="exercise"
model, mlb, scaler,label_dict=load_model_and_preprocessors(path,model_name)
sample_input = np.asarray([[18,1.68,47.5,16.83,"Male","Underweight","Weight Gain","Muscular Fitness","No","No"]])
exer_recommendations = predict_with_model(model, scaler, mlb, sample_input, top_n=3,label_encoder=label_dict)
print("Exercise recommendations for sample inputs:", exer_recommendations)

    




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step




Diet recommendations for sample inputs: [['Juice', 'Vegetables', 'Protein']]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Exercise recommendations for sample inputs: [['presses', 'overhead', 'deadlifts']]
