In [1]:
# Install xgboost if not already installed
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)
Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl (153.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.9/153.9 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl (199.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.0/199.0 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.23.4 xgboost-2.1.1


In [22]:
import pandas as pd

# Load the dataset from the uploaded file
df = pd.read_csv('/content/recipe_final edited hybrid refined.csv')

# Display the first few rows to verify it was loaded correctly
df.head()


Unnamed: 0.1,Unnamed: 0,recipe_id,avrg_rate,review_nums,best_score
0,0,222388,5,3,527
1,1,240488,5,29,724
2,2,218939,5,12,710
3,3,87211,5,163,581
4,4,245714,5,2,820


In [29]:
# Check unique classes in the target variable
num_classes = len(df['avrg_rate'].unique())
print(f"Number of unique classes: {num_classes}")


Number of unique classes: 6


In [30]:
# Define features (X) and target (y)
X = df.drop(columns=['avrg_rate'])  # Features
y = df['avrg_rate']                 # Target (multi-class)

# Split into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [24]:
# Install XGBoost
!pip install xgboost




In [31]:
import xgboost as xgb
from sklearn.metrics import accuracy_score


In [32]:
# Convert the training and testing data into DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)


In [33]:
# Set the XGBoost parameters for multi-class classification
params = {
    'max_depth': 5,              # Maximum depth of the trees
    'eta': 0.1,                  # Learning rate
    'objective': 'multi:softmax', # Use 'multi:softmax' for multi-class classification
    'num_class': num_classes,     # Set the number of unique classes
    'eval_metric': 'mlogloss'     # Evaluation metric for multi-class classification
}


In [34]:
# Convert data to DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Train the XGBoost model
num_boost_round = 100
model = xgb.train(params, dtrain, num_boost_round)


In [35]:
# Make predictions on the test set
y_pred = model.predict(dtest)

# Convert predictions to integer classes (if using multi:softprob, pick the class with max probability)
# y_pred = np.argmax(model.predict(dtest), axis=1)  # If using multi:softprob


In [36]:
# Evaluate the model using accuracy
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Optional: Confusion Matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))


Accuracy: 60.96%
[[   0    0    0    0    0    2]
 [   0    0    0    0    1    8]
 [   0    0    0    0   30   19]
 [   0    0    0    0  307  157]
 [   0    0    0    1 3679 1483]
 [   0    0    0    0 1797 2263]]


In [37]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.3-py3-none-any.whl.metadata (34 kB)
Collecting py-cpuinfo (from ultralytics)
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.8-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.3-py3-none-any.whl (881 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m881.4/881.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.8-py3-none-any.whl (26 kB)
Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, ultralytics-thop, ultralytics
Successfully installed py-cpuinfo-9.0.0 ultralytics-8.3.3 ultralytics-thop-2.0.8


In [41]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.impute import SimpleImputer
from ultralytics import YOLO
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the dataset
file_path = '/content/my recipe.csv'
recipe_df = pd.read_csv(file_path)

recipe_df['ingredients_list'].fillna('', inplace=True)
imputer = SimpleImputer(strategy='mean')

# Extract numerical features and ingredients from your dataset
numerical_features = recipe_df[['calories', 'fat', 'carbohydrates', 'protein']]
ingredients = recipe_df['ingredients_list']
target = recipe_df['avrg_rate']  # Assuming there's a column for user ratings or preferences

# Impute missing numerical values
numerical_features_imputed = imputer.fit_transform(numerical_features)

# Scale numerical features
scaler = StandardScaler()
scaled_numerical = scaler.fit_transform(numerical_features_imputed)

# Vectorize ingredients
vectorizer = TfidfVectorizer()
transformed_ingredients = vectorizer.fit_transform(ingredients).toarray()

# Combine numerical and ingredients features
combined_features = np.hstack([scaled_numerical, transformed_ingredients])

# Train KNN
knn = NearestNeighbors(n_neighbors=5)
knn.fit(combined_features)

# Train an XGBoost model
X_train, X_test, y_train, y_test = train_test_split(combined_features, target, test_size=0.2, random_state=42)

xgboost_model = xgb.XGBRegressor(objective='reg:squarederror')
xgboost_model.fit(X_train, y_train)

# Evaluate the XGBoost model
y_pred = xgboost_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"XGBoost Model MSE: {mse}")

# YOLOv10 Model for Ingredient Detection
model = YOLO('/content/ingred.pt')  # Replace with actual path


# Function to predict ingredients from an image using YOLOv10
def predict_ingredients(image_path):
    # Run the model on the image
    results = model(image_path)

    predicted_classes = []
    for box in results[0].boxes:
        class_id = int(box.cls)  # Extract class ID
        class_name = model.names[class_id]  # Retrieve class name based on ID
        predicted_classes.append(class_name)

    predicted_classes = list(set(predicted_classes))  # Remove duplicates
    return predicted_classes


# Function to recommend recipes using KNN and then re-rank them using XGBoost
def recommend_recipes(input_features):
    # Split numerical features and ingredients
    numerical_features = input_features[:7]
    predicted_ingredients = input_features[7]

    # Impute and scale the input numerical features
    input_features_imputed = imputer.transform([numerical_features])
    input_features_scaled = scaler.transform(input_features_imputed)

    # Vectorize the input ingredients
    input_ingredients_transformed = vectorizer.transform([predicted_ingredients])

    # Combine scaled numerical and transformed ingredient features
    input_combined = np.hstack([input_features_scaled, input_ingredients_transformed.toarray()])

    # Find nearest neighbors using KNN
    distances, indices = knn.kneighbors(input_combined)

    # Get initial recommendations
    knn_recommendations = recipe_df.iloc[indices[0]]

    # Re-rank recommendations using XGBoost
    knn_features = np.hstack([
        scaler.transform(imputer.transform(knn_recommendations[['calories', 'fat', 'carbohydrates', 'protein', 'cholesterol', 'sodium', 'fiber']])),
        vectorizer.transform(knn_recommendations['ingredients_list']).toarray()
    ])

    # Predict scores using XGBoost for re-ranking
    xgboost_scores = xgboost_model.predict(knn_features)
    knn_recommendations['xgboost_score'] = xgboost_scores

    # Sort recommendations by XGBoost score
    re_ranked_recommendations = knn_recommendations.sort_values(by='xgboost_score', ascending=False)

    # Return top-ranked recipes
    return re_ranked_recommendations[['recipe_name', 'ingredients_list', 'image_url', 'xgboost_score']]

# Example usage
image_path = '/content/potp.jpeg'  # Replace with the path to your test image
predicted_ingredients = predict_ingredients(image_path)
predicted_ingredients_str = ', '.join(predicted_ingredients)  # Join ingredients as a string

input_features = [0, 0, 0, 0, predicted_ingredients_str]  # Numerical features + predicted ingredients

# Get recommendations and re-rank them
recommendations = recommend_recipes(input_features)

# Output the recommended recipes
print("Recommended Recipes (Re-ranked by XGBoost):")
print(recommendations)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  recipe_df['ingredients_list'].fillna('', inplace=True)


XGBoost Model MSE: 0.34922484023849387

image 1/1 /content/potp.jpeg: 640x640 2 Potatos, 127.3ms
Speed: 2.5ms preprocess, 127.3ms inference, 0.4ms postprocess per image at shape (1, 3, 640, 640)
Recommended Recipes (Re-ranked by XGBoost):
                                recipe_name  \
20038                         Strassburgare   
16618                    Yummy Potato Skins   
16086                          Potato Chips   
26970                  Swiss Hazelnut Taler   
48285  Super Simple Sweet Potato Dog Treats   

                                        ingredients_list  \
20038  ['butter', "confectioners' sugar", 'vanilla su...   
16618  ['Italian-style salad dressing', "potato's pee...   
16086                ['vegetable oil', 'potato', 'salt']   
26970  ['shelled hazelnuts', 'all-purpose flour', 'ba...   
48285  ['sweet potato', 'wheat flour', 'unsweetened a...   

                                               image_url  xgboost_score  
20038  http://images.media-allrecipes.com/u

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  knn_recommendations['xgboost_score'] = xgboost_scores
