<a href="https://colab.research.google.com/github/phoenixha4/recipe_recommender/blob/main/vegan_recomm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Load the vegan recipes dataset
recipes_df = pd.read_csv('vegan_recipes.csv')

# Generate synthetic user data
num_users = 100  # Adjust the number of users as needed
user_ids = range(1, num_users + 1)
user_data = []

for user_id in user_ids:
    # Randomly select liked recipes (up to 5)
    num_likes = np.random.randint(1, 6)
    liked_recipes = np.random.choice(recipes_df['sno'], size=num_likes, replace=False)

    # Randomly select past searches (up to 5)
    num_searches = np.random.randint(1, 6)
    past_searches = np.random.choice(recipes_df['sno'], size=num_searches, replace=False)

    user_data.append({
        'user_id': user_id,
        'likes': liked_recipes,
        'past_searches': past_searches
    })

# Create a DataFrame from the synthetic user data
user_profiles_df = pd.DataFrame(user_data)

# Display the user profiles
print(user_profiles_df.head(10))


   user_id                         likes           past_searches
0        1                        [1021]              [582, 167]
1        2    [1132, 890, 406, 803, 517]             [1026, 688]
2        3                        [1246]                   [948]
3        4        [1225, 281, 570, 1222]        [279, 285, 1233]
4        5               [502, 414, 811]  [506, 1141, 787, 1306]
5        6        [1160, 442, 1052, 443]   [108, 391, 905, 1058]
6        7  [669, 1323, 542, 1162, 1332]        [497, 305, 1098]
7        8    [1220, 657, 137, 513, 684]  [127, 1168, 1013, 354]
8        9  [1233, 1165, 609, 675, 1292]          [239, 186, 46]
9       10             [1317, 190, 1250]        [297, 889, 1179]


In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk
import re
from nltk.corpus import stopwords
from google.colab import files

# Install necessary libraries
!pip install pandas scikit-learn nltk

# Download NLTK punkt and wordnet resources
import nltk
nltk.download('punkt')
nltk.download('wordnet')

# Download NLTK stopwords
nltk.download('stopwords')

# Function to clean ingredient text
def clean_ingredient(ingredient):
    # Remove quantities and measurements
    ingredient = re.sub(r'\b\d+\s*(tsp|tbsp|g|kg|oz|ml|cup|cups|grams|pounds)\b', '', ingredient, flags=re.IGNORECASE)
    # Remove common words
    common_words = ['ingredient', 'optional', 'vegan', 'non-dairy', 'free', 'gluten-free', 'organic']
    for word in common_words:
        ingredient = ingredient.replace(word, '')
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    # Remove numbers and words indicating quantities
    ingredient = ' '.join([word for word in ingredient.split() if not word.isdigit() and word.lower() not in stop_words])
    return ingredient.strip()

# Function to preprocess ingredients
def preprocess_ingredients(ingredients):
    tokens = nltk.word_tokenize(ingredients.lower())
    cleaned_ingredients = [clean_ingredient(word) for word in tokens]
    processed_text = ' '.join([lemmatizer.lemmatize(word) for word in cleaned_ingredients])
    return processed_text

# Load the dataset
df = pd.read_csv("vegan_recipes.csv")

# Tokenization and Lemmatization using NLTK
lemmatizer = WordNetLemmatizer()

# Apply preprocessing to the ingredients column
df['processed_ingredients'] = df['ingredients'].apply(preprocess_ingredients)

# Extract liked ingredients and past search ingredients for each user
user_profiles_df['liked_ingredients'] = user_profiles_df['likes'].apply(lambda likes: df.loc[df['sno'].isin(likes), 'processed_ingredients'].tolist())
user_profiles_df['searched_ingredients'] = user_profiles_df['past_searches'].apply(lambda searches: df.loc[df['sno'].isin(searches), 'processed_ingredients'].tolist())

# Merge ingredients into a single list for each user
user_profiles_df['all_ingredients'] = user_profiles_df.apply(lambda row: row['liked_ingredients'] + row['searched_ingredients'], axis=1)

# Combine ingredients and titles into a single text for each recipe
df['processed_text'] = df['processed_ingredients'] + ' ' + df['title']

# Create a TF-IDF Vectorizer with normalization
vectorizer = make_pipeline(TfidfVectorizer(stop_words='english'), Normalizer())

# Transform the processed text column into TF-IDF features
tfidf_matrix = vectorizer.fit_transform(df['processed_text'])

# Compute the cosine similarity between recipes based on ingredients and titles
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get recommendations based on ingredients
def get_ingredient_recommendations(user_input_ingredients, num_recommendations=5):
    # Preprocess user input ingredients
    processed_user_ingredients = preprocess_ingredients(user_input_ingredients)

    # Transform user input ingredients into TF-IDF features
    user_tfidf = vectorizer.transform([processed_user_ingredients])

    # Calculate cosine similarities between user input and all recipes
    user_cosine_similarities = linear_kernel(user_tfidf, tfidf_matrix).flatten()

    # Get indices of recipes sorted by similarity
    recipe_indices = user_cosine_similarities.argsort()[::-1]

    # Get the top N similar recipes
    top_recipes = recipe_indices[1:num_recommendations + 1]



    return df['title'].iloc[top_recipes]

# Function to get user-specific recommendations based on ingredients and user preferences
def get_user_and_ingredient_recommendations(user_input_ingredients, user_id, num_recommendations=5):
    # Preprocess user input ingredients
    processed_user_ingredients = preprocess_ingredients(user_input_ingredients)

    # Transform user input ingredients into TF-IDF features
    user_tfidf = vectorizer.transform([processed_user_ingredients])

    # Calculate cosine similarities between user input and all recipes
    user_cosine_similarities = linear_kernel(user_tfidf, tfidf_matrix).flatten()

    # Combine cosine similarities based on user preferences and user input
    combined_cosine_similarities = user_cosine_similarities + cosine_sim[user_id]

    # Get indices of recipes sorted by combined similarity
    recipe_indices = combined_cosine_similarities.argsort()[::-1]

    # Get the top N similar recipes
    top_recipes = recipe_indices[1:num_recommendations + 1]



    return df['title'].iloc[top_recipes]

# Function to get user-specific recommendations based on user preferences
def get_user_recommendations(user_id, num_recommendations=5):
    # Get indices of recipes sorted by user preferences
    recipe_indices = cosine_sim[user_id].argsort()[::-1]

    # Get the top N similar recipes
    top_recipes = recipe_indices[1:num_recommendations + 1]



    return df['title'].iloc[top_recipes]




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
print(df.head(5))

   sno                                               href  \
0    0        https://veganuary.com/recipes/rainbow-rice/   
1    1          https://veganuary.com/recipes/mfc-nachos/   
2    2   https://veganuary.com/recipes/hazelnut-truffles/   
3    3  https://veganuary.com/recipes/simple-roasted-r...   
4    4  https://veganuary.com/recipes/baked-apple-char...   

                            title  \
0                    Rainbow Rice   
1                          Nachos   
2               Hazelnut Truffles   
3  Simple Roasted Radish by ChicP   
4           Baked Apple Charlotte   

                                         ingredients  \
0  Ingredients\n\nCarrot ribbons (just use a peel...   
1  Ingredients\n\n400g Meatless Farm Co mince (or...   
2  Ingredients\n\n100g hazelnuts\n2 tablespoons +...   
3  Ingredients\n\n1 170g tub beetroot and horsera...   
4  Ingredients\n\n2 tbsp rapeseed oil\n75g pitted...   

                                         preparation  \
0  Method\n\nCook

In [3]:
# Example 1: Get recommendations based on ingredients only
user_input_ingredients = "tofu, spinach, tomato, garlic"
ingredient_recommendations = get_ingredient_recommendations(user_input_ingredients)
print(f"\nRecipes recommended based on input ingredients:")
print(ingredient_recommendations)

# Example 2: Get user-specific recommendations based on ingredients and user preferences
user_input_ingredients = "tofu, spinach, tomato, garlic"
user_id = 56
user_and_ingredient_recommendations = get_user_and_ingredient_recommendations(user_input_ingredients, user_id)
print(f"\nRecipes recommended based on user preferences and input ingredients:")
print(user_and_ingredient_recommendations)

# Example 3: Get user-specific recommendations based on user preferences only
user_id = 56
user_recommendations = get_user_recommendations(user_id)
print(f"\nRecipes recommended based on user preferences:")
print(user_recommendations)


Recipes recommended based on input ingredients:
667                            Vegan Tofu Benedict
61                   Baked Buckwheat with Tomatoes
1094                       SOUTHWEST TOFU SCRAMBLE
1166    SPINACH AND ‘RICOTTA’ VEGAN STUFFED SHELLS
595       Roast Butternut Squash and Spinach Salad
Name: title, dtype: object

Recipes recommended based on user preferences and input ingredients:
23                     Lentil Bolognese
446        Turkish Tofu & Spinach Börek
626                 Cottage Pie Cobbler
750           Roasted Aubergine Lasagne
463    Tofu Scramble Breakfast Burritos
Name: title, dtype: object

Recipes recommended based on user preferences:
23                                    Lentil Bolognese
750                          Roasted Aubergine Lasagne
814    Cheese on Toast (with Potato and Carrot Cheese)
421                                Spaghetti Bolognese
626                                Cottage Pie Cobbler
Name: title, dtype: object
