In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
from nltk.stem.porter import PorterStemmer

In [2]:
file_path = "cleaned_recipes(Final_Dataset).csv"
recipes_df = pd.read_csv(file_path)

In [3]:
stemmer = PorterStemmer()

In [4]:
def preprocess_ingredients(ingredients):
    # Lowercase the text
    ingredients = ingredients.lower()
    # Remove special characters and digits
    ingredients = re.sub(r"[^a-zA-Z\s,]", "", ingredients)
    # Stemming each word
    ingredients = " ".join([stemmer.stem(word) for word in ingredients.split()])
    return ingredients

In [5]:
recipes_df['Processed_Ingredients'] = recipes_df['Ingredients'].apply(preprocess_ingredients)
recipes_df['Processed_Ingredients'].head()

0    beans, carrot , green pea , potato , cucumber,...
1    cucumber, carrot , sugar, rice vinegar,gingell...
2    ladi finger , tamarind, turmer powder , coconu...
3    black eye bean , eleph yam , turmer powder , j...
4    chicken breasts, whole wheat bread crumbs, all...
Name: Processed_Ingredients, dtype: object

In [6]:
all_ingredients = recipes_df['Ingredients'].str.split(',').explode().str.strip().drop_duplicates()
ingredients_list = sorted(all_ingredients.tolist())
print(f"Total unique ingredients: {len(ingredients_list)}")
output_file_path = "unique_ingredients.txt"  # Replace with your desired output file path
with open(output_file_path, "w",encoding="utf-8") as file:
    for ingredient in ingredients_list:
        file.write(f"{ingredient}\n")

print(f"Unique ingredients saved to {output_file_path}")

Total unique ingredients: 1341
Unique ingredients saved to unique_ingredients.txt


In [7]:
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(), stop_words='english', token_pattern=None)

# Fit and transform the processed ingredients
ingredient_matrix = vectorizer.fit_transform(recipes_df['Ingredients'])

# Check the shape of the feature matrix
print(f"Feature matrix shape: {ingredient_matrix.shape}")

Feature matrix shape: (6691, 1595)


In [15]:
def recommend_recipes_by_ingredients(input_ingredients, top_n=5):
    # Preprocess user input
    user_ingredients = set(map(str.strip, input_ingredients.lower().split(',')))
    
    recommendations = []

    for index, row in recipes_df.iterrows():
        # Process recipe ingredients
        recipe_ingredients = set(map(str.strip, row['Ingredients'].lower().split(',')))
        
        # Check if all user ingredients are present in the recipe
        if user_ingredients.issubset(recipe_ingredients):
            # Calculate extra ingredients
            extra_ingredients = list(recipe_ingredients - user_ingredients)
            extra_ingredients_count = len(extra_ingredients)
            recommendations.append({
                'Recipe Name': row['Recipe Name'],
                'Extra Ingredients Count': extra_ingredients_count,
                'Extra Ingredients': ', '.join(extra_ingredients),  # Corrected key
                'Link': row['Link']
            })

    # Sort by extra ingredients count (ascending)
    recommendations = sorted(recommendations, key=lambda x: x['Extra Ingredients Count'])
    
    # Return the top N recipes
    return pd.DataFrame(recommendations[:top_n])


In [16]:
input_ingredients = "paneer,water"
ingredient_recommendations = recommend_recipes_by_ingredients(input_ingredients, top_n=5)
print(ingredient_recommendations)

                                         Recipe Name  Extra Ingredients Count  \
0                         saffron paneer peda recipe                        5   
1  manda pitha recipe orissa style steamed rice f...                        7   
2                     eggless paan cheesecake recipe                        7   
3                                   dry jamun recipe                        7   
4                  corn   paneer baked samosa recipe                        7   

                                   Extra Ingredients  \
0  cardamom powder, saffron strands, rose water, ...   
1  salt, cardamom powder, rice flour, milk, fresh...   
2  agar agar, caster sugar, vanilla extract, gulk...   
3  dessicated coconut, cardamom powder, milk powd...   
4  salt, carom seeds, coriander leaves, sweet cor...   

                                                Link  
0  https://www.archanaskitchen.com/saffron-paneer...  
1  https://www.archanaskitchen.com/manda-pitha-re...  
2  https://