In [32]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy


In [33]:
# Example usage
user_ingredients = ["vaj", "fokhagyma", "kolbász"]
query = "Egy sajtos levest szeretnék enni, fokhagymás"
user_recipe_id = 3

In [34]:
df = pd.read_json("recipes.json")

df['ingredients_str'] = df['ingredients'].apply(lambda x: " ".join(x))

# Check the preprocessed data
print(df[['title', 'ingredients_str']].head())

                                 title  \
0                       Abált szalonna   
1                         Áfonya torta   
2                     Áfonyás húsgolyó   
3  Áfonyás, kevert sütemény mandulával   
4          Áfonyás-narancsos sajttorta   

                                     ingredients_str  
0  tokaszalonna só fokhagyma fűszerpaprika friss ...  
1  tönköly búzaliszt makadámdió szójatejföl nádcu...  
2                                                     
3  a morzsához: 40 g liszt 100 g cukor 1 tk őrölt...  
4  5 dk teljes kiőrlésű búzadarás keksz 5 dkg zab...  


In [35]:
vectorizer = TfidfVectorizer()
ingredient_vectors = vectorizer.fit_transform(df['ingredients_str'])

# Find similar recipes
similarity_matrix = cosine_similarity(ingredient_vectors)

In [36]:
def recommend_recipe(recipe_id, similarity_matrix, df, top_n=5):
    sim_scores = list(enumerate(similarity_matrix[recipe_id]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    top_recipes = [df.iloc[i[0]] for i in sim_scores[1:top_n+1]]
    return top_recipes

In [37]:
# Find similar recipes to the recipe that has 3 as ID
recommend_recipe(user_recipe_id, similarity_matrix, df)

[id                                                              4590
 title                            Áfonyás, kevert sütemény mandulával
 content            A sütőt 190 fokra előmelegítjük. Egy 20x30 cm-...
 ingredients        [a morzsához:, 40 g liszt, 100 g cukor, 1 tk ő...
 ingredients_str    a morzsához: 40 g liszt 100 g cukor 1 tk őrölt...
 Name: 4590, dtype: object,
 id                                                              3588
 title                                            Céklás-almás morzsa
 content            Egy keverőtálban összekeverünk mandulalisztet,...
 ingredients        [50 g pekándió, 50 g fenyőmag, 150 g cékla, ba...
 ingredients_str    50 g pekándió 50 g fenyőmag 150 g cékla balzsa...
 Name: 3588, dtype: object,
 id                                                               703
 title                                                         Éclair
 content            A sütőt 200 fokra előmelegítjük A tejet, vizet...
 ingredients        [a tésztához:,

In [38]:
# Normalize ingredients (convert to lowercase for consistency)
df['ingredients_normalized'] = df['ingredients'].apply(
    lambda x: [ingredient.lower() for ingredient in x]
)

# Check the normalized data
print(df[['title', 'ingredients_normalized']].head())

                                 title  \
0                       Abált szalonna   
1                         Áfonya torta   
2                     Áfonyás húsgolyó   
3  Áfonyás, kevert sütemény mandulával   
4          Áfonyás-narancsos sajttorta   

                              ingredients_normalized  
0  [tokaszalonna, só, fokhagyma, fűszerpaprika, f...  
1  [tönköly búzaliszt, makadámdió, szójatejföl, n...  
2                                                 []  
3  [a morzsához:, 40 g liszt, 100 g cukor, 1 tk ő...  
4  [5 dk teljes kiőrlésű búzadarás keksz, 5 dkg z...  


In [39]:
def find_recipes_by_ingredients(user_ingredients, data, top_n=5):
    user_ingredients = [ingredient.lower() for ingredient in user_ingredients]  # Normalize user input
    
    # Calculate overlap score for each recipe
    data['ingredient_overlap'] = data['ingredients_normalized'].apply(
        lambda recipe_ingredients: len(set(user_ingredients) & set(recipe_ingredients))
    )
    
    # Sort recipes by overlap score in descending order
    recommendations = data.sort_values(by='ingredient_overlap', ascending=False).head(top_n)
    
    # Return the top matching recipes
    return recommendations[['id', 'title', 'ingredient_overlap']]

print(find_recipes_by_ingredients(user_ingredients, df))

        id                                              title  \
1704  1704                     Lasagne cukkínivel és lazaccal   
4460  4460  Lazacfilé aszalt paradicsomos pestoval és bazs...   
4368  4368                      Bélszín mare e monti szósszal   
328    328  Borjútekercs marsalaval ízesített kacsamájjal ...   
2409  2409                       Radicchios-mentás palacsinta   

      ingredient_overlap  
1704                   2  
4460                   2  
4368                   2  
328                    2  
2409                   2  


In [40]:
def combined_recommendation(user_ingredients, similarity_matrix, data, top_n=5):
    # Match by ingredients
    ingredient_matches = find_recipes_by_ingredients(user_ingredients, data, top_n=None)
    
    # Add similarity scores
    ingredient_matches['similarity_score'] = ingredient_matches['id'].apply(
        lambda recipe_id: similarity_matrix[data.index[data['id'] == recipe_id][0]].mean()
    )
    
    # Combine scores (weighted average)
    ingredient_matches['combined_score'] = ingredient_matches['ingredient_overlap'] * 0.7 + ingredient_matches['similarity_score'] * 0.3
    
    # Sort by combined score
    recommendations = ingredient_matches.sort_values(by='combined_score', ascending=False).head(top_n)
    
    return recommendations[['id', 'title', 'combined_score']]

# Example usage
print(combined_recommendation(user_ingredients, similarity_matrix, df))


        id                                              title  combined_score
2518  2518  Rosé kacsamell grillezett édesburgonyával és c...        1.421247
2279  2279           Párolt bárány bébirépával és burgonyával        1.416472
2831  2831                                  Sütőtökös rizottó        1.411917
2338  2338                            Pisztráng májjal töltve        1.411856
2409  2409                       Radicchios-mentás palacsinta        1.411794


In [41]:
# Load spaCy's NLP model
nlp = spacy.load("en_core_web_sm")

# Example recipe data
data = pd.read_json("recipes.json")

# Normalize ingredients and combine features
data['ingredients_normalized'] = data['ingredients'].apply(lambda x: [ing.lower() for ing in x])
data['ingredients_str'] = data['ingredients_normalized'].apply(lambda x: " ".join(x))
data['combined_features'] = data['ingredients_str'] + " " + data['content']

# Vectorize the combined features for similarity search
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(data['combined_features'])


In [42]:
def recommend_recipes(query, data, tfidf_matrix, top_n=5):
    """
    Recommend recipes based on free text or ingredient list query.
    """
    if isinstance(query, list):  # If input is a list of ingredients
        query = " ".join([q.lower() for q in query])  # Join ingredients into a single string
    
    # Use spaCy to process free-text queries
    doc = nlp(query.lower())
    keywords = " ".join([token.text for token in doc if token.is_alpha])  # Extract meaningful words
    
    # Transform the query into a TF-IDF vector
    query_vector = vectorizer.transform([keywords])
    
    # Calculate cosine similarity with the recipe matrix
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    
    # Sort recipes by similarity score
    data['similarity_score'] = similarities
    recommendations = data.sort_values(by='similarity_score', ascending=False).head(top_n)
    
    return recommendations[['id', 'title', 'similarity_score']]

print(recommend_recipes(query, data, tfidf_matrix))


print(recommend_recipes(user_ingredients, data, tfidf_matrix))


        id                                              title  \
1403  1403                                         Kagyló 2 X   
3922  3922                          Krumplis flekni lekvárral   
4518  4518   Sárgarépa-póréhagyma krémleves sajtos ropogóssal   
718    718         Édesburgonya-krémleves chilis rákfarkakkal   
3216  3216  „Torta” burgonyából és taleggio sajtból, debre...   

      similarity_score  
1403          0.152379  
3922          0.146394  
4518          0.136020  
718           0.128300  
3216          0.112828  
        id                                            title  similarity_score
3378  3378                       Vörösboros spanyol kolbász          0.250355
2425  2425  Rakott krumpli túróval, szalonnával, kolbásszal          0.221725
4663  4663                                Egylábasos tészta          0.214181
738    738                                Egylábasos tészta          0.214181
409    409                                Cékla burgonyával          

In [43]:
def combined_scoring(query, data, tfidf_matrix, top_n=5):
    # Calculate text similarity
    recommendations = recommend_recipes(query, data, tfidf_matrix, top_n=None)
    
    # Calculate ingredient overlap for ingredient-based queries
    if isinstance(query, list):
        query_set = set([q.lower() for q in query])
        data['ingredient_overlap'] = data['ingredients_normalized'].apply(
            lambda x: len(query_set & set(x))
        )
    else:
        data['ingredient_overlap'] = 0
    
    # Combine similarity and overlap scores
    data['combined_score'] = data['similarity_score'] * 0.7 + data['ingredient_overlap'] * 0.3
    recommendations = data.sort_values(by='combined_score', ascending=False).head(top_n)
    
    return recommendations[['id', 'title', 'combined_score']]

# Example with combined scoring
print(combined_scoring(query_ingredients, data, tfidf_matrix))


        id                              title  combined_score
3440  3440  Zöldsaláta balzsamos gombaraguval        0.435618
3403  3403              Zacskóban sült csirke        0.423591
4834  4834                             Quiche        0.370622
4503  4503                             Quiche        0.370622
4724  4724              Juhtúrós borkorcsolya        0.367814
