In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import os


In [2]:
# Fail fast if the CSV is not in the same folder as the notebook.
# `os` is already imported in the notebook's import cell, so it is not
# re-imported here (keeps all imports in one place).
assert os.path.exists("recipes_with_ingredients_and_tags.csv"), "CSV-filen saknas! Ladda upp den till projektmappen."


In [4]:
# Read the recipe data and preview its columns and first rows.
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")
# One print call, newline-separated: same text as two consecutive prints.
print(df.columns, df.head(), sep="\n")


Index(['name', 'Unnamed: 0', 'country', 'description', 'id_', 'keywords',
       'is_shoppable', 'language', 'slug', 'video_url', 'is_licensed_video',
       'is_community', 'thumbnail_url', 'inspired_by', 'linked_recipes',
       'cook_time', 'prep_time', 'total_time', 'ratings_negative',
       'ratings_positive', 'score', 'protein', 'fat', 'calories', 'sugar',
       'carbohydrates', 'fiber', 'ingredients', 'tag_name'],
      dtype='object')
                                         name  Unnamed: 0 country  \
0  1-Day Noodles (Taiwanese Beef Noodle Soup)        1936      US   
1                         1-Hour Banana Bread        3885      US   
2               1-Hour Buffalo  Chicken Wings        1852      US   
3             1-Hour Noodles (Zha Jiang Mian)        1736      US   
4                            1-Minute Noodles        1536      US   

                                         description   id_ keywords  \
0                                                NaN  5464      N

In [5]:
# Define the preprocessing function
def preprocess_text(text):
    """Normalise a free-text field into a lowercase, space-separated string.

    Missing values (anything ``pd.isnull`` reports as null) become an empty
    string. Otherwise the value is converted with ``str``, lowercased,
    commas are replaced by spaces, and runs of whitespace are collapsed to
    single spaces.
    """
    if pd.isnull(text):
        return ""
    lowered = str(text).lower()
    # Commas act as separators; split() with no argument also drops
    # leading/trailing whitespace and collapses repeated whitespace.
    tokens = lowered.replace(',', ' ').split()
    return ' '.join(tokens)


In [6]:
# Create a new column that combines ingredients and tags
ingredient_text = df['ingredients'].apply(preprocess_text)
tag_text = df['tag_name'].apply(preprocess_text)
# Element-wise concatenation with a single space between the two parts.
df['processed'] = ingredient_text + ' ' + tag_text


In [7]:
# Train the TF-IDF model and persist it
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])

# Write the fitted vectorizer and the document-term matrix to ./models,
# creating the directory if it does not exist yet.
os.makedirs("models", exist_ok=True)
for artifact, target_path in (
    (tfidf, "models/tfidf_model.pkl"),
    (tfidf_matrix, "models/tfidf_matrix.pkl"),
):
    joblib.dump(artifact, target_path)

print("✅ Modeller sparade!")


✅ Modeller sparade!


In [8]:
# Define the recommendation function
def get_recommendations(user_input, top_n=5):
    """Return the recipes most similar to ``user_input``.

    The input is normalised with ``preprocess_text``, vectorised with the
    notebook-level ``tfidf`` vectorizer and compared by cosine similarity
    against every row of the notebook-level ``tfidf_matrix``. Rows of ``df``
    are selected by position, so ``df`` must still have the row order it had
    when ``tfidf_matrix`` was built.

    Parameters
    ----------
    user_input : str or None
        Free-text query (e.g. a comma-separated ingredient list).
    top_n : int, default 5
        Maximum number of recipes to return. Values <= 0 give an empty frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``ingredients``, ``description`` (NaN replaced by
        ""), ordered by descending similarity. May contain fewer than
        ``top_n`` rows: recipes with zero similarity to the query are not
        returned, so an empty or out-of-vocabulary query yields no rows.
    """
    columns = ['name', 'ingredients', 'description']

    # Guard: with top_n == 0 the slice [-0:] would select *every* recipe.
    if top_n <= 0:
        return df.iloc[0:0][columns].fillna("")

    processed_input = preprocess_text(user_input)
    input_vec = tfidf.transform([processed_input])
    sim_scores = cosine_similarity(input_vec, tfidf_matrix).flatten()

    # Last top_n entries of the ascending argsort, reversed to best-first.
    top_indices = sim_scores.argsort()[-top_n:][::-1]
    # Drop candidates that share no terms with the query; a score of 0 means
    # the row would be an arbitrary pick, not a recommendation.
    top_indices = top_indices[sim_scores[top_indices] > 0]

    return df.iloc[top_indices][columns].fillna("")


In [9]:
# Validate the model with known recipes
chicken_matches = df[df['name'].str.contains('Chicken', case=False, na=False)]
# Fixed random_state -> the same recipes are picked on every Restart & Run All.
# min(...) avoids the ValueError sample() raises when fewer than 3 rows match.
chicken_recipes = chicken_matches.sample(n=min(3, len(chicken_matches)), random_state=42)
for _, row in chicken_recipes.iterrows():
    print(f"\nTestar med: {row['ingredients']}")
    # Ask for the top 3 directly instead of fetching 5 and truncating.
    print(get_recommendations(row['ingredients'], top_n=3))



Testar med: bone-in, skin-on chicken thighs, sea salt, freshly ground black pepper, canola oil, yellow onion, fresh tomato, poblano chile, garlic, hot smoked paprika, red wine vinegar, low sodium chicken stock, bacon, medium red onion, red bell pepper, celery, jalapeño, basmati rice, chopped tomato, bay leaves, sea salt, low sodium chicken broth
                                                   name  \
620   Braised Chicken Thighs With Red Rice As Made B...   
977                               Chicken And Kale Stew   
1763                            Easy Chicken Cacciatore   

                                            ingredients  \
620   bone-in, skin-on chicken thighs, sea salt, fre...   
977   fresh bay leaves, fresh rosemary, fresh thyme,...   
1763  boneless, skinless chicken breasts, salt, pepp...   

                                            description  
620   My father who has Alzheimer’s was born in Hava...  
977   Want a cozy chicken stew that tastes like it’s...  
176