In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import os


In [2]:
# Fail fast if the dataset is not next to the notebook.
# `os` is already imported in the first cell, so it is not re-imported here.
# isfile (rather than exists) also rejects a directory that happens to carry
# the same name, which would otherwise pass this check and break read_csv.
assert os.path.isfile("recipes_with_ingredients_and_tags.csv"), "CSV-filen saknas! Ladda upp den till projektmappen."


In [3]:
# Load the recipe data set, then print its column index and its first rows
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")
for preview in (df.columns, df.head()):
    print(preview)


Index(['name', 'Unnamed: 0', 'country', 'description', 'id_', 'keywords',
       'is_shoppable', 'language', 'slug', 'video_url', 'is_licensed_video',
       'is_community', 'thumbnail_url', 'inspired_by', 'linked_recipes',
       'cook_time', 'prep_time', 'total_time', 'ratings_negative',
       'ratings_positive', 'score', 'protein', 'fat', 'calories', 'sugar',
       'carbohydrates', 'fiber', 'ingredients', 'tag_name'],
      dtype='object')
                                         name  Unnamed: 0 country  \
0  1-Day Noodles (Taiwanese Beef Noodle Soup)        1936      US   
1                         1-Hour Banana Bread        3885      US   
2               1-Hour Buffalo  Chicken Wings        1852      US   
3             1-Hour Noodles (Zha Jiang Mian)        1736      US   
4                            1-Minute Noodles        1536      US   

                                         description   id_ keywords  \
0                                                NaN  5464      N

In [4]:
# Text preprocessing helper
def preprocess_text(text):
    """Normalize a raw text value into a lowercase, space-separated string.

    Args:
        text: Any scalar value. Values that ``pd.isnull`` reports as missing
            (e.g. ``None`` or ``NaN``) are treated as empty text.

    Returns:
        str: ``""`` for missing input; otherwise ``str(text)`` lower-cased,
        with commas replaced by spaces and every run of whitespace collapsed
        to a single space (leading/trailing whitespace removed).
    """
    if not pd.isnull(text):
        lowered = str(text).lower()
        # Commas act as separators, so treat them like whitespace; split()
        # without arguments then drops empty pieces and repeated blanks.
        tokens = lowered.replace(',', ' ').split()
        return ' '.join(tokens)
    return ""


In [5]:
# Create a new column that combines the cleaned ingredients and the cleaned tags,
# separated by a single space
ingredients_clean = df['ingredients'].apply(preprocess_text)
tags_clean = df['tag_name'].apply(preprocess_text)
df['processed'] = ingredients_clean + ' ' + tags_clean


In [6]:
# Fit the TF-IDF vectorizer on the combined text column and persist the results
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])

# Store the fitted vectorizer and the document-term matrix side by side
os.makedirs("models", exist_ok=True)
for artifact, target in (
    (tfidf, "models/tfidf_model.pkl"),
    (tfidf_matrix, "models/tfidf_matrix.pkl"),
):
    joblib.dump(artifact, target)

print("✅ Modeller sparade!")


✅ Modeller sparade!


In [7]:
# Recommendation function
def get_recommendations(user_input, top_n=5):
    """Return the recipes whose TF-IDF vectors are most similar to ``user_input``.

    Relies on the notebook-level ``tfidf`` (fitted vectorizer), ``tfidf_matrix``
    (its output for ``df['processed']``) and ``df``; row ``i`` of
    ``tfidf_matrix`` is looked up as ``df.iloc[i]``.

    Args:
        user_input: Free text (e.g. a comma-separated ingredient list). It is
            cleaned with ``preprocess_text`` before being vectorized.
        top_n: Maximum number of recipes to return. Zero or a negative value
            yields an empty result.

    Returns:
        pandas.DataFrame: Columns ``name``, ``ingredients`` and
        ``description`` (missing values replaced by ``""``), ordered from the
        highest to the lowest cosine similarity. Rows are chosen purely by
        rank, so recipes with a similarity of 0 can appear when fewer than
        ``top_n`` recipes share any term with the input.
    """
    processed_input = preprocess_text(user_input)
    input_vec = tfidf.transform([processed_input])
    sim_scores = cosine_similarity(input_vec, tfidf_matrix).flatten()

    # Slice from the front of the descending ranking. The previous form,
    # argsort()[-top_n:], returned *every* recipe for top_n == 0 (because
    # -0 == 0) and an arbitrary tail for negative values.
    count = max(top_n, 0)
    top_indices = sim_scores.argsort()[::-1][:count]

    results = df.iloc[top_indices][['name', 'ingredients', 'description']].fillna("")
    return results


In [8]:
# Validate the model with known recipes: feed the ingredient list of a few
# chicken recipes back into the recommender and print the top three matches.
# A fixed random_state keeps the sampled recipes (and therefore the printed
# output) identical across "Restart & Run All".
chicken_recipes = df[df['name'].str.contains('Chicken', case=False, na=False)].sample(3, random_state=42)
for _, row in chicken_recipes.iterrows():
    print(f"\nTestar med: {row['ingredients']}")
    # Ask for exactly three results instead of fetching five and trimming.
    print(get_recommendations(row['ingredients'], top_n=3))



Testar med: olive oil, chicken breasts, salt, pepper, garlic, flour, chicken broth, skim milk, salt, pepper, spinach, whole grain penne pasta, parmesan cheese
                                       name  \
2302        Healthier Chicken Alfredo Pasta   
911       Cheesy Chicken Alfredo Pasta Bake   
3239  One-Pot Chicken Spinach Bacon Alfredo   

                                            ingredients description  
2302  olive oil, chicken breasts, salt, pepper, garl...              
911   olive oil, chicken breasts, salt, pepper, garl...              
3239  bacon, chicken breasts, spinach, garlic, salt,...              

Testar med: 2 tablespoons neutral oil, such as canola or vegetable, 1 1/2 cups diced yellow onion, 1 cup diced carrot, 1 cup diced celery, 6 garlic cloves, grated, 1 1/2 medium chicken breasts, cooked and chopped, 5 cups chicken stock, 1 tablespoon kosher salt, 1 tablespoon garlic powder, 1 tablespoon onion powder, 1 tablespoon black pepper, 1 tablespoon dried thyme, 

In [9]:
# Smoke test: reload the persisted vectorizer from disk and run one transform.
# Note that the "successful" message only means transform() did not raise.
loaded_tfidf = joblib.load("models/tfidf_model.pkl")
has_idf = hasattr(loaded_tfidf, "idf_")
print("Loaded IDF vector exists:", has_idf)

test_text = "chicken, rice, soy sauce"
test_vec = loaded_tfidf.transform([test_text])
print("Test transform successful!")


Loaded IDF vector exists: True
Test transform successful!
