In [1]:
import pandas as pd

# Load the final, merged file and take a first look at its columns and rows.
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")
print(df.columns)
print(df.head())


Index(['name', 'Unnamed: 0', 'country', 'description', 'id_', 'keywords',
       'is_shoppable', 'language', 'slug', 'video_url', 'is_licensed_video',
       'is_community', 'thumbnail_url', 'inspired_by', 'linked_recipes',
       'cook_time', 'prep_time', 'total_time', 'ratings_negative',
       'ratings_positive', 'score', 'protein', 'fat', 'calories', 'sugar',
       'carbohydrates', 'fiber', 'ingredients', 'tag_name'],
      dtype='object')
                                         name  Unnamed: 0 country  \
0  1-Day Noodles (Taiwanese Beef Noodle Soup)        1936      US   
1                         1-Hour Banana Bread        3885      US   
2               1-Hour Buffalo  Chicken Wings        1852      US   
3             1-Hour Noodles (Zha Jiang Mian)        1736      US   
4                            1-Minute Noodles        1536      US   

                                         description   id_ keywords  \
0                                                NaN  5464      N

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF vocabulary on one text document per row: the ingredient string
# followed by the tag names, with missing values treated as empty strings.
tfidf = TfidfVectorizer(stop_words='english')
X = tfidf.fit_transform(df['ingredients'].fillna('') + ' ' + df['tag_name'].fillna(''))

# Shape is (number of rows in df, number of vocabulary terms).
print(X.shape)


(4956, 2673)


## Skapa en funktion som, givet en textsträng (t.ex. "chicken, garlic, soy sauce"), returnerar de mest liknande recepten.

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def recommend_recipes(user_ingredients, df, tfidf, X, top_n=5):
    """
    Return the recipes most similar to a free-text ingredient query.

    Parameters
    ----------
    user_ingredients : str
        Query text, e.g. "chicken, garlic, soy sauce".
    df : pandas.DataFrame
        Recipe table. Must contain the columns 'name', 'ingredients',
        'tag_name' and 'description'. Rows are matched to the rows of ``X``
        by position, so ``df`` and ``X`` must be in the same order.
    tfidf : fitted vectorizer
        Object whose ``transform`` maps a list of strings into the same
        feature space as ``X``.
    X : matrix
        TF-IDF matrix with one row per row of ``df``.
    top_n : int, default 5
        Maximum number of recommendations to return. Values <= 0 give an
        empty result.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``df`` (columns 'name', 'ingredients',
        'tag_name', 'description'), ordered from most to least similar.
    """
    columns = ['name', 'ingredients', 'tag_name', 'description']

    # Guard against non-positive top_n: a slice such as [-0:] selects *every*
    # index, which would return the whole table instead of nothing.
    if top_n <= 0:
        return df.iloc[0:0][columns]

    # Build a TF-IDF vector for the user's ingredients.
    user_vec = tfidf.transform([user_ingredients])
    # Similarity of the query against every recipe (1-D array, one score per row).
    similarities = cosine_similarity(user_vec, X).flatten()
    # Positions of the highest scores, best first.
    top_indices = similarities.argsort()[::-1][:top_n]
    # Return the best-matching recipes.
    return df.iloc[top_indices][columns]

# Try the function with a sample query and show the top five matches.
test_result = recommend_recipes("chicken, garlic, soy sauce", df, tfidf, X, top_n=5)
print(test_result)


                                                name  \
3579                 Pork Red Na (Noodles And Gravy)   
2626                                     Katsu Curry   
3204                       One-Pan Flavorful Tenders   
4504  Thai Street Wings As Made By Chef Arnold Myint   
2148                                 Gobi Manchurian   

                                            ingredients  \
3579  pork shoulder, rice noodle, dark soy sauce, ra...   
2626  onion, butter, garlic, flour, mild curry powde...   
3204  oil, red chili flakes, jalapeño, white vinegar...   
4504  chicken wings, fish sauce, low sodium soy sauc...   
2148  cauliflower florets, salt, flour, cornstarch, ...   

                                               tag_name  \
3579   Dairy-Free, Weeknight, Dinner, Stove Top, Fusion   
2626  Low-Sugar, Stove Top, Special Occasion, Dinner...   
3204  Dairy-Free, Stove Top, Fusion, Weeknight, Lunc...   
4504  Dairy-Free, High-Protein, Thai, Dry Measuring ...   
2148  Vegetar

In [4]:
# Quick EDA: row count, number of distinct ingredient strings, and missing values.
# Ingredients are split on the exact separator ", ".
# NOTE(review): an ingredient name that itself contains ", " would be split
# into several entries and counted separately.
print("=== Grundläggande statistik ===")
print(f"Antal recept: {len(df)}")
print(f"Antal unika ingredienser: {df['ingredients'].str.split(', ').explode().nunique()}")
print(f"Saknade värden i 'ingredients': {df['ingredients'].isnull().sum()}")

# Show the most common ingredients.
# Despite its name, all_ingredients holds only the 20 highest counts.
all_ingredients = df['ingredients'].str.split(', ').explode().value_counts().head(20)
print("\nTop 20 vanligaste ingredienser:")
print(all_ingredients)


=== Grundläggande statistik ===
Antal recept: 4956
Antal unika ingredienser: 6279
Saknade värden i 'ingredients': 0

Top 20 vanligaste ingredienser:
ingredients
salt                           2148
kosher salt                    1506
garlic                         1324
pepper                         1166
olive oil                      1138
unsalted butter                 910
water                           857
sugar                           802
vanilla extract                 784
butter                          684
milk                            554
eggs                            543
large eggs                      474
granulated sugar                463
garlic powder                   462
powdered sugar                  444
all purpose flour               438
heavy cream                     435
baking powder                   421
freshly ground black pepper     406
Name: count, dtype: int64


In [5]:
def preprocess_text(text):
    """
    Normalise a comma-separated text cell into a single-spaced, lowercase string.

    Parameters
    ----------
    text : object
        Cell value to clean. Missing values (None / NaN) become "".
        Non-string values are converted with ``str()`` first.

    Returns
    -------
    str
        Lowercased text with commas replaced by spaces and runs of
        whitespace collapsed to one space.
    """
    if pd.isnull(text):
        return ""
    # Basic cleaning. str() keeps non-string cells (e.g. numbers) from
    # crashing on .lower().
    text = str(text).lower().replace(',', ' ')
    # split()/join collapses repeated whitespace and trims the ends.
    return ' '.join(text.split())

# Add a cleaned copy of each row's ingredient string as a new column.
df['processed_ingredients'] = df['ingredients'].apply(preprocess_text)


In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import os

# 1. Load the data (re-reads the CSV, replacing any df built in earlier cells)
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")

# 2. Preprocess text
def preprocess_text(text):
    """Lowercase a cell, turn commas into spaces and collapse whitespace.

    Missing values (None / NaN) yield an empty string; any other value is
    converted with ``str()`` before cleaning.
    """
    if pd.isnull(text):
        return ""
    lowered = str(text).lower()
    tokens = lowered.replace(',', ' ').split()
    return ' '.join(tokens)

# Combine ingredients and tags into one cleaned text document per row
df['processed'] = df['ingredients'].apply(preprocess_text) + ' ' + df['tag_name'].apply(preprocess_text)

# 3. Create and fit TF-IDF (vocabulary capped at 5000 terms)
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])  # use the new 'processed' column

# 4. Save the fitted vectorizer and the resulting matrix under ./models
os.makedirs("models", exist_ok=True)
joblib.dump(tfidf, "models/tfidf_model.pkl")
joblib.dump(tfidf_matrix, "models/tfidf_matrix.pkl")

print("✅ Modeller sparade!")




✅ Modeller sparade!


In [7]:
# After the models have been saved:
# validate with known recipes by feeding each sampled recipe's own ingredient
# string back in and printing the top three results.
# NOTE(review): get_recommendations is not defined in any cell visible in this
# notebook; it must be defined before this cell runs.
# na=False treats a missing name as "no match" instead of producing a NaN in
# the mask (which makes boolean indexing raise); random_state makes the
# sampled recipes reproducible across re-runs.
chicken_recipes = df[df['name'].str.contains('Chicken', na=False)].sample(3, random_state=42)
for _, row in chicken_recipes.iterrows():
    print(f"\nTestar med: {row['ingredients']}")
    print(get_recommendations(row['ingredients']).head(3))



Testar med: Marketside® Cauliflower Florets, kosher salt, black pepper, Marketside® Traditional Rotisserie Chicken, tomato salad, red wine vinaigrette, asparagus
                                               name  \
3802          Rotisserie Chicken Dinner: The Legacy   
3801    Rotisserie Chicken Dinner: The Garden-Lover   
3800  Rotisserie Chicken Dinner: Coop, There It Is!   

                                            ingredients description  
3802  Marketside® Cauliflower Florets, kosher salt, ...         NaN  
3801  Marketside® Cauliflower Florets, kosher salt, ...         NaN  
3800  Marketside® Butternut Squash, kosher salt, bla...         NaN  

Testar med: oil, chicken, salt, pepper, onion, carrot, potato, water, peas, curry paste
                       name  \
1115  Chinese Chicken Curry   
3976    Simple Veggie Curry   
3572     Poori & Aloo Sabzi   

                                            ingredients description  
1115  oil, chicken, salt, pepper, onion, carrot, pot

In [16]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import os

# Load the data (re-reads the CSV, replacing any df built in earlier cells)
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")

# Preprocess text
def preprocess(text):
    """Return a lowercase, single-spaced version of ``text`` with commas removed.

    Missing values (None / NaN) give ""; other values go through ``str()``.
    """
    if pd.isnull(text):
        return ""
    words = str(text).lower().replace(',', ' ').split()
    return ' '.join(words)

# One cleaned text document per row: ingredients followed by tags.
df['processed'] = df['ingredients'].apply(preprocess) + ' ' + df['tag_name'].apply(preprocess)

# Fit and save TF-IDF (vocabulary capped at 5000 terms)
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])

# Persist the fitted vectorizer and the resulting matrix under ./models.
os.makedirs("models", exist_ok=True)
joblib.dump(tfidf, "models/tfidf_model.pkl")
joblib.dump(tfidf_matrix, "models/tfidf_matrix.pkl")

print("✅ Modeller sparade!")



✅ Modeller sparade!


In [9]:
# Test loading the saved vectorizer and building a query vector with it.
# The path matches where the training cells write the model
# ("models/tfidf_model.pkl"); loading from the bare file name only works if a
# stale copy happens to sit in the working directory.
loaded_tfidf = joblib.load('models/tfidf_model.pkl')
# NOTE(review): the query below is Swedish while the ingredient text shown in
# this notebook is English, so this presumably only checks that transform()
# runs, not that any recipe matches.
test_input = "kyckling, ris, soja"
processed_test_input = preprocess_text(test_input)
test_vec = loaded_tfidf.transform([processed_test_input])
print("Testvektor skapad utan fel!")


Testvektor skapad utan fel!


In [14]:
# Test recommendations after running the code above.
# NOTE(review): get_recommendations is not defined in any cell visible in this
# notebook; it must be defined before this cell runs.
test_result = get_recommendations("chicken, rice, soy sauce")
print(test_result[['name', 'ingredients']])


                                    name  \
2083               General Tso’s Chicken   
2679                    Kung Pao Chicken   
3240  One-Pot Chicken Teriyaki With Rice   
3579     Pork Red Na (Noodles And Gravy)   
1798                 Easy Orange Chicken   

                                            ingredients  
2083  vegetable oil, rice wine, soy sauce, boneless,...  
2679  chicken breast, sesame oil, zucchini, red pepp...  
3240  olive oil, chicken breasts, salt, pepper, garl...  
3579  pork shoulder, rice noodle, dark soy sauce, ra...  
1798  boneless, skinless chicken breasts, soy sauce,...  
