In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
import os


In [2]:
# Kontrollera att filen finns i samma mapp som din notebook
import os
assert os.path.exists("recipes_with_ingredients_and_tags.csv"), "CSV-filen saknas! Ladda upp den till projektmappen."


In [3]:
#LÄser in data
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")
print(df.columns)
print(df.head())


Index(['name', 'Unnamed: 0', 'country', 'description', 'id_', 'keywords',
       'is_shoppable', 'language', 'slug', 'video_url', 'is_licensed_video',
       'is_community', 'thumbnail_url', 'inspired_by', 'linked_recipes',
       'cook_time', 'prep_time', 'total_time', 'ratings_negative',
       'ratings_positive', 'score', 'protein', 'fat', 'calories', 'sugar',
       'carbohydrates', 'fiber', 'ingredients', 'tag_name',
       'has_instructions'],
      dtype='object')
                                         name  Unnamed: 0 country  \
0  1-Day Noodles (Taiwanese Beef Noodle Soup)        1936      US   
1                         1-Hour Banana Bread        3885      US   
2               1-Hour Buffalo  Chicken Wings        1852      US   
3             1-Hour Noodles (Zha Jiang Mian)        1736      US   
4                            1-Minute Noodles        1536      US   

                                         description   id_ keywords  \
0                                     

In [4]:
# recipes_knn.ipynb
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import joblib
import re

# 1. Load data
df = pd.read_csv("recipes_with_ingredients_and_tags.csv")
# 2. Preprocess text
def preprocess(text):
    # Convert to string and handle NaN
    text = str(text) if pd.notnull(text) else ""
    # Convert to lowercase and remove special characters
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text

df['processed'] = df['ingredients'].apply(preprocess) + ' ' + df['tag_name'].apply(preprocess)

# 3. Create TF-IDF matrix
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])

# 4. Train KNN model
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(tfidf_matrix)

# 5. Recommendation function
def get_knn_recommendations(query, top_n=5):
    # Validate English input
    if re.search(r'[^a-zA-Z0-9, ]', query):
        raise ValueError("Please use English ingredient names only")
    
    processed_query = preprocess(query)
    query_vec = tfidf.transform([processed_query])
    distances, indices = knn.kneighbors(query_vec, n_neighbors=top_n)
    
    return df.iloc[indices[0]][['name', 'ingredients', 'tag_name']]

# Example usage with English ingredients
test_query = "chicken, rice, soy sauce"
print(get_knn_recommendations(test_query))


                                    name  \
2083               General Tso’s Chicken   
2679                    Kung Pao Chicken   
3240  One-Pot Chicken Teriyaki With Rice   
3579     Pork Red Na (Noodles And Gravy)   
1798                 Easy Orange Chicken   

                                            ingredients  \
2083  vegetable oil, rice wine, soy sauce, boneless,...   
2679  chicken breast, sesame oil, zucchini, red pepp...   
3240  olive oil, chicken breasts, salt, pepper, garl...   
3579  pork shoulder, rice noodle, dark soy sauce, ra...   
1798  boneless, skinless chicken breasts, soy sauce,...   

                                               tag_name  
2083  Saute Pan, Spider, Spatula, Chef's Knife, Cutt...  
2679  Wooden Spoon, Pyrex, Tongs, Measuring Spoons, ...  
3240  Dairy-Free, High-Protein, Kid-Friendly, Weekni...  
3579   Dairy-Free, Weeknight, Dinner, Stove Top, Fusion  
1798  Oh So Rosé, Saute Pan, Tongs, Cutting Board, O...  


In [5]:
#Definiera förbearbetningsfunktion
def preprocess_text(text):
    if pd.isnull(text):
        return ""
    return ' '.join(str(text).lower().replace(',', ' ').split())


In [6]:
#Skapa en ny kolumn som kombinerar ingredienser och taggar
df['processed'] = df['ingredients'].apply(preprocess_text) + ' ' + df['tag_name'].apply(preprocess_text)


In [7]:
#Träna och spara TF-IDF-modellen
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['processed'])

os.makedirs("models", exist_ok=True)
#joblib.dump(tfidf, "models/tfidf_model.pkl")
joblib.dump(tfidf_matrix, "models/tfidf_matrix.pkl")

# Efter att ha skapat tfidf_matrix
knn = NearestNeighbors(n_neighbors=10, metric='cosine')
knn.fit(tfidf_matrix)

# Spara både TF-IDF och KNN
joblib.dump(tfidf, "models/tfidf_model.pkl")
joblib.dump(knn, "models/knn_model.pkl")

print("✅ Modeller sparade!")


✅ Modeller sparade!


In [8]:
#Definiera rekommendationsfunktion
def get_recommendations(user_input, top_n=5):
    processed_input = preprocess_text(user_input)
    input_vec = tfidf.transform([processed_input])
    sim_scores = cosine_similarity(input_vec, tfidf_matrix).flatten()
    top_indices = sim_scores.argsort()[-top_n:][::-1]
    results = df.iloc[top_indices][['name', 'ingredients', 'description']].fillna("")
    return results


In [9]:
#Validera modellen med kända recept
chicken_recipes = df[df['name'].str.contains('Chicken', case=False, na=False)].sample(3)
for _, row in chicken_recipes.iterrows():
    print(f"\nTestar med: {row['ingredients']}")
    print(get_recommendations(row['ingredients']).head(3))



Testar med: olive oil, medium yellow onion, garlic, ground chicken, large egg, panko breadcrumbs, grated parmesan cheese, fresh parsley, kosher salt, freshly ground black pepper, pasta, sauce of choice
                              name  \
1027             Chicken Meatballs   
460         Basic Turkey Meatballs   
2891  Meatball Stuffed Shell Pasta   

                                            ingredients  \
1027  olive oil, medium yellow onion, garlic, ground...   
460   olive oil, medium yellow onion, garlic, ground...   
2891  ground beef, ground pork, panko breadcrumbs, d...   

                                            description  
1027  Chicken dinner takes a new shape with these sa...  
460   Everybody loves meatball night! These classic ...  
2891                                                     

Testar med: chicken wings, kosher salt, Tasty Zesty Spice Blend
                                          name  \
144              Air Fryer Zesty Chicken Wings   
2236      

In [10]:
# Test loading vectorizer
loaded_tfidf = joblib.load("models/tfidf_model.pkl")
print("Loaded IDF vector exists:", hasattr(loaded_tfidf, "idf_"))
test_text = "chicken, rice, soy sauce"
test_vec = loaded_tfidf.transform([test_text])
print("Test transform successful!")


Loaded IDF vector exists: True
Test transform successful!
