In [1]:
# RAG_Pipeline.ipynb
import os
import pandas as pd
import joblib
import re
import numpy as np
from openai import OpenAI
from dotenv import load_dotenv
import nltk


In [3]:
# Konfigurera NLTK
nltk_data_path = os.path.join(os.path.expanduser("~"), "nltk_data")
os.makedirs(nltk_data_path, exist_ok=True)
nltk.data.path.append(nltk_data_path)

try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet', download_dir=nltk_data_path)
    nltk.download('punkt', download_dir=nltk_data_path)

from nltk.stem import WordNetLemmatizer

# --------------------------
# Globala funktioner
# --------------------------
ingredient_synonyms = {
    'chicken': ['poultry', 'hen', 'chicken breast'],
    'beef': ['ground beef', 'sirloin', 'roast beef'],
    'potato': ['potatoes', 'spuds', 'yukon gold']
}

lemmatizer = WordNetLemmatizer()

def preprocess(text):
    text = str(text).lower()
    for key, synonyms in ingredient_synonyms.items():
        for synonym in synonyms:
            text = re.sub(r'\b' + re.escape(synonym) + r'\b', key, text)
    text = re.sub(r'[^\w\s,-]', '', text)
    tokens = nltk.word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)

# --------------------------
# RAG-klass
# --------------------------
class RecipeRAG:
    def __init__(self, model_path, data_path):
        self.pipeline = joblib.load(model_path)
        self.df = pd.read_csv(data_path)
        load_dotenv()
        self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    
    def retrieve(self, query, top_k=5):
        """Hämta recept med KNN-modellen"""
        processed_query = preprocess(query)
        query_vec = self.pipeline['tfidf'].transform([processed_query])
        distances, indices = self.pipeline['knn'].kneighbors(query_vec, n_neighbors=top_k)
        return self.df.iloc[indices[0]]
    
    def generate_description(self, recipes):
        """Generera LLM-baserade beskrivningar"""
        descriptions = []
        for _, recipe in recipes.iterrows():
            prompt = f"""Beskriv detta recept på ett lockande sätt:
            Namn: {recipe['name']}
            Ingredienser: {recipe['ingredients']}
            Taggar: {recipe['tag_name']}
            """
            
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo-0125",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=150
            )
            descriptions.append(response.choices[0].message.content)
        return descriptions
    
    def save(self, path):
        """Spara pipeline"""
        joblib.dump(self, path)

# --------------------------
# Huvudkörning
# --------------------------
if __name__ == "__main__":
    # Initiera RAG
    rag = RecipeRAG(
        model_path="models/full_pipeline.pkl",
        data_path="recipes_with_ingredients_and_tags.csv"
    )
    
    # Testa retrieval
    test_query = "chicken, rice, soy sauce"
    results = rag.retrieve(test_query)
    print(f"\n🔍 Rekommendationer för '{test_query}':")
    print(results[['name', 'ingredients']].head(3))
    
    # Testa generering
    try:
        descriptions = rag.generate_description(results.head(2))
        print("\n📝 Genererade beskrivningar:")
        for desc in descriptions:
            print(f"- {desc}")
    except Exception as e:
        print(f"\n❌ Genereringsfel: {str(e)}")
        print("Kontrollera din OpenAI API-nyckel i .env-filen!")


[nltk_data] Downloading package wordnet to C:\Users\User\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\User\nltk_data...
[nltk_data]   Package punkt is already up-to-date!



🔍 Rekommendationer för 'chicken, rice, soy sauce':
                                             name  \
3240           One-Pot Chicken Teriyaki With Rice   
1997             Fried Rice: Soy, Soy Revolution!   
1998  Fried Rice: Soy, Spice, and Everything Nice   

                                            ingredients  
3240  olive oil, chicken breasts, salt, pepper, garl...  
1997  firm tofu, teriyaki sauce, vegetable oil, kimc...  
1998  firm tofu, teriyaki sauce, vegetable oil, kimc...  

📝 Genererade beskrivningar:
- One-Pot Chicken Teriyaki With Rice är den perfekta balansen av sälta, söta och kryddiga smaker, allt i en enda gryta. Inget krångel med enkla ingredienser som kyckling, vitlök, ingefära och grönsaker som broccoli, morot och röd paprika. Den lena teriyaki-såsen är en smakexplosion av sojasås, risvinäger, honung och sriracha. Allt serverat över fluffigt vitris för en bekväm och tillfredsställande middag. Ett måste för alla som älskar
- Dyka in i en smakrik revolution av

In [5]:
from flask import jsonify

class RecipeChatbot:
    def __init__(self, rag_system):
        self.rag = rag_system
        
    def handle_message(self, user_input):
        # Steg 1: Hämta rekommendationer
        recipes = self.rag.retrieve(user_input, top_k=3)
        
        # Steg 2: Generera naturligt språk-svar
        prompt = f"""Användaren frågar: {user_input}
        Relevanta recept: {recipes[['name', 'ingredients']].to_dict()}
        Formulera ett hjälpsamt svar på svenska som inkluderar receptnamn och ingredienser.
        """
        
        response = self.rag.client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200
        )
        
        return response.choices[0].message.content


In [6]:
# I din notebook där du tränar modellen:
from RAG_Pipeline import TextPreprocessor

# Uppdatera pipelinen:
pipeline = make_pipeline(
    TextPreprocessor(),  # Använd klassen istället för funktionen
    TfidfVectorizer(),
    KNeighborsClassifier()
)

# Träna och spara modellen
joblib.dump(pipeline, "models/full_pipeline.pkl")


[nltk_data] Downloading package wordnet to C:\Users\User/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\User/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


NameError: name 'make_pipeline' is not defined