In [18]:
import faiss
import numpy as np
import pandas as pd
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import normalize
from ast import literal_eval

In [19]:
raw_df = pd.read_csv('../data/guardian_data_clean.csv')
df = raw_df.copy()
df.shape

(1114, 16)

In [20]:
df.fillna('', inplace=True)

In [21]:
df['tag_str'] = df.tags.map(lambda x: ', '.join(literal_eval(x)) )

In [22]:
# concatenate text content
df['content'] = (
    'Title: ' + df['title'] + ' | '
    + df['section'] + ' '
    + 'Author: ' + df['author'] + ' '
    + 'Text:  ' + df['body'] + ' '
    + '. KEYWORDS: ' + df['tag_str']  
)


In [23]:
# generate
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(df['content'].tolist(), show_progress_bar=True)
embeddings_norm = normalize(embeddings, axis=1)

Batches: 100%|██████████| 35/35 [00:11<00:00,  3.07it/s]


In [24]:
dimension = embeddings_norm.shape[1]
out_index = faiss.IndexFlatIP(dimension)
out_index.add(np.array(embeddings_norm))

In [25]:
faiss.write_index(out_index, "../embeddings/recipe_embeddings.index")
# to do - explore faiss a bit more, visualise it.

## RAG

In [26]:
# TO DO: add rag evaluation
# TO DO: add keyword generation

In [27]:
query = "Can you suggest an easy vegetarian recipe?"

In [28]:
index = faiss.read_index("../embeddings/recipe_embeddings.index")

In [29]:
import numpy as np

def retrieve_recipes(query, model, index, df, k=5):
    """
    Retrieve the top k recipes similar to the query using FAISS.
    """
    query_embedding = model.encode([query])
    query_embedding_norm = normalize(query_embedding, axis=1)
    distances, idx = index.search(np.array(query_embedding_norm), k)
    retrieved_data = df.iloc[idx[0]].copy()
    retrieved_data['similarity_score'] = 1 - distances[0]

    return retrieved_data

In [33]:
top_k_recipes = retrieve_recipes(query, model, index, df, k=3)
top_k_recipes[['content', 'similarity_score']]
# to do - how do I evaluate the quality of the recommendations?

Unnamed: 0,content,similarity_score
575,Title: Joe Woodhouse’s vegetarian standouts fo...,0.382591
356,Title: How to make vegetarian bibimbap – recip...,0.414459
26,Title: How to make vegetarian scotch eggs – re...,0.416668


In [48]:
def get_system_prompt(recipes: list[str]):
    system_prompt = f"""
    You work for the Guardian food section and you are knowledgeable about their recipes.
    Readers will come to you with questions. You should answer the questions based on the provided Guardian articles.
    You should mention the articles to the reader.
    You should not provide the full text of the articles, but you can provide a summary.
    Make it inclusive for ASD and ADHD readers.
    """
    i = 1
    for r in recipes:
        system_prompt += f"RECIPE {i}: {r}"
        i += 1

    return {"role": "system", "content": system_prompt}

# in the future could add a similarity score to the prompt 
# to allow to ask clarifying questions and improve recommendations

In [49]:
from openai import OpenAI
from utils import read_yaml, get_api_key, get_openai_completion

config = read_yaml("../config/rag_config.yaml")
api_key = get_api_key(config["api_key_name"])

client = OpenAI(api_key = api_key)

messages = [get_system_prompt(top_k_recipes['content'].tolist())]

print(f"You: {query}")
messages.append({"role": "user", "content": query})

answ = get_openai_completion(client, config["model"], messages)[0]
print(f"\nAssistant: {answ}")

You: Can you suggest an easy vegetarian recipe?

Assistant: Sure! If you're looking for an easy vegetarian recipe, I recommend trying Joe Woodhouse’s *Squash, red onion and stilton pie* from his article "Joe Woodhouse’s vegetarian standouts for Christmas day." 

This recipe is quite flexible, allowing you to use various vegetables you might have at hand, and it's perfect for making ahead of time. Here’s a quick summary of the steps:

1. Roast chunks of squash with red onions in the oven.
2. Sauté some greens until wilted.
3. Layer it all with filo pastry and stilton cheese in a baking tin.
4. Bake until golden brown.

The whole process takes about 1 hour and 30 minutes and serves 6, making it great for sharing. Plus, it reheats well if you make it ahead of time!

Let me know if you’d like more details or another recipe!
