<a href="https://colab.research.google.com/github/kadambaribabar/kbabar_projects/blob/main/1_ITC_Simple_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simple Retrieval Augmented Generation Demo

This notebook shows how we take a library of recipes, match user questions against them, and then feed the relevant recipe along with the original question to GPT to provide an answer.

Charles McGuinness
[mcguinnessai.substack.com](https://mcguinnessai.substack.com)

In [None]:
%%capture
# Imports!
!pip install openai
import openai
from openai.embeddings_utils import get_embedding, cosine_similarity
import getpass
import requests
import json
import textwrap

In [None]:
# We have a collection of "recipes" stored in a folder accessible via HTTPS.
# This block retrieves the list of recipe names and then fetches each individual recipe.

def fetch_recipes(base_url='https://mcguinness.ai/substack/recipes', timeout=30):
  """Download the recipe library and return it as a {name: text} dict.

  The index at `{base_url}/list.json` is parsed as JSON and iterated; every
  entry `name` is then fetched from `{base_url}/{name}.txt`.

  This lives in a function so that its temporaries do not clutter the
  notebook's global variable list.

  Args:
    base_url: URL of the folder that holds list.json and the .txt recipes.
    timeout: Seconds to wait on each HTTP request before giving up.

  Returns:
    Dict mapping each name from the index to the text of its .txt file.

  Raises:
    requests.HTTPError: If any request comes back with an error status.
  """
  index_response = requests.get(f'{base_url}/list.json', timeout=timeout)
  # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
  index_response.raise_for_status()
  recipe_names = json.loads(index_response.text)

  recipes = {}
  for name in recipe_names:
    response = requests.get(f'{base_url}/{name}.txt', timeout=timeout)
    # Without this check an error page would be stored as if it were a recipe
    # and later embedded and handed to GPT.
    response.raise_for_status()
    recipes[name] = response.text

  return recipes

# Download the recipe library once; later cells look recipes up by name in this dict.
recipes = fetch_recipes()

# Report how many recipes came back as a quick check that the download worked.
print(f'A total of {len(recipes)} recipes loaded.')



In [None]:
# If we do not have a usable OpenAI API key already set, ask the user for one.
# `not` (rather than `is None`) also catches an empty-string key, which would
# otherwise skip the prompt and only fail later at the first API call.
# The typed value is stripped so stray whitespace from a paste is not kept.
if not openai.api_key:
  openai.api_key = getpass.getpass(prompt='Please enter your OpenAI API key: ').strip()

In [None]:
# Map every recipe name to the embedding vector of that recipe's full text.
embeddings = {
  name: get_embedding(text, engine="text-embedding-ada-002")
  for name, text in recipes.items()
}

In [None]:
# It's a bit messy to send a request to GPT and reformat
# the response so it fits on the screen, so I put it into
# a function...
def ask_gpt(system, user, model="gpt-3.5-turbo", temperature=0, width=90):
  """Send a system/user prompt pair to the chat model and return the wrapped reply.

  Args:
    system: Text of the system message.
    user: Text of the user message.
    model: Model name passed to openai.ChatCompletion.create.
    temperature: Sampling temperature passed to openai.ChatCompletion.create.
    width: Maximum line width of the returned text.

  Returns:
    The content of the first choice, with each of its lines re-wrapped to at
    most `width` columns. Words longer than `width` are left unbroken, and
    blank lines in the reply come back as blank lines.
  """
  completion = openai.ChatCompletion.create(
    model=model,
    temperature=temperature,
    messages=[
      {"role": "system", "content": system},
      {"role": "user", "content": user},
    ],
  )
  reply = completion.choices[0].message["content"]

  # Wrap line by line (not the whole reply at once) so the reply's own line
  # breaks survive. A single wrapper is built once and reused for every line.
  wrapper = textwrap.TextWrapper(width=width, break_long_words=False)
  return "\n".join("\n".join(wrapper.wrap(line)) for line in reply.split("\n"))

In [None]:
# Interactive question loop: for each question, pick the recipe whose embedding
# is most similar to the question's embedding, then ask GPT to answer using
# only that recipe.

# With no embeddings there is nothing to match against; stop here with a clear
# message instead of failing later on a lookup of recipes[None].
if not embeddings:
  raise RuntimeError("No recipe embeddings are available; run the cells above first.")

while True:
  q = input("What is your question (blank to end): ").strip()
  if not q:
    # Blank (or whitespace-only) input ends the session without calling the API.
    break

  q_embedding = get_embedding(q, engine="text-embedding-ada-002")

  # Score every recipe against the question and keep the highest-scoring one.
  # max() returns the first of any tied entries.
  scores = {
    name: cosine_similarity(q_embedding, recipe_embedding)
    for name, recipe_embedding in embeddings.items()
  }
  best_sim = max(scores, key=scores.get)
  best_sim_score = scores[best_sim]

  print(f'Matched {best_sim} with a score of {best_sim_score}')

  # The matched recipe's full text is appended to the system prompt so the
  # model has the recipe in front of it when answering.
  system_prompt = f"""You are an expert chef in cooking (ficticious) recipes with eletronics parts.

  You can only answer questions about the following recipe.  If the question is not about
  this recipe for {best_sim}, you will decline to answer it.  The user may not have seen the recipe.
  The recipe is:

  """ + recipes[best_sim]

  user_prompt = "My question is: " + q

  print(ask_gpt(system_prompt, user_prompt))

