# Embeddings with Pandas

In [1]:
import openai
import os
import pandas as pd
import numpy as np
import sys
import tiktoken

from dotenv import load_dotenv
from openai.embeddings_utils import get_embedding, cosine_similarity

In [2]:
sys.version

'3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]'

In [3]:
load_dotenv("azure.env")

openai.api_type: str = "azure"
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")

## Read CSV using pandas

In [4]:
input_datapath = "recipes_onecol.csv"
df = pd.read_csv(input_datapath)

In [5]:
df.head()

Unnamed: 0,Recipe
0,Recipe: Caramelized Bread Pudding with Chocola...
1,Recipe: White Sauce or Bechamel Sauce - Ingred...
2,"Recipe: Grilled Whole Mackerel with Lemon, Ore..."
3,Recipe: Christmas Croquembouche - Ingredients:...
4,Recipe: Chocolate Roll-Out Cookies - Ingredien...


In [6]:
df.shape

(499, 1)

## Using OpenAI model ada for embeddings

OpenAI offers one second-generation embedding model (denoted by -002 in the model ID) and 16 first-generation models (denoted by -001 in the model ID).
We recommend using text-embedding-ada-002 for nearly all use cases. It’s better, cheaper, and simpler to use. 

#### Initialize Embeddings model 

In [7]:
# embedding model parameters
# encoding for text-embedding-ada-002
embedding_encoding = "cl100k_base"
# the maximum for text-embedding-ada-002 is 8191
max_tokens = 8000
# the number of reviews to embed
top_n = 1000
encoding = tiktoken.get_encoding(embedding_encoding)

#### Count number of tokens in each row

In [8]:
# skip Recipes that are too long to embed > max_tokens
df["n_tokens"] = df.Recipe.apply(lambda x: len(encoding.encode(x)))
df = df[df.n_tokens <= max_tokens].tail(top_n)

In [9]:
df

Unnamed: 0,Recipe,n_tokens
0,Recipe: Caramelized Bread Pudding with Chocola...,746
1,Recipe: White Sauce or Bechamel Sauce - Ingred...,396
2,"Recipe: Grilled Whole Mackerel with Lemon, Ore...",749
3,Recipe: Christmas Croquembouche - Ingredients:...,1210
4,Recipe: Chocolate Roll-Out Cookies - Ingredien...,1047
...,...,...
494,Recipe: Mozzarella Arrabiata Salsa - Ingredien...,236
495,Recipe: Tangy Frozen Greek Yogurt - Ingredient...,245
496,Recipe: Belgian Ale-Braised Brisket - Ingredie...,739
497,Recipe: Banana and Mango Spring Rolls with Coc...,813


#### Create embeddings

In [12]:
def openai_text_embeddings(text):
    """
    Generating embeddings from text using Azure Open AI
    """
    embeddings = openai.Embedding.create(
        input=text,
        deployment_id="text-embedding-ada-002",
    )
    embeddings = embeddings["data"][0]["embedding"]

    return embeddings

In [13]:
emb = openai_text_embeddings("My name is James Bond")
emb[:10]

[-0.03617486730217934,
 -0.005520837381482124,
 -0.007070655468851328,
 -0.030174769461154938,
 0.0020399712957441807,
 0.029502559453248978,
 -0.016581185162067413,
 -0.010979433543980122,
 -0.024423636496067047,
 -0.006939948070794344]

In [14]:
len(emb)

1536

In [15]:
# This may take a few minutes
df["embedding"] = df.Recipe.apply(lambda x: openai_text_embeddings(x))

In [16]:
df

Unnamed: 0,Recipe,n_tokens,embedding
0,Recipe: Caramelized Bread Pudding with Chocola...,746,"[0.009514863602817059, -0.004959321580827236, ..."
1,Recipe: White Sauce or Bechamel Sauce - Ingred...,396,"[0.006938737351447344, 5.168988718651235e-05, ..."
2,"Recipe: Grilled Whole Mackerel with Lemon, Ore...",749,"[0.0321684330701828, 0.015827074646949768, -0...."
3,Recipe: Christmas Croquembouche - Ingredients:...,1210,"[-0.013321062549948692, -0.003860416589304805,..."
4,Recipe: Chocolate Roll-Out Cookies - Ingredien...,1047,"[-0.015186915174126625, -0.0015019221464172006..."
...,...,...,...
494,Recipe: Mozzarella Arrabiata Salsa - Ingredien...,236,"[0.007525882683694363, 0.0012005887692794204, ..."
495,Recipe: Tangy Frozen Greek Yogurt - Ingredient...,245,"[0.008731059730052948, -0.006505751982331276, ..."
496,Recipe: Belgian Ale-Braised Brisket - Ingredie...,739,"[0.002623694483190775, -0.0018848098115995526,..."
497,Recipe: Banana and Mango Spring Rolls with Coc...,813,"[-0.005268465261906385, -0.009891543537378311,..."


In [17]:
df.to_csv("recipes_onecol_with_embeddings.csv")

#### Read embeddings

In [18]:
datafile_path = "recipes_onecol_with_embeddings.csv"

df = pd.read_csv(datafile_path)
df["embedding"] = df.embedding.apply(eval).apply(np.array)

In [19]:
df

Unnamed: 0.1,Unnamed: 0,Recipe,n_tokens,embedding
0,0,Recipe: Caramelized Bread Pudding with Chocola...,746,"[0.009514863602817059, -0.004959321580827236, ..."
1,1,Recipe: White Sauce or Bechamel Sauce - Ingred...,396,"[0.006938737351447344, 5.168988718651235e-05, ..."
2,2,"Recipe: Grilled Whole Mackerel with Lemon, Ore...",749,"[0.0321684330701828, 0.015827074646949768, -0...."
3,3,Recipe: Christmas Croquembouche - Ingredients:...,1210,"[-0.013321062549948692, -0.003860416589304805,..."
4,4,Recipe: Chocolate Roll-Out Cookies - Ingredien...,1047,"[-0.015186915174126625, -0.0015019221464172006..."
...,...,...,...,...
494,494,Recipe: Mozzarella Arrabiata Salsa - Ingredien...,236,"[0.007525882683694363, 0.0012005887692794204, ..."
495,495,Recipe: Tangy Frozen Greek Yogurt - Ingredient...,245,"[0.008731059730052948, -0.006505751982331276, ..."
496,496,Recipe: Belgian Ale-Braised Brisket - Ingredie...,739,"[0.002623694483190775, -0.0018848098115995526,..."
497,497,Recipe: Banana and Mango Spring Rolls with Coc...,813,"[-0.005268465261906385, -0.009891543537378311,..."


## Define a function to ask a question and get an answer from GPT-3.5

In [20]:
def ask_question(question, n=1):
    question_embedding = openai_text_embeddings(
        question,
    )

    df["similarity"] = df.embedding.apply(
        lambda x: cosine_similarity(x, question_embedding)
    )

    results = df.sort_values("similarity", ascending=False).head(n)
    answer = " ".join(results.Recipe.tolist())
    print("Answer:", answer)

## Testing

In [21]:
ask_question("Chocolate and berries")

Answer: Recipe: Raspberry Sundaes with Chocolate Sauce and Roasted Almonds - Ingredients: 6 ounces bittersweet chocolate, finely chopped;3/4 cup water;2/3 cups sugar;2 tablespoons honey;3 tablespoons unsalted butter;Pinch of kosher salt;1 pint fresh raspberries (12 ounces);2 tablespoons sugar;2 tablespoons elderflower liqueur, such as St-Germain;2 tablespoons fresh lemon juice;2 pints vanilla ice cream;Chopped roasted unsalted almonds, for garnish- Instructions: To make the chocolate sauce, place a saucepan over medium-low heat, combine the chocolate, and water, vigorously stirring until completely melted and smooth. Let it come to a low simmer until small bubbles appear around the outside. Add sugar, honey and keep at a low simmer until thickened, about 3 minutes. Remove from heat and stir in butter and salt. Chocolate sauce will smooth out at is cools to room temperature. Store in an airtight container. In a bowl, gently toss together the raspberries, sugar, elderflower liqueur, and 

In [22]:
ask_question("Bukgur vegan recipe")

Answer: Recipe: Bulgur Veggie Burgers with Lime Mayonnaise - Ingredients: 1/2 cup chopped onion, divided;1 tablespoon olive oil plus additional for brushing;1/2 cup bulgur;1 cup water;1 cup canned pinto beans, rinsed and drained;1 1/2 tablespoon soy sauce;3/4 cup walnuts (2 1/2 ounces);2 garlic cloves, coarsely chopped;1/2 cup packed cilantro sprigs;3/4 teaspoon ground cumin;1/4 teaspoon cayenne;1/4 cup mayonnaise;1/4 teaspoon grated lime zest;1/2 teaspoon fresh lime juice;4 slices multi-grain bread,toasted;Equipment: a perforated grill sheet;Accompaniments: lettuce; sliced tomato- Instructions: Cook half of onion with 1/4 teaspoon salt in oil in a small heavy saucepan over medium heat, stirring occasionally, until golden, 5 to 7 minutes. Add bulgur and water and cook, covered, over low heat until water is absorbed, 15 to 18 minutes. Transfer to a bowl and stir in beans and soy sauce. Pulse bulgur mixture, walnuts, garlic, cilantro, cumin, cayenne, a rounded 1/4 teaspoon salt, 1/2 teas

In [23]:
ask_question("Fish no alcohol")

Answer: Recipe: Soft Fish Tacos - Ingredients: 1/4 cup mayonnaise;1/4 cup ketchup;1/4 cup crema mexicana*;1 cup all purpose flour;1 teaspoon fine sea salt;1/2 teaspoon ground pepper;1 cup dark beer, room temperature;13/4 pounds halibut, cut into 5x3/4-inch strips;1 lime, halved crosswise;12 white corn tortillas;Vegetable oil (for deep-frying);1 1/2 cups shredded red cabbage;2 large tomatoes, chopped;Lime wedges;Bottled hot pepper sauce- Instructions: For sauce: Mix all ingredients in bowl; season with salt and pepper.
Mix all ingredients in bowl; season with salt and pepper.
For batter and fish: Whisk flour, salt, and pepper in bowl; pour in beer, whisking until batter is smooth. Let stand 15 minutes. Sprinkle fish with salt and pepper. Squeeze some lime juice over each strip. Let stand 15 minutes. Mix fish into batter. Preheat oven to 200°F. Heat skillet over medium heat. Stack 2 tortillas. Sprinkle top with water. Place in skillet, wet side down. Heat 1 minute. Sprinkle top with wate

In [24]:
ask_question("Fish with wine")

Answer: Recipe: Trout in Riesling - Ingredients: 2 tablespoons (1 oz) unsalted butter plus additional for greasing;1 medium carrot;1 celery rib;4 (6- to 8-oz) trout fillets with skin;1 1/2 teaspoons salt;1 teaspoon black pepper;4 medium shallots, cut lengthwise into 1/8-inch-thick slices (1 cup);2 fresh parsley sprigs;5 black peppercorns;1 1/2 cups water;1 1/2 cups dry Riesling;3/4 cup heavy cream;2 teaspoons cornstarch;2 teaspoons chopped fresh tarragon;1/2 teaspoon fresh lemon juice;Special equipment: an adjustable-blade slicer; tweezers or needlenose pliers- Instructions: Put oven rack in middle position and preheat oven to 375°F. Butter bottom and sides of a 13- by 9-inch glass baking dish. Cut carrot and celery lengthwise with slicer into very thin matchsticks (less than 1/8 inch wide and 2 1/2 inches long). Rinse trout and remove any pin bones with tweezers or needlenose pliers. Pat dry, then sprinkle 1/2 teaspoon each of salt and pepper on fish (flesh sides only). Arrange fillet

In [25]:
ask_question("Fish with alcohol")

Answer: Recipe: Soft Fish Tacos - Ingredients: 1/4 cup mayonnaise;1/4 cup ketchup;1/4 cup crema mexicana*;1 cup all purpose flour;1 teaspoon fine sea salt;1/2 teaspoon ground pepper;1 cup dark beer, room temperature;13/4 pounds halibut, cut into 5x3/4-inch strips;1 lime, halved crosswise;12 white corn tortillas;Vegetable oil (for deep-frying);1 1/2 cups shredded red cabbage;2 large tomatoes, chopped;Lime wedges;Bottled hot pepper sauce- Instructions: For sauce: Mix all ingredients in bowl; season with salt and pepper.
Mix all ingredients in bowl; season with salt and pepper.
For batter and fish: Whisk flour, salt, and pepper in bowl; pour in beer, whisking until batter is smooth. Let stand 15 minutes. Sprinkle fish with salt and pepper. Squeeze some lime juice over each strip. Let stand 15 minutes. Mix fish into batter. Preheat oven to 200°F. Heat skillet over medium heat. Stack 2 tortillas. Sprinkle top with water. Place in skillet, wet side down. Heat 1 minute. Sprinkle top with wate

In [26]:
ask_question("Meat with alcohol")

Answer: Recipe: Beer-Braised Holiday Top of the Rib - Ingredients: 1 tablespoon sweet or hot smoked paprika;1 tablespoon dark brown sugar;1 tablespoon ground cumin;1 tablespoon instant coffee granules;1 teaspoon kosher salt;1 teaspoon freshly ground black pepper;1/2 teaspoon onion powder;1/2 teaspoon garlic powder;One 5-pound top of the rib;3 tablespoons olive oil;Three 12-ounce cans or bottles dark lager beer;1 garlic head, halved horizontally;1 pound small parsnips, peeled and halved length wise;1 pound peeled baby carrots with greens- Instructions: 1. Preheat the oven to 325°F 2. Combine the paprika, brown sugar, cumin, coffee, salt, pepper, onion powder, and garlic powder in a small bowl and stir to mix. Pat the meat dry with paper towels and rub the spice mixture all over. 3. Heat the olive oil in a large Dutch oven over medium-high heat. Sear the meat until nicely browned, 4 to 6 minutes per side. Pour the beer over the meat, cover, and transfer to the oven. 4. Roast for 2 hours.