In [15]:
import pandas as pd
from openai.embeddings_utils import get_embedding, cosine_similarity
import openai
import numpy as np
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the process
# environment, so the key does not have to be hardcoded in the notebook.
load_dotenv()
# Indexing os.environ (rather than .get) raises KeyError immediately when the
# key is missing, instead of failing later on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Joe's online grocery store

#### Joe decided to start his own business - an online grocery store. He had 15 items in stock:

In [16]:
# Joe's catalogue: one (name, description) pair per product, 15 in total.
df = pd.DataFrame(
    [
        ("Banana", "Organic bananas from Australia"),
        ("Butter", "Whole butter 500g"),
        ("Bread", "Gluten free bread 200g"),
        ("Avocado", "Ready to eat Avocado"),
        ("Orange", "Oranges from Spain"),
        ("Yogurt", "Organic yogurt from local farmers"),
        ("Milk 0.1%", "Low fat milk"),
        ("Milk 6%", "Whole milk"),
        ("Tomato", "Ripe tomatoes from local farmer"),
        ("Vegan meatballs", "Meatballs made from vegan ingredients"),
        ("Walnuts", "500g of Walnuts from Brazil"),
        ("Cheese", "Lactose free cheese"),
        ("Salami", "Salami straight from Italy"),
        ("Crackers", "Made by Tesco"),
        ("Green olives", "Organic green olives"),
    ],
    columns=["name", "description"],
)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,name,description
0,Banana,Organic bananas from Australia
1,Butter,Whole butter 500g
2,Bread,Gluten free bread 200g
3,Avocado,Ready to eat Avocado
4,Orange,Oranges from Spain
5,Yogurt,Organic yogurt from local farmers
6,Milk 0.1%,Low fat milk
7,Milk 6%,Whole milk
8,Tomato,Ripe tomatoes from local farmer
9,Vegan meatballs,Meatballs made from vegan ingredients


#### Joe knew that every good online store has a search engine, so he decided to build one too. He had heard about LLMs and decided to build a search engine that uses LLM embeddings.

In [17]:
# Embedding model used for the product texts below
model_id = "text-embedding-ada-002"

# Merge each product's (whitespace-trimmed) name and description into a single
# labelled string, so one embedding captures both fields
df["combined"] = (
    "name: "
    + df["name"].str.strip()
    + "; description: "
    + df["description"].str.strip()
)

# Embed every combined string; get_embedding is called once per row
df["embedding"] = df["combined"].apply(
    lambda text: get_embedding(text, engine=model_id)
)

def search(df, search_query, n=3):
    """Print the ``n`` products whose embeddings are closest to a search query.

    The query is embedded with ``get_embedding`` (using the notebook-level
    ``model_id``) and compared with every row's stored embedding via
    ``cosine_similarity``. Rows are ranked by descending similarity.

    Parameters
    ----------
    df : pandas.DataFrame
        Product table. Must provide an ``embedding`` column (one vector per
        row) and a ``combined`` column formatted as
        ``"name: <name>; description: <description>"``.
    search_query : str
        Free-text query to look up.
    n : int, default 3
        Number of best matches to print.

    Returns
    -------
    None
        Matches are printed, one per line, best match first.

    Notes
    -----
    The caller's ``df`` is left untouched. Similarities are computed on a
    copy, so running a query does not add or overwrite a ``similarity``
    column in the shared product table.
    """
    query_embedding = get_embedding(search_query, engine=model_id)

    # ``assign`` returns a new frame; writing ``df["similarity"] = ...`` here
    # would mutate the caller's table on every search and can raise
    # SettingWithCopyWarning when ``df`` is a slice of another frame.
    scored = df.assign(
        similarity=df.embedding.apply(
            lambda x: cosine_similarity(np.array(x), query_embedding)
        )
    )
    best_matches = scored.sort_values("similarity", ascending=False).head(n)

    # Strip the "name: " / "; description:" labels for display. Both patterns
    # are literal text, hence regex=False. The space that follows
    # "; description:" is kept, so each line reads "<name>:  <description>"
    # with two spaces, as in the original output.
    results = (
        best_matches.combined
        .str.replace("name: ", "", regex=False)
        .str.replace("; description:", ": ", regex=False)
    )
    for r in results:
        print(r)
    return None


#### Now Joe was ready to test his search engine. First, he ran a few simple tests:

In [18]:
# Literal keyword: "milk" occurs in the catalogue text itself.
search(df, search_query="milk")

Milk 0.1%:  Low fat milk
Milk 6%:  Whole milk
Yogurt:  Organic yogurt from local farmers


In [19]:
# Literal keyword again: "Organic" appears in several product descriptions.
search(df, search_query="organic")

Yogurt:  Organic yogurt from local farmers
Green olives:  Organic green olives
Banana:  Organic bananas from Australia


#### Looks like the search engine works quite well with simple keywords that also appear in the product name/description.
#### Joe decided to test a more interesting case - searching with keywords that don't appear in the product name/description:

In [20]:
# Category word: "fruits" appears in no product name or description.
search(df, search_query="fruits")

Orange:  Oranges from Spain
Banana:  Organic bananas from Australia
Tomato:  Ripe tomatoes from local farmer


In [21]:
# NOTE(review): "quacamole" is a misspelling of "guacamole". The query string is
# left as-is because the recorded output below was produced with this exact text.
search(df, search_query="stuff for quacamole")

Avocado:  Ready to eat Avocado
Green olives:  Organic green olives
Tomato:  Ripe tomatoes from local farmer


In [22]:
# Occasion-style query: neither "snacks" nor "party" appears in the catalogue.
search(df, search_query="snacks for party")

Crackers:  Made by Tesco
Avocado:  Ready to eat Avocado
Salami:  Salami straight from Italy


In [23]:
# Intent phrased as a full sentence rather than as product keywords.
search(df, search_query="I want to gain weight")

Butter:  Whole butter 500g
Bread:  Gluten free bread 200g
Milk 6%:  Whole milk


In [24]:
# Single word describing a dietary concern; "allergic" itself is not in the catalogue.
search(df, search_query="allergic")

Cheese:  Lactose free cheese
Vegan meatballs:  Meatballs made from vegan ingredients
Bread:  Gluten free bread 200g
