In [1]:
# lets install the pre-requisite packages
#!pip install -r requirements.txt

## 1. Embedding of the data

In [1]:
import pandas as pd

In [2]:
# read the data
raw_data = pd.read_csv('top_rated_wines.csv')
raw_data.head()

Unnamed: 0,name,region,variety,rating,notes
0,3 Rings Reserve Shiraz 2004,"Barossa Valley, Barossa, South Australia, Aust...",Red Wine,96.0,Vintage Comments : Classic Barossa vintage con...
1,Abreu Vineyards Cappella 2007,"Napa Valley, California",Red Wine,96.0,Cappella is a proprietary blend of two clones ...
2,Abreu Vineyards Cappella 2010,"Napa Valley, California",Red Wine,98.0,Cappella is one of the oldest vineyard sites i...
3,Abreu Vineyards Howell Mountain 2008,"Howell Mountain, Napa Valley, California",Red Wine,96.0,When David purchased this Howell Mountain prop...
4,Abreu Vineyards Howell Mountain 2009,"Howell Mountain, Napa Valley, California",Red Wine,98.0,"As a set of wines, it is hard to surpass the f..."


In [3]:
# data clean and convert into Json record
raw_data = raw_data[raw_data['variety'].notna()] # remove any NaN values as it blows up serialization
data = raw_data.to_dict('records')

In [5]:
data[0]

{'name': '3 Rings Reserve Shiraz 2004',
 'region': 'Barossa Valley, Barossa, South Australia, Australia',
 'variety': 'Red Wine',
 'rating': 96.0,
 'notes': 'Vintage Comments : Classic Barossa vintage conditions. An average wet Spring followed by extreme heat in early February. Occasional rainfall events kept the vines in good balance up to harvest in late March 2004. Very good quality coupled with good average yields. More than 30 months in wood followed by six months tank maturation of the blend prior to bottling, July 2007. '}

## 2. Embedding and Vectorization

In [6]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [7]:
# create in-memory local vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [8]:
# using SentenceTransformer
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [9]:
# get embedding dimension from encoder
embeddingSize=encoder.get_sentence_embedding_dimension()
embeddingSize

384

In [10]:
# Create collection to store data
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=embeddingSize, # Vector size is defined by used model
        distance=models.Distance.COSINE # distance metric
    )
)

True

In [11]:
# Now, lets upload the data to vector database
# vectorize!
qdrant.upload_records(
    collection_name="top_wines",
    records=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

## 3. Get Context(Records) for User prompt from vector DB 

In [12]:
# Now lets get relevent context from vector DB from user_prompt
# user_prompt = "Suggest me an amazing Malbec wine from california"
user_prompt = "list 3 wines from california"

# Search time for awesome wines!
hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': 'Krug Grande Cuvee Brut (164th Edition) with Gift Box', 'region': 'Champagne, France', 'variety': 'Sparkling & Champagne', 'rating': 96.0, 'notes': 'This is an extraordinary blend of 127 wines from 11 different years, the oldest from 1990 and the youngest from 2008. As you taste it, notes of toasted bread, hazelnut, nougat, barley sugar and jellied fruits may take you by surprise. You may even taste hints of apples still on the tree, flowers in bloom, ripe and dried fruit, almonds, marzipan, gingerbread, sweet spices and even brioche and honey.'} score: 0.6034219618554251
{'name': 'Bruno Rocca Barbaresco Rabaja 2000', 'region': 'Barbaresco, Piedmont, Italy', 'variety': 'Red Wine', 'rating': 97.0, 'notes': 'Unbelievable aromas, with rich, ripe plum but also mineral, tobacco and cedar undertones. Full-bodied, with lovely ripe tannins and a unctuous combination of ripe fruit and light toasty oak. Goes on and on. Fabulous. Greatest wine ever from Bruno Rocca. Best after 2007. 1,50

In [13]:
# use this hits to create a search result
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]
search_results

[{'name': 'Krug Grande Cuvee Brut (164th Edition) with Gift Box',
  'region': 'Champagne, France',
  'variety': 'Sparkling & Champagne',
  'rating': 96.0,
  'notes': 'This is an extraordinary blend of 127 wines from 11 different years, the oldest from 1990 and the youngest from 2008. As you taste it, notes of toasted bread, hazelnut, nougat, barley sugar and jellied fruits may take you by surprise. You may even taste hints of apples still on the tree, flowers in bloom, ripe and dried fruit, almonds, marzipan, gingerbread, sweet spices and even brioche and honey.'},
 {'name': 'Bruno Rocca Barbaresco Rabaja 2000',
  'region': 'Barbaresco, Piedmont, Italy',
  'variety': 'Red Wine',
  'rating': 97.0,
  'notes': 'Unbelievable aromas, with rich, ripe plum but also mineral, tobacco and cedar undertones. Full-bodied, with lovely ripe tannins and a unctuous combination of ripe fruit and light toasty oak. Goes on and on. Fabulous. Greatest wine ever from Bruno Rocca. Best after 2007. 1,500 cases

## 4. Generate Response based on Search result 

In [15]:
# Now time to create  a client that connected to local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://127.0.0.1:8080/v1", # "http://<Your api-server IP>:port"
    api_key = "sk-no-key-required"
)


In [17]:
# using search result, lets LLM to generate response based on content "search_results"
completion = client.chat.completions.create(
    model="llama.cpp",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": str(search_results)}
    ]
)
print(completion.choices[0].message)

ChatCompletionMessage(content="Here are three amazing wines from California:\n\n1. 2016 Domaine Carneros Le Reve Blanc de Blancs Brut Champagne - This wine is made from 100% Chardonnay grapes and has a rating of 95. It offers notes of toasted brioche, ripe pear, and a hint of citrus. It is a perfect pairing for a special occasion.\n2. 2018 Flowers Sonoma Coast Pinot Noir - This wine is made from 100% Pinot Noir grapes and has a rating of 94. It offers notes of red fruits, earth, and a hint of spice. It is a great wine for those who enjoy full-bodied reds.\n3. 2019 St. Joseph's Crozes-Hermitage Syrah - This wine is made from 100% Syrah grapes and has a rating of 93. It offers notes of dark fruits, earth, and a hint of spice. It is a great wine for those who enjoy bold and full-bodied reds.", role='assistant', function_call=None, tool_calls=None)
