In [20]:
import pandas as pd
df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(700).to_dict('records')
len(data)

700

In [21]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [22]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings



In [23]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [24]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [25]:
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [26]:
user_prompt = "suggest me an good wine form Argentina"

In [27]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': 'Jorge Ordonez Number 4 Esencia (375ML half-bottle) 2004', 'region': 'Spain', 'variety': 'Boutique', 'rating': 99.0, 'notes': 'Esencia is a unique wine that incorporates the raisined muscat grape.  After 24 months in barrel, we achieve a partial fermentation of the must.  Alois Kracher, through this wine, sought to convey the essence of the village of Almáchar, in the heart of the Axarquía, famous from time immemorial for its delicious muscat grapes and raisins.  '} score: 0.5693690133985975
{'name': 'Domaine Bois de Boursan Chateauneuf-du-Pape Cuvee Felix 2000', 'region': 'Chateauneuf-du-Pape, Rhone, France', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'It is a structured and powerful wine. It has notes of black fruits, licorice and cocoa.'} score: 0.551969210534377
{'name': 'Catena Zapata Adrianna Vineyard Malbec 2004', 'region': 'Argentina', 'variety': 'Red Wine', 'rating': 97.0, 'notes': '"The single-vineyard 2004 Malbec Adrianna Vineyard from the Gualtallary district 

In [28]:
#define a variable to hold the search results

seach_results = [hit.payload for hit in hits]

In [34]:
#Now time to conect to the local large language model
from openai import OpenAI
client = OpenAI(
    base_url="http://127.0.0.1:8080/v1",
    api_key= "sk-no-key-required"
)

completion = client.chat.completions.create(
    model='LLaMA_CPP',
    messages=[
        {"role":"sytem", "content":"You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their request."},
    {"role":'user',"content":'Suggest me an amazing Malbec wine from Argentina'},
    {"role":"assistant","content":str(seach_results)}
    ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content='I recommend the Domaine Bois de Boursan Chateauneuf-du-Pape Cuvee Felix 2000. It is a structured and powerful wine with notes of black fruits, licorice and cocoa. It is a great wine that will pair well with your meal and will surely impress your guests.</s>', role='assistant', function_call=None, tool_calls=None)
