In [3]:
!pip install pandas qdrant-client sentence_transformers openai -q

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-cloud-aiplatform 1.56.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.
google-cloud-resource-manager 1.12.3 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.
grpc-google-iam-v1 0.13.0 requires protobuf!=3.20.0,!=3.20.1,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.
google-ai-generativelanguage 0.6.4 requires protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5, but you have protobuf 5.28.2 which is incompatible.
google-api-core 2.19.0 requires protobuf!=3.20.0,!=3.20.1,!=4

In [4]:
import pandas as pd
df = pd.read_csv('../../top_rated_wines.csv')
df = df[df['variety'].notna()] # remove any NaN values as it blows up serialization
data = df.sample(700).to_dict('records') # Get only 700 records. More records will make it slower to index
len(data)

700

In [5]:
from qdrant_client import models, QdrantClient
from sentence_transformers import SentenceTransformer

In [6]:
encoder = SentenceTransformer('all-MiniLM-L6-v2') # Model to create embeddings

In [7]:
# create the vector database client
qdrant = QdrantClient(":memory:") # Create in-memory Qdrant instance

In [8]:
# Create collection to store wines
qdrant.recreate_collection(
    collection_name="top_wines",
    vectors_config=models.VectorParams(
        size=encoder.get_sentence_embedding_dimension(), # Vector size is defined by used model
        distance=models.Distance.COSINE
    )
)

  qdrant.recreate_collection(


True

In [9]:
# vectorize!
qdrant.upload_points(
    collection_name="top_wines",
    points=[
        models.PointStruct(
            id=idx,
            vector=encoder.encode(doc["notes"]).tolist(),
            payload=doc,
        ) for idx, doc in enumerate(data) # data is the variable holding all the wines
    ]
)

In [10]:
user_prompt = "Suggest me an amazing wine from California."

In [11]:
# Search time for awesome wines!

hits = qdrant.search(
    collection_name="top_wines",
    query_vector=encoder.encode(user_prompt).tolist(),
    limit=3
)
for hit in hits:
  print(hit.payload, "score:", hit.score)

{'name': "Greenock Creek Alice's Shiraz 2003", 'region': 'Barossa Valley, Barossa, South Australia, Australia', 'variety': 'Red Wine', 'rating': 97.0, 'notes': 'Rich and fleshy, with pretty coffee, plum, wild berry and spice notes that are smooth and polished, long and flavorful.  An extremely limited release wine from one of Australia\'s "Cult" wineries.'} score: 0.6056202648603295
{'name': 'Cavallotto Barolo Riserva Bricco Boschis (chipped wax - 3L) 2001', 'region': 'Barolo, Piedmont, Italy', 'variety': 'Red Wine', 'rating': 96.0, 'notes': 'A wine of great structure but with elegance and complexity. Initially fruity with floral and spice aromas that open up. An excellent wine for aging.'} score: 0.598893552404807
{'name': 'Blankiet Paradise Hills Merlot 2003', 'region': 'Napa Valley, California', 'variety': 'Red Wine', 'rating': 96.0, 'notes': '"The 2003 Merlot is better out of bottle than it was from cask. An explosively rich wine that ranks among the finest Merlots I have tasted fr

In [12]:
# define a variable to hold the search results
search_results = [hit.payload for hit in hits]

In [14]:
from openai import OpenAI
client = OpenAI()
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."},
        {"role": "user", "content": "Suggest me an amazing Malbec wine from Argentina"},
        {"role": "assistant", "content": str(search_results)}
    ]
)


In [15]:
print(completion.choices[0].message)

ChatCompletionMessage(content="Argentina is renowned for producing exceptional Malbec wines. Here are a couple of highly recommended options for you to consider:\n\n1. **Catena Zapata Malbec Argentino**\n   - **Region:** Mendoza\n   - **Tasting Notes:** This wine offers complex and intense aromas of ripe red and black fruits, with subtle earthy and mineral notes. Flavors of plum, blackberry, and cocoa are balanced with fine tannins and a long, elegant finish.\n   - **Why It's Special:** Catena Zapata is one of the most prestigious wineries in Argentina, known for their dedication to quality and innovation.\n\n2. **Luigi Bosca Malbec DOC**\n   - **Region:** Luján de Cuyo, Mendoza\n   - **Tasting Notes:** Aromas of red and black berries, spices, and a touch of vanilla lead to a palate that is both powerful and elegant, with balanced acidity and smooth tannins.\n   - **Why It's Special:** This wine is made from old vines and is one of the few Malbecs that carry the DOC (Denominación de Or