In [55]:
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings

In [56]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [57]:
find_dotenv()

'E:\\repos\\llmsops\\.env'

In [58]:
embeddings = AzureOpenAIEmbeddings(deployment="text-embedding-ada-002")

# The embedding models work by converting words, phrases, or even entire documents into mathematical 
# representations known as vectors. These vectors, which exist in a high-dimensional space, capture the meaning 
# and relationships between different words and concepts.

In [59]:
embeddings

AzureOpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000263A8F6F0B0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000263A93B6210>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='2023-05-15', openai_api_base=None, openai_api_type='azure', openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=2048, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True, azure_endpoint='https://azure-openai-learning-vasu.openai.azure.com/', azure_ad_token=None, azure_ad_token_provider=None, azure_ad_async_token_provid

In [60]:
embeddings.embed_query

<bound method OpenAIEmbeddings.embed_query of AzureOpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000263A8F6F0B0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000263A93B6210>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='2023-05-15', openai_api_base=None, openai_api_type='azure', openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=2048, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True, azure_endpoint='https://azure-openai-learning-vasu.openai.azure.com/', azure_ad_token=None, azure_ad_to

In [61]:
# Setting up Azure AI Search on the portal
# 1. Create Azure bloc storage and a container in that 
# 2. Upload csv to your demo container
# 3. Create Azure AI service
# 4. Create a embedding deployment in Azure OpenAI (assuming we already have created this resource)
# 5. Click 'Import and vectorize data'.
# 6. Query this index!

# -or-
#  Create a index from code itself
# 1. Create Azure AI service
# 2. Folloe the below code to create index and add documents

In [63]:
# Connect to Azure Cognitive Search
acs = AzureSearch(azure_search_endpoint=os.getenv('SEARCH_SERVICE_NAME'),
                 azure_search_key=os.getenv('SEARCH_API_KEY'),
                 index_name=os.getenv('SEARCH_INDEX_NAME'),
                 embedding_function=embeddings.embed_query)

In [66]:
print(os.getcwd())

E:\repos\llmsops


In [81]:
from langchain.document_loaders import CSVLoader

# Update the sample file path later
loader = CSVLoader("./rags/wine-ratings.csv", encoding="utf8")
documents = loader.load()

In [82]:
type(documents)

list

In [83]:
len(documents)

32780

In [84]:
documents[0]

Document(metadata={'source': './rags/wine-ratings.csv', 'row': 0}, page_content='id: 0\nname: 1000 Stories Bourbon Barrel Aged Batch Blue Carignan 2016\ngrape: \nregion: Mendocino, California\nvariety: Red Wine\nrating: 91\nnotes: This is a very special, limited release of 1000 Stories Bourbon Barrel-Aged Carignan, their first-ever release of Carignan as a single varietal. Classic and rustic with a little edge. Look for notes of brilliantly racy red and black fruits set to a rich backdrop of toast, herbs, and cocoa.')

In [97]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# What happended here? No visible change

In [98]:
len(docs)

32780

In [102]:
len(docs[0:100])

100

In [103]:
# Time consuming operation!
acs.add_documents(documents=docs[0:100])

['YjBhNjE5ZjUtYjI2MS00ODI1LTg4YmItYmQwNGE5NjA1MzUy',
 'YTcxYjBkMGItYmJiMy00Y2RmLTg2M2MtYzNlNDFkNWY0NTgy',
 'ZDkxYmQxNTQtZTQ3NS00NGQ4LTg1N2YtMTQ3NmU2OTgzN2Mz',
 'ODYzMmYwOWQtZjIxMC00NWU0LWEzNmYtMzczNTliNzJmNjkz',
 'NjRhMjY4MWUtZGJlOS00Njc2LTgwYjktNmQwZWQ1MDY0MTYy',
 'Mjg3NGVjODAtODM2OC00NTRlLWEwYzktMzM5ZmMzZjFmMTYy',
 'OTQxNTY1MTUtZWM0OS00MzFhLWIyMTktZDVjNDY2NDZhY2Ew',
 'MjU1ZmU1YWEtMTk2MS00YjExLWEzNjYtNmExOTM4ZjVjNTZk',
 'NjQ4NzA2ZWYtMjBhNy00YzI3LTliZWMtYTNkN2VjOTMzOWZk',
 'NDA5Y2EyNDQtZDljYy00OTEwLTkyZGYtODgyYjFiMWZjNmRi',
 'NzExMTI4ZjYtNGY3Yy00M2NjLWI5M2EtMTcyZGM2ZjRkOTAy',
 'ZGVmY2IzMjAtMmIzMy00YTQ0LWE1YTAtNjUxN2ZiZWY2N2I0',
 'NDEyNzUxNWUtNTk1OS00MWFiLWE4YWItZjRkMWFlM2NmYWY1',
 'Nzk1MjcwOGItYzJmNC00NTA4LWE3NDItYTJjNWI2ZmFiMWJi',
 'ZDE4YmEyYTItNGMyNS00MzFiLTgwMTItODdlZjNkMzA4Mjgz',
 'MWZhZmRmNjMtN2VlNy00MjliLTg5ZjEtZWMyMTMwZjY1MDQ1',
 'YjQ3Y2U2NGYtOTgyZS00ZjQyLTliYjctZGRiYzNiMDA5NzJl',
 'MTE2MTcyMDYtOGFmYy00NzdjLThhM2YtNDBlNDNlZWM3OGE4',
 'YzRkZjdjMDQtOTcwMS00MmRhLWIxNjctZGVjZTBlNjhk

In [105]:
intDocs = acs.similarity_search_with_relevance_scores(
    query="What is the best Cabernet Sauvignon wine in Napa Valley above 94 points",
    k=5,
)

print(intDocs[0][0].page_content)

id: 20
name: 1849 Declaration Napa Valley Cabernet Sauvignon 2014
grape: 
region: Napa Valley, California
variety: Red Wine
rating: 91
notes: The palate is robust with flavors of dark blueberry, blackberry, traces of red currant, and subtle sweet oak from the barrel. This wine is fruit forward, full-bodied and spreads richly across the palate with soft velvety tannins and a long-lasting finish.


In [106]:
print(dir(intDocs[0][0]))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__firstlineno__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_computed_fields__', '__pydantic_core_schema__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_validator__', '__reduce__', '__reduce_ex__

In [107]:
endpoint = os.getenv("ENDPOINT_URL", os.getenv('AZURE_OPENAI_ENDPOINT'))  
deployment = os.getenv("DEPLOYMENT_NAME", "gpt-4")  
subscription_key = os.getenv("AZURE_OPENAI_API_KEY", os.getenv('OPENAI_API_VERSION_GPT4')) 

In [111]:
# Connect to the large language model
from openai import AzureOpenAI

client = AzureOpenAI(  
    azure_endpoint=endpoint,  
    api_key=subscription_key,  
    api_version="2024-05-01-preview",
)

In [112]:
chat_prompt = [
    {
        "role": "system",
        "content": [
            {
                "type": "text",
                "text": "You are chatbot, a wine specialist. Your top priority is to help guide users into selecting amazing wine and guide them with their requests."
            }
        ]
    },
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What is the best Cabernet Sauvignon wine in Napa Valley above 94 points."
            }
        ]
    },
    {
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": str(intDocs)
            }
        ]
    }
] 

In [113]:
completion = client.chat.completions.create(  
    model=deployment,
    messages=chat_prompt,
    max_tokens=800,  
    temperature=0.7,  
    top_p=0.95,  
    frequency_penalty=0,  
    presence_penalty=0,
    stop=None,  
    stream=False
)

In [114]:
print(completion.choices[0].message)

ChatCompletionMessage(content="In Napa Valley, there are numerous exceptional Cabernet Sauvignon wines rated above 94 points. Here are a few top-rated options:\n\n1. **Screaming Eagle Cabernet Sauvignon** - Often achieving near-perfect scores, this iconic wine is known for its complexity, depth, and elegance.\n\n2. **Harlan Estate** - This wine is highly acclaimed for its rich texture, intense fruit flavors, and remarkable balance, frequently scoring above 95 points.\n\n3. **Opus One** - A joint venture between Baron Philippe de Rothschild and Robert Mondavi, this wine is a staple in the high-scoring category, known for its finesse and structure.\n\n4. **Shafer Vineyards Hillside Select** - Consistently rated above 94 points, this wine is celebrated for its powerful tannins that are both lush and well-integrated.\n\n5. **Caymus Vineyards Special Selection** - This wine is another high achiever, known for its opulent fruit flavors and velvety texture.\n\nThese wines are not only high sc