In [1]:
import os
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [8]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

from langchain_openai import ChatOpenAI, AzureChatOpenAI

llm = AzureChatOpenAI(
    max_tokens=16000,
    temperature=0.0,
    deployment_name="gpt-4o-b",
    model_name="gpt-4o",
    # deployment_name="gpt-35-turbo",
    # model_name="gpt-3.5-turbo",
)
print(f'Using Azure endpoint: {llm.azure_endpoint}')

# Test if LLM responds OK
prompt = "What is your training data cutoff date?"
response = llm(prompt)
print(response.content)

Using Azure endpoint: https://summarilyazureopenai.openai.azure.com
My training data cutoff date is **October 2021**. This means I was trained on information available up until that time, and I don't have knowledge of events, developments, or updates that occurred after that date. Let me know how I can assist you!


In [3]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)
docs = loader.load()
print(len(docs))
print(docs[0])

1000
page_content=': 0
name: Women's Campside Oxfords
description: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. 

Size & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. 

Specs: Approx. weight: 1 lb.1 oz. per pair. 

Construction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. 

Questions? Please contact us for any inquiries.' metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0}


In [4]:
import pandas as pd
df = pd.read_csv(file)
df.head()

Unnamed: 0.1,Unnamed: 0,name,description
0,0,Women's Campside Oxfords,This ultracomfortable lace-to-toe Oxford boast...
1,1,"Recycled Waterhog Dog Mat, Chevron Weave",Protect your floors from spills and splashing ...
2,2,Infant and Toddler Girls' Coastal Chill Swimsu...,"She'll love the bright colors, ruffles and exc..."
3,3,"Refresh Swimwear, V-Neck Tankini Contrasts",Whether you're going for a swim or heading out...
4,4,EcoFlex 3L Storm Pants,Our new TEK O2 technology makes our four-seaso...


In [5]:
# To get some idea what should be the chunk size in embeddings
lens = [len(doc.page_content) for doc in docs]
print(max(lens))

1294


In [6]:
from langchain.embeddings import AzureOpenAIEmbeddings
embeddings = AzureOpenAIEmbeddings(
    deployment="text-embedding-3-small",
    chunk_size=1024)
embed = embeddings.embed_query("Hi my name is Harrison")
len(embed)

  embeddings = AzureOpenAIEmbeddings(


1536

In [7]:
docs = docs[:500]  # Limit to 500 for testing
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)



In [9]:
query = "Please suggest a shirt with sunblocking"

In [10]:
retrieved_doc = db.similarity_search(query)

In [12]:
retrieved_doc[0].page_content

': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.'

In [13]:
qdocs = "".join([doc.page_content for doc in retrieved_doc])

In [22]:
response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
shirts with sun protection in a table in markdown and summarize each one.") 

  response = llm.call_as_llm(f"{qdocs} Question: Please list all your \


In [25]:
display(Markdown(response))

Here is a table summarizing all the shirts with sun protection:

| **Name**                              | **Summary**                                                                                                                                                                                                                     |
|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **Sun Shield Shirt**                  | High-performance sun shirt with UPF 50+ protection, moisture-wicking, and abrasion resistance. Made of 78% nylon and 22% Lycra Xtra Life fiber. Slightly fitted and falls at the hip. Handwash and line dry.                     |
| **Men's Plaid Tropic Shirt**          | Lightweight short-sleeve shirt with UPF 50+ protection, wrinkle-free fabric, and quick-drying properties. Made of 52% polyester and 48% nylon. Features cape venting and two front bellows pockets. Machine washable and dryable. |
| **Girls' Ocean Breeze Long-Sleeve Stripe Shirt** | Long-sleeve rash guard with UPF 50+ protection, quick-drying, fade-resistant, and seawater-resistant fabric. Made of a nylon Lycra®-elastane blend. Machine wash and line dry. Coordinates with swimsuits.                        |
| **Perform-A-Tex Woven Shirt**         | Versatile summer shirt with UPF 40+ protection, quick-drying in under 14 minutes, and moisture-wicking. Made of 100% nylon. Slightly fitted and falls at the hip. Machine washable and dryable.                                   |