# LangChain: Documents

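This notebook shows how to query a product catalog for items of interest: first with a one-call vector-store index, then step by step (load the CSV, embed text, and search by similarity).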

In [5]:
import langchain.document_loaders
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
# account for deprecation of LLM model
import datetime

# Cutoff for the pinned snapshot: through this date the notebook selects
# "gpt-3.5-turbo-0301"; on any later date it selects "gpt-3.5-turbo".
target_date = datetime.date(2024, 6, 12)

# Today's date on the local clock (no time-of-day component).
current_date = datetime.date.today()

# Pick the model name by comparing today against the cutoff.
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [14]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import CSVLoader

In [15]:
# Relative path to the catalog CSV that the rest of the notebook works from.
file = 'data/OutdoorClothingCatalog_1000.csv'
# CSV loader configured for that file; a later cell calls loader.load() to get the documents.
loader = CSVLoader(file_path=file)


In [None]:
# Optional one-time setup, left commented out: installs the `docarray` package.
# NOTE(review): presumably required by DocArrayInMemorySearch — confirm. If this is
# ever enabled, prefer `%pip install` with a pinned version so the kernel's
# environment is the one modified and the run stays reproducible.
# !pip install docarray

In [24]:

# Build a queryable index from the CSV loader in a single call, with
# DocArrayInMemorySearch as the vector store class.
# NOTE(review): presumably this loads the CSV and embeds every row, which means
# calls to the embedding provider — confirm the cost before re-running.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [20]:
# Natural-language request sent to the index. The two fragments are joined by
# implicit literal concatenation into one single-line prompt.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [189]:
# Ask the index the question held in `query` and bind the answer to `response`.
# NOTE(review): the saved output of this cell is a pydantic ValidationError
# (DocArrayDoc is missing the required `text` and `metadata` fields), so
# `response` was never assigned in the recorded run. Presumably a
# docarray / pydantic 2.x version mismatch — confirm and pin compatible
# versions before relying on this cell.
response = index.query(query)

ValidationError: 2 validation errors for DocArrayDoc
text
  Field required [type=missing, input_value={'embedding': [-0.0220552... -0.021760985558600985]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
metadata
  Field required [type=missing, input_value={'embedding': [-0.0220552... -0.021760985558600985]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing

## Step By Step

In [190]:
# Recreate the CSV loader for the step-by-step walkthrough; this is the same
# call, on the same `file`, as the earlier loader cell.
loader = CSVLoader(file_path=file)

In [191]:
# Load the CSV into a sequence of Document objects. The recorded repr of docs[0]
# shows a row's fields as "name: value" lines in page_content, with the source
# path and row number in metadata.
docs = loader.load()

In [192]:
# Inspect the first loaded document (its repr is the cell output).
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'data/OutdoorClothingCatalog_1000.csv', 'row': 0})

In [193]:
# Embedding client used below to turn text into vectors.
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top. Presumably the client picks up the OpenAI API key from the
# environment populated by load_dotenv() in the first cell — confirm.
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [194]:
# Embed one sample sentence to see what an embedding looks like.
embed = embeddings.embed_query("Hi my name is Harrison")

In [195]:
# Length of the embedding vector (1536 in the recorded output).
print(len(embed))

1536


In [201]:
# Show only the first five components rather than printing the whole vector.
print(embed[:5])

[-0.022217182282092563, 0.006519303144370566, -0.018256563561435398, -0.03920383350550179, -0.014358812839401042]


In [184]:
# Build the in-memory vector store from the loaded documents and the embedding client.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [197]:
# Free-text query for the similarity search in the next cell.
# Note: this rebinds `query`, replacing the earlier table/markdown request.
query = "Please suggest a shirt with sunblocking"

In [200]:
# Run a similarity search for `query` against the vector store.
# NOTE(review): this rebinds `docs`, the same name that holds the loaded catalog
# used to build `db`; once it succeeds, re-running the `db` cell would index the
# search results instead of the catalog. A distinct name would be safer —
# confirm no other cell depends on `docs` before renaming.
# NOTE(review): the recorded run of this cell raised a pydantic ValidationError
# (DocArrayDoc missing `text` and `metadata`), so `docs` was not rebound here.
docs = db.similarity_search(query)

ValidationError: 2 validation errors for DocArrayDoc
text
  Field required [type=missing, input_value={'embedding': [-0.0220552... -0.021760985558600985]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing
metadata
  Field required [type=missing, input_value={'embedding': [-0.0220552... -0.021760985558600985]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.5/v/missing

In [199]:
# Number of items currently bound to `docs`.
# NOTE(review): the recorded output (1000) has execution count 199, i.e. it ran
# before the search cell (200), whose recorded run also failed. The value is
# therefore the size of the loaded catalog, not a count of search results —
# re-run after fixing the search to get the intended number.
len(docs)

1000