# Load a single CSV file

In [1]:
from langchain_community.document_loaders.csv_loader import CSVLoader

In [2]:
# Path of the CSV file used by the single-file example.
FILE_PATH = "questions.csv"

In [3]:
# Build a CSV loader for the file at FILE_PATH, then load it into `documents`.
loader = CSVLoader(
    file_path=FILE_PATH,
)
documents = loader.load()

In [4]:
# Import the embedding wrapper and the Chroma vector store from
# langchain_community (the same package the CSVLoader import uses) rather than
# the deprecated `langchain.embeddings` / `langchain.vectorstores` paths.
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

In [5]:
# Embedding object handed to the Chroma store together with the loaded documents.
embedding_function = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [6]:
# Build a Chroma vector store from the loaded documents and the embedding function.
db = Chroma.from_documents(
    documents=documents,
    embedding=embedding_function,
)

In [7]:
# Run a similarity search for a free-text query and print the first returned document.
query = "Ask me a question about storage on cloud?"
docs = db.similarity_search(query)
print(docs[0].page_content)

question: There is a requirement to store objects. The objects are downloaded via a URL. Which storage option would you choose?
Amazon Storage Gateway
Amazon S3 
Amazon EBS
Amazon Glacier


# Load multiple CSV files

In [8]:
# Import both loaders from langchain_community; the `langchain.document_loaders`
# path previously used for CSVLoader is deprecated and did not match the
# CSVLoader import at the top of this notebook.
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.csv_loader import CSVLoader

In [9]:
# Directory that holds the CSV files for the multi-file example.
FILES_PATH = "../resource/exams/"

In [10]:
# Directory loader over FILES_PATH restricted to *.csv, with CSVLoader as the per-file loader class.
loader = DirectoryLoader(
    FILES_PATH,
    glob="*.csv",
    loader_cls=CSVLoader,
    show_progress=True,  # show a progress bar while loading
)

In [11]:
# Load everything the directory loader matches into `documents`
# (this rebinds the name used earlier for the single-file example).
documents = loader.load()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 1067.33it/s]


In [12]:
# Optional alternative (disabled): use OpenAI embeddings instead of SentenceTransformerEmbeddings.
# from langchain.embeddings import OpenAIEmbeddings
# embedding = OpenAIEmbeddings()

In [13]:
# Import from langchain_community (as the document-loader imports in this
# notebook do) rather than the deprecated `langchain.embeddings` path.
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [14]:
# Create the embedding object shared by the persisted store and the reloaded store.
embedding = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [15]:
# Import from langchain_community (as the document-loader imports in this
# notebook do) rather than the deprecated `langchain.vectorstores` path.
from langchain_community.vectorstores import Chroma

In [16]:
# Directory passed to Chroma as the on-disk location of the vector store.
persist_directory = "../chroma/"

In [17]:
# Build a Chroma store from `documents` with the given embedding, persisted under persist_directory.
# NOTE(review): re-running this cell against an already-populated directory may
# add the same documents again — confirm, and clear the directory first if so.
vectordb = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embedding,
    documents=documents,
)

In [18]:
# Print the entry count reported by the store's underlying collection.
# NOTE(review): `_collection` is an underscore-prefixed (private-by-convention)
# attribute — confirm it is stable across the Chroma versions in use.
print(vectordb._collection.count())

460


In [19]:
# Load chromadb langchain
loaded_vectordb = Chroma(
    persist_directory = persist_directory,
    embedding_function = embedding
)

In [20]:
# Free-text query used for the similarity search against the reloaded store.
# NOTE(review): "my" looks like a typo for "me"; left unchanged because editing
# the text would alter the query that produced the recorded results.
query = "Ask my the questions about billing?"

In [21]:
# Search the reloaded store with k=3; the trailing expression displays how many results came back.
docs = loaded_vectordb.similarity_search(
    query,
    k=3,
)
len(docs)

3

In [22]:
# Show the text of the first search result.
print(docs[0].page_content)

question: You are an AWS Enterprise customer with questions about billing and you overall AWS account? Which of the following AWS support personnel should you contact?
AWS Support 
AWS Concierge
AWS Billing and Accounts
AWS Technical Account Manager


In [23]:
# Show the text of the second search result.
print(docs[1].page_content)

question: Which of the following options is the recommended way to get billing support on AWS?
Open a billing support case 
Use third-party partners and tools
Contact your account manager
Use the AWS Billing and Cost Management dashboard


In [24]:
# Show the text of the third search result.
print(docs[2].page_content)

question: Which of the following options is the recommended way to get billing support on AWS?
Contact your account manager
Use the AWS Billing and Cost Management dashboard 
Use third-party partners and tools
Open a billing support case


# References

[How to use CSV files in vector stores with Langchain](https://how.wtf/how-to-use-csv-files-in-vector-stores-with-langchain.html)