# In [None]:
from langchain.community.llms.ollama import Ollama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders.web.cheerio import CheerioWebBaseLoader
from langchain.community.embeddings.ollama import OllamaEmbeddings
from langchain.vectorstores.memory import MemoryVectorStore
from langchain.document_loaders.fs.directory import DirectoryLoader
from langchain.document_loaders.fs.pdf import PDFLoader
from langchain.document_loaders.fs.docx import DocxLoader
from langchain.document_loaders.fs.csv import CSVLoader
from langchain.document_loaders.fs.text import TextLoader

# Load every supported document (PDF, DOCX, CSV, TXT) found under the source
# directory, split the text into overlapping chunks, embed the chunks with
# Ollama, and run one similarity query against an in-memory vector store.
# NOTE: results are logged to LangSmith if configured
# NOTE(review): the import paths and loader class names used in this file
# mirror the LangChain JS package layout; the calls below use the LangChain
# Python (snake_case) API -- confirm the imports resolve in the installed
# Python distribution.
source_dir = "/Users/ongbt/Downloads/psdsrc"

# File suffix -> factory that builds the loader for a single file of that type.
# NOTE(review): the second CSVLoader argument is kept as-is; confirm it selects
# the intended column in the Python CSVLoader.
loader_factories = {
    ".pdf": lambda path: PDFLoader(path),
    ".docx": lambda path: DocxLoader(path),
    ".csv": lambda path: CSVLoader(path, "text"),
    ".txt": lambda path: TextLoader(path),
}

# The Python DirectoryLoader takes one glob pattern and one loader class per
# instance (its second positional parameter is the glob string, not a
# suffix -> loader mapping), so build one loader per suffix and concatenate
# the loaded documents. Each factory is handed over as `loader_cls`, which is
# expected to be called with the path of every matching file.
docs = []
for suffix, make_loader in loader_factories.items():
    directory_loader = DirectoryLoader(
        source_dir,
        glob=f"**/*{suffix}",
        loader_cls=make_loader,
    )
    docs.extend(directory_loader.load())

# Additional steps: split text into chunks with any TextSplitter. The chunks
# can then be used as context or saved to memory afterwards.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

split_docs = text_splitter.split_documents(docs)

embeddings = OllamaEmbeddings()
vector_store = MemoryVectorStore.from_documents(split_docs, embeddings)

# Ask the retriever for the 4 most similar chunks to the query.
retriever = vector_store.as_retriever(search_kwargs={"k": 4})
docs3 = retriever.invoke("what are the 13 measures?")
print(docs3)
