In [17]:
import json
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

import pandas as pd
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import JSONLoader, CSVLoader
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

In [18]:
# Base names (no extension) of the CSV sources; get_or_create_db expands them
# to ./data/<name>.csv and ./chroma_db_<name>.
file_name_overview, file_name_people = 'overview', 'people'

# Single embedding model instance shared by every vector store in this notebook.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [20]:
def get_or_create_db(file_name, max_documents=5):
    """Open the persisted Chroma store for ``file_name``, building it from CSV if empty.

    The store lives in ``./chroma_db_<file_name>`` and is populated from
    ``./data/<file_name>.csv`` (one document per CSV row) using the
    module-level ``embeddings`` model.

    Args:
        file_name: Base name of the CSV file, without directory or extension.
        max_documents: Maximum number of CSV rows to index when the store has
            to be created. Defaults to 5, matching the previous hard-coded
            slice; pass ``None`` to index every row.

    Returns:
        A ``Chroma`` vector store bound to the persist directory.
    """
    file_path = f"./data/{file_name}.csv"
    persist_directory = f"./chroma_db_{file_name}"

    # Opening the store also creates an empty one when nothing is persisted yet.
    db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

    # Probe for stored ids explicitly: `not db` only reflects emptiness when the
    # wrapper class happens to implement __len__, which is easy to misread.
    if db.get(limit=1)["ids"]:
        return db

    # Do not pass "fieldnames": csv.DictReader then treats the header line as a
    # data row, which would index the column names as the first document.
    # Letting the reader take the names from the header row also removes the
    # need to pre-read the CSV with pandas.
    loader = CSVLoader(
        file_path=file_path,
        encoding="utf-8",
        csv_args={
            "delimiter": ",",
            "quotechar": '"',
        },
    )
    documents = loader.load()
    if max_documents is not None:
        documents = documents[:max_documents]

    # Avoid handing an empty batch to the vector store; return the empty store.
    if not documents:
        return db

    return Chroma.from_documents(documents, embeddings, persist_directory=persist_directory)

def get_retriever(db):
    """Return the result of ``db.as_retriever()`` called with no arguments."""
    return db.as_retriever()

def get_similarity_search(query, db):
    """Return whatever ``db.similarity_search(query)`` yields for ``query``."""
    return db.similarity_search(query)

In [22]:
# file_name_people is defined once in the configuration cell near the top of
# the notebook; it is reused here instead of being re-declared.
db_people = get_or_create_db(file_name_people)


ImportError: Could not import chromadb python package. Please install it with `pip install chromadb`.

In [None]:
# Open (or build) the overview store; db_people comes from the previous cell.
db_overview = get_or_create_db(file_name_overview)

# One retriever per store, created in the same order as before.
retriever_overview, retriever_people = (
    get_retriever(store) for store in (db_overview, db_people)
)

In [None]:
template = """Answer the question based only on the following context:
{context_overview}

{context_people}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# ChatOpenAI() is constructed without arguments, so credentials must come from
# the environment rather than from this notebook.
model = ChatOpenAI()


def _format_docs(docs):
    """Join the text of retrieved documents into one plain-text context string."""
    return "\n\n".join(doc.page_content for doc in docs)


# Each retriever returns a list of Document objects. Piping through
# _format_docs puts only their page text into the prompt, instead of the
# Python repr of the list (which would include metadata and class names).
chain = (
    {
        "context_overview": retriever_overview | _format_docs,
        "context_people": retriever_people | _format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
query = "Which companies provide peptide synthesis services or products?"

# Run the same similarity search against both stores.
docs_overview = get_similarity_search(query, db_overview)
docs_people = get_similarity_search(query, db_people)

# Print the raw hits, overview first, then people.
for hits in (docs_overview, docs_people):
    print(hits)

In [None]:

# Send the query through the chain (retrievers -> prompt -> model -> string
# parser) and print the resulting answer.
print(chain.invoke(query))