In [None]:
#IMPLEMENTATION 1

In [1]:
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings

In [2]:
def get_insights(question):
    # Load and process the CSV data
    loader = CSVLoader("pokemon.csv")
    documents = loader.load()

    # Create embeddings
    embeddings = OllamaEmbeddings(model="nomic-embed-text")

    chroma_db = Chroma.from_documents(
        documents, embeddings, persist_directory="./chroma_db"
    )
    chroma_db.persist()

    llm = Ollama(model="llama3")

    prompt_template = PromptTemplate(
        input_variables=["context"],
        template="Given this context: {context}, please directly answer the question: {question}.",
    )

    # Set up the question-answering chain
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=chroma_db.as_retriever(),
        chain_type_kwargs={"prompt": prompt_template},
    )
    print(chroma_db.as_retriever())
    result = qa_chain({"query": question})
    return result

In [3]:
response = get_insights("whats this about?")

tags=['Chroma', 'OllamaEmbeddings'] vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7411805add10>


  warn_deprecated(


In [4]:
print(response)

{'query': 'whats this about?', 'result': 'This appears to be a list of Pokémon with their various stats and characteristics. The names are formatted like "Name: HoopaHoopa Confined" and include information such as:\n\n* Type(s) (e.g. Dragon, Ground)\n* Total base stat\n* HP (health points), Attack, Defense, Special Attack, Special Defense, and Speed\n* Generation number (indicating the Pokémon\'s introduction in a particular generation of games)\n* Legendary status (True or False)\n\nIt seems that this list may be used for reference or comparison purposes, possibly for competitive battling or game strategy.'}


In [None]:
#IMPLEMENTATION 2

In [7]:

loader = CSVLoader("pokemon.csv")
documents = loader.load()


In [8]:
# Add documents to vector database in addition to persistant directory (execute this or the one below)
vector_db = Chroma.from_documents(
    documents=documents, 
    embedding=OllamaEmbeddings(model="nomic-embed-text",show_progress=True),
    collection_name="local-rag",
    persist_directory="./chroma_db_csv"
)


OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████| 800/800 [01:49<00:00,  7.29it/s]


In [None]:
#Load from existing persistent directory (execute this or the one above)

vector_db = Chroma(
    persist_directory="./chroma_db_csv",
    embedding_function=OllamaEmbeddings(model="nomic-embed-text",show_progress=True),
    collection_name="local-rag"
)

In [10]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [11]:
# LLM from Ollama
local_model = "llama3"
llm = ChatOllama(model=local_model)

In [12]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [13]:
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(), 
    llm,
    prompt=QUERY_PROMPT
)

# RAG prompt
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [14]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [15]:
#Enter your question in the bracket and press enter
chain.invoke(input(""))

 how many pokemons are there?


OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.07s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.32it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.01it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.07it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.66it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.44it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 15.89it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.97it/s]
OllamaEmbeddings: 100%|█████████████████

'According to the data you provided, there are 12 Pokémon:\n\n1. MewtwoMega Mewtwo X\n2. Celebi\n3. MetagrossMega Metagross\n4. Hippopotas\n5. Vulpix\n6. Jirachi\n7. GroudonPrimal Groudon\n8. Kadabra\n9. Exeggcute\n10. Grovyle\n11. SceptileMega Sceptile\n12. Cherubi'

In [16]:
chain.invoke("whats this about?")

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.04s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 45.05it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.00it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.37it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.89it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.69it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.52it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.57it/s]
OllamaEmbeddings: 100%|█████████████████

'A fascinating question!\n\nIt appears that you\'re looking at a collection of documents or records, possibly from a Pokémon database. Each document contains information about a specific Pokémon species, including its name, types (e.g., Fire/Fighting), total stats (HP, Attack, Defense, etc.), and other details.\n\nThe metadata associated with each document suggests that they come from a CSV file called "pokemon.csv", and the row number and generation number are also provided. This could be useful for someone trying to analyze or work with Pokémon data.\n\nIf you have any specific questions about these documents or would like me to help with anything related, feel free to ask!'