# **CSV Analysis**
Reference: https://www.tetranyde.com/blog/langchain-vectordb

In [None]:
# Install libraries
!pip install langchain chromadb

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community<0.4.0,>=0.3.0 (from langchain_experimental)
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community<0.4.0,>=0.3.0->langchain_experimental)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typ

In [None]:
from langchain.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings


Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.4-py3-none-any.whl (209 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain_experimental
Successfully installed langchain_experimental-0.3.4


In [None]:
def get_insights(question, csv_path="data.csv", persist_directory="./chroma_db"):
    """Answer ``question`` with retrieval-augmented QA over a CSV file.

    The CSV is loaded into documents, embedded with the Ollama ``llama2``
    model, stored in a persisted Chroma collection, and queried through a
    ``RetrievalQA`` chain backed by the same Ollama model.

    Args:
        question: Natural-language question passed to the chain as ``query``.
        csv_path: Path of the CSV file to index. Defaults to ``"data.csv"``.
        persist_directory: Directory where Chroma stores the collection.
            Defaults to ``"./chroma_db"``.

    Returns:
        The value returned by the ``RetrievalQA`` chain for
        ``{"query": question}`` (presumably a dict holding the query and the
        generated answer under ``"result"`` -- confirm against the installed
        LangChain version).
    """
    # Load and process the CSV data
    documents = CSVLoader(csv_path).load()

    # Create embeddings
    embeddings = OllamaEmbeddings(model="llama2")

    # NOTE(review): every call re-embeds the CSV into the same persist
    # directory; presumably this accumulates duplicate entries across calls --
    # confirm, and consider reopening the existing store instead.
    chroma_db = Chroma.from_documents(
        documents, embeddings, persist_directory=persist_directory
    )
    chroma_db.persist()

    llm = Ollama(model="llama2")

    # The template references both {context} and {question}, so both must be
    # declared; declaring only "context" fails prompt validation.
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="Given this context: {context}, please directly answer the question: {question}.",
    )

    # Build the retriever once and reuse it for both the chain and the log line.
    retriever = chroma_db.as_retriever()

    # Set up the question-answering chain
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
    )
    print(retriever)
    result = qa_chain({"query": question})
    return result


   ID       heure_debut         heure_fin messagerie utilisation  \
0  14  2/17/25 14:12:50  2/17/25 14:17:49  anonymous         Oui   
1  15  2/17/25 14:11:39  2/17/25 14:24:06  anonymous         Oui   
2  16  2/17/25 14:15:44  2/17/25 14:27:10  anonymous         Oui   
3  17  2/17/25 14:25:24  2/17/25 14:40:47  anonymous         Oui   
4  18  2/17/25 14:42:08  2/17/25 14:42:25  anonymous         Non   

           entite  utilisation_frequence  \
0        Commerce                    3.0   
1        Commerce                    3.0   
2        Commerce                    5.0   
3  Bid management                    5.0   
4             NaN                    NaN   

                                              tache1  \
0  En tant qu'outil d'aide a la redaction de prop...   
1  En tant qu'outil d'aide a la redaction de prop...   
2  En tant qu'outil d'aide a la redaction d'offre...   
3  En tant qu'outil d'aide a la redaction d'offre...   
4                                             

In [None]:
[Document(page_content='ï»¿Student_Name: Jack\nSubject: Math\nMarks: 90', metadata={'source': 'Marks.csv', 'row': 0}), Document(page_content='ï»¿Student_Name: John\nSubject: Math\nMarks: 60', metadata={'source': 'Marks.csv', 'row': 1}), Document(page_content='ï»¿Student_Name: Mary\nSubject: Math\nMarks: 70', metadata={'source': 'Marks.csv', 'row': 2}), Document(page_content='ï»¿Student_Name: Peter\nSubject: Math\nMarks: 80', metadata={'source': 'Marks.csv', 'row': 3})]

In [None]:
# Check embeddings
# Sanity check: embed the text of the first CSV row and show the first few
# components of the resulting vector.
# `documents` is local to get_insights(), so it is loaded here explicitly to
# keep this cell runnable on a fresh kernel.
documents = CSVLoader("data.csv").load()
embeddings = OllamaEmbeddings(model="llama2")
# embed_query() takes a single string, not a list of Document objects.
query_result = embeddings.embed_query(documents[0].page_content)
print(query_result[:5])

## Creating a front end with Streamlit


In [None]:
pip install streamlit

In [None]:
import streamlit as st
import langchain_helper as lch

st.title("Query Student Records")

# Free-text question typed into the sidebar.
question = st.sidebar.text_area(label="Ask a question")

if st.sidebar.button("Get Answer"):
    if not question or not question.strip():
        # Skip the expensive load/embed/LLM round trip for a blank question.
        st.sidebar.warning("Please enter a question first.")
    else:
        response = lch.get_insights(question)

        # Presumably get_insights returns the RetrievalQA result dict with the
        # answer under "result"; fall back to the raw response otherwise.
        if isinstance(response, dict):
            answer = response.get("result", response)
        else:
            answer = response

        st.write("Answer:", answer)

In [None]:
streamlit run streamlit_ui.py