In [3]:
# One-time setup: uncomment to install the dependencies into the kernel's environment.
# %pip install langchain
# %pip install streamlit
# %pip install langchain-openai

In [None]:
import os
from dotenv import load_dotenv


def load_config():
    """Read the .env file and assemble the notebook's settings.

    Calls ``load_dotenv()`` first, then builds a dictionary holding:

    * relative file paths under ``../datas`` and ``../models``,
    * the chunk size (500),
    * the Azure OpenAI and LangChain values read with ``os.getenv``
      (``None`` for any variable that is not set).

    Returns:
        dict: setting name -> value.
    """
    load_dotenv()

    data_dir = "../datas"
    model_dir = "../models"

    # Environment variables copied into the settings under their own name.
    env_keys = (
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_API_BASE",
        "AZURE_OPENAI_API_ENDPOINT",
        "AZURE_DEPLOYMENT_NAME",
        "AZURE_API_VERSION",
        "AZURE_DEPLOYEMENT",
        "LANGCHAIN_ENDPOINT",
        "LANGCHAIN_API_KEY",
    )

    settings = {
        "CSV_INPUT_PATH": os.path.join(data_dir, "gutenberg.csv"),
        "CSV_CLEANED_PATH": os.path.join(data_dir, "gutenberg2.csv"),
        "EMBEDDINGS_FILE": os.path.join(model_dir, "embeddings.csv"),
        "DOCS_FILE": os.path.join(model_dir, "docs.csv"),
        "FAISS_INDEX": os.path.join(model_dir, "faiss_index.index"),
        "CHUNK_SIZE": 500,
    }
    for key in env_keys:
        settings[key] = os.getenv(key)
    return settings

In [None]:
# Load the settings once and expose the values used by the rest of the
# notebook as module-level constants. Each constant is assigned exactly once.
config = load_config()

# LangChain settings
LANGCHAIN_ENDPOINT = config["LANGCHAIN_ENDPOINT"]
LANGCHAIN_API_KEY = config["LANGCHAIN_API_KEY"]

# Azure OpenAI settings
AZURE_OPENAI_API_BASE = config["AZURE_OPENAI_API_BASE"]
AZURE_OPENAI_API_ENDPOINT = config["AZURE_OPENAI_API_ENDPOINT"]
AZURE_OPENAI_API_KEY = config["AZURE_OPENAI_API_KEY"]
AZURE_API_VERSION = config["AZURE_API_VERSION"]
AZURE_DEPLOYMENT_NAME = config["AZURE_DEPLOYMENT_NAME"]
# The misspelling is kept on purpose: it is the key name used in the config dict.
AZURE_DEPLOYEMENT = config["AZURE_DEPLOYEMENT"]

# Text splitting
CHUNK_SIZE = config["CHUNK_SIZE"]

In [None]:
import streamlit as sl
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# Load the knowledge base from a CSV file
def load_knowledgeBase(csv_file_path):
    """Build a FAISS vector store from the documents of a CSV file.

    The file is read with ``CSVLoader`` and the resulting documents are
    embedded with ``AzureOpenAIEmbeddings``, configured from the
    module-level Azure constants.

    Args:
        csv_file_path: path of the CSV file handed to ``CSVLoader``.

    Returns:
        The vector store returned by ``FAISS.from_documents``.
    """
    embedder = AzureOpenAIEmbeddings(
        azure_deployment=AZURE_DEPLOYEMENT,
        openai_api_key=AZURE_OPENAI_API_KEY,
        azure_endpoint=AZURE_OPENAI_API_ENDPOINT,
    )
    csv_documents = CSVLoader(file_path=csv_file_path).load()
    return FAISS.from_documents(csv_documents, embedder)

# Build the prompt template
def load_prompt():
    """Return the chat prompt template used to answer questions from the CSV.

    The template has two placeholders, ``{context}`` and ``{question}``,
    which are filled in when the prompt is formatted.

    Returns:
        The template built by ``ChatPromptTemplate.from_template``.
    """
    # The prompt text is runtime content, so it stays in French.
    # The fallback sentence is wrapped in a complete pair of double quotes so
    # it is unambiguous which exact text is expected as the fallback reply.
    prompt = """
    Vous devez répondre à la question comme indiqué dans le contenu du CSV.
    Contexte = {context}
    Question = {question}
    Si la réponse n'est pas dans le CSV, répondez "Je ne sais pas de quoi vous parlez."
    """
    return ChatPromptTemplate.from_template(prompt)

# Build the Azure OpenAI chat model
def load_llm():
    """Create the Azure OpenAI chat model client.

    The endpoint, deployment name, API key and API version come from the
    module-level constants; the temperature is fixed at 0.

    Returns:
        A configured ``AzureChatOpenAI`` instance.
    """
    llm_settings = {
        "azure_endpoint": AZURE_OPENAI_API_ENDPOINT,
        "azure_deployment": AZURE_DEPLOYMENT_NAME,
        "openai_api_key": AZURE_OPENAI_API_KEY,
        "api_version": AZURE_API_VERSION,
        "temperature": 0,
    }
    return AzureChatOpenAI(**llm_settings)

# Helper that formats the retrieved documents
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        str: the contents joined with two newlines ("" when ``docs`` is empty).
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

if __name__ == '__main__':
    # Streamlit entry point: build the knowledge base, then answer the question
    # typed by the user with a retrieval-augmented chain.
    sl.header("Bienvenue sur le bot CSV")

    # Take the cleaned CSV path from the shared configuration instead of a
    # hard-coded backslash literal: the literal only worked on Windows and
    # contained invalid escape sequences (backslash-d, backslash-g).
    csv_file_path = config["CSV_CLEANED_PATH"]
    knowledgeBase = load_knowledgeBase(csv_file_path)
    llm = load_llm()
    prompt = load_prompt()

    query = sl.text_input('Entrez du texte')
    if query:
        # The retriever must come from the vector store itself.
        # similarity_search() returns a plain list of documents, and a list
        # has no as_retriever() method (AttributeError).
        retriever = knowledgeBase.as_retriever()

        # The query is sent both to the retriever (whose documents are
        # flattened by format_docs into the "context" value) and, unchanged,
        # to the "question" value of the prompt.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        response = rag_chain.invoke(query)
        sl.write(response)