In [2]:
import os
from dotenv import load_dotenv


def load_config():
    """Build the configuration dictionary used by this notebook.

    ``load_dotenv()`` is called first; the result then combines fixed
    relative file paths, the chunk size, and settings read with
    ``os.getenv``.

    Returns:
        dict: the path entries and ``CHUNK_SIZE`` (500), followed by one
        entry per environment variable name. An entry is ``None`` when the
        corresponding variable is not set.
    """
    load_dotenv()

    data_dir = "../datas"
    model_dir = "../models"

    settings = {
        "CSV_INPUT_PATH": os.path.join(data_dir, "gutenberg.csv"),
        "CSV_CLEANED_PATH": os.path.join(data_dir, "gutenberg2.csv"),
        "EMBEDDINGS_FILE": os.path.join(model_dir, "embeddings.csv"),
        "DOCS_FILE": os.path.join(model_dir, "docs.csv"),
        "FAISS_INDEX": os.path.join(model_dir, "faiss_index.index"),
        "CHUNK_SIZE": 500,
    }

    # Each name is both the environment variable read and the dictionary key.
    # "AZURE_DEPLOYEMENT" keeps its spelling: the notebook reads the config
    # under exactly this key.
    env_names = (
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_API_BASE",
        "AZURE_OPENAI_API_ENDPOINT",
        "AZURE_DEPLOYMENT_NAME",
        "AZURE_API_VERSION",
        "AZURE_DEPLOYEMENT",
        "LANGCHAIN_ENDPOINT",
        "LANGCHAIN_API_KEY",
    )
    for name in env_names:
        settings[name] = os.getenv(name)

    return settings

In [6]:
# Unpack the configuration into module-level constants used by later cells.
config = load_config()

# LangChain settings.
LANGCHAIN_ENDPOINT = config["LANGCHAIN_ENDPOINT"]
LANGCHAIN_API_KEY = config["LANGCHAIN_API_KEY"]

# Azure OpenAI settings (each one was previously assigned here too;
# AZURE_OPENAI_API_KEY was assigned twice, now only once).
AZURE_OPENAI_API_BASE = config["AZURE_OPENAI_API_BASE"]
AZURE_OPENAI_API_ENDPOINT = config["AZURE_OPENAI_API_ENDPOINT"]
AZURE_OPENAI_API_KEY = config["AZURE_OPENAI_API_KEY"]
AZURE_DEPLOYMENT_NAME = config["AZURE_DEPLOYMENT_NAME"]
AZURE_DEPLOYEMENT = config["AZURE_DEPLOYEMENT"]  # spelling matches the config key
AZURE_API_VERSION = config["AZURE_API_VERSION"]

# Text-splitting parameter.
CHUNK_SIZE = config["CHUNK_SIZE"]

# os.getenv yields None for unset variables, which would only surface later as
# an opaque client error. Warn now about the settings the cells below pass to
# the Azure clients; this does not stop execution.
_missing_settings = [
    name
    for name in (
        "AZURE_OPENAI_API_BASE",
        "AZURE_OPENAI_API_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "AZURE_DEPLOYMENT_NAME",
    )
    if not config[name]
]
if _missing_settings:
    import warnings

    warnings.warn(
        "Missing environment settings (check your .env file): "
        + ", ".join(_missing_settings)
    )

In [7]:
# RAG prompt
from langchain import hub

# The reference carries no commit hash; per the original note this loads the
# latest version of the prompt.
RAG_PROMPT_REF = "rlm/rag-prompt"
prompt = hub.pull(RAG_PROMPT_REF)

In [None]:
# Load docs
from langchain.document_loaders import CSVLoader

# Read the cleaned CSV from the path declared in the config instead of
# repeating the literal, so the location is defined in one place only.
loader = CSVLoader(config["CSV_CLEANED_PATH"])
data = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Use the configured CHUNK_SIZE rather than a second hardcoded 500, so the
# splitter follows the config; chunks do not overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Store splits: embed every chunk and index it in a Chroma vector store
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import Chroma

# NOTE(review): `chunk_size` here is an argument of the embeddings client,
# not the text splitter's chunk size — presumably a per-request batch limit;
# confirm before changing it.
embedding_settings = {
    "azure_endpoint": AZURE_OPENAI_API_BASE,
    "openai_api_key": AZURE_OPENAI_API_KEY,
    "azure_deployment": 'text-embedding-ada-002',
    "chunk_size": 1,
}
embedding_model = AzureOpenAIEmbeddings(**embedding_settings)

vectorstore = Chroma.from_documents(
    embedding=embedding_model,
    documents=all_splits,
)

# LLM
from langchain_openai import AzureChatOpenAI

# Chat model used to answer questions. The API version now comes from the
# configuration (AZURE_API_VERSION); the previously hardcoded "2023-05-15" is
# kept as the fallback when that variable is unset. temperature=0.0 is
# unchanged.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT,
    azure_deployment=AZURE_DEPLOYMENT_NAME,
    openai_api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_API_VERSION or "2023-05-15",
    temperature=0.0,
)

In [None]:
# RetrievalQA: wire the vector-store retriever, the LLM and the pulled prompt
# into a single question-answering chain
from langchain.chains import RetrievalQA

retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)

In [None]:
# Sample query (French: "Who wrote Paradise Regained?"); the chain reads the
# question from the "query" key.
question = "Qui a écrit Paradise Regained ?"
chain_inputs = {"query": question}
result = qa_chain.invoke(chain_inputs)
result

In [None]:
# RetrievalQA is imported from langchain.chains, as in the earlier cell of
# this notebook, not from the package root.
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import AzureChatOpenAI

# Load the data
# NOTE(review): 'path/to/...' looks like a placeholder — point it at the real
# CSV file before running this cell.
loader = CSVLoader('path/to/gutenberg_cleaned_100.csv')
data = loader.load()

# Split the documents (chunk size comes from the notebook configuration)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# Build the vector store; `embedding_model` is created in an earlier cell
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_model)

# Configure the LLM and the QA chain. The '...' placeholders are replaced by
# the settings loaded from the environment, and an API version is supplied
# (falling back to the version used elsewhere in this notebook).
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_API_ENDPOINT,
    azure_deployment=AZURE_DEPLOYMENT_NAME,
    openai_api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_API_VERSION or "2023-05-15",
)
qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectorstore.as_retriever())

# Extract candidate character names from a summary
def extract_characters(summary):
    """Return the capitalised words found in ``summary``.

    A word qualifies when it is a whole run of word characters that starts
    with an uppercase letter and is followed only by lowercase letters. The
    check is Unicode-aware, so accented names such as "Hélène" or "Zoé" are
    kept; the previous ASCII-only pattern dropped any word containing an
    accented letter.

    This is a plain capitalisation heuristic: it also returns sentence-initial
    words and skips mixed-case or all-caps tokens ("McDonald", "NASA").

    Args:
        summary: text to scan; ``None`` or an empty string yields ``[]``.

    Returns:
        list[str]: matching words in order of appearance, duplicates included.
    """
    import re

    if not summary:
        return []

    # Tokenise on whole \w+ runs so the check applies to complete words only,
    # mirroring the \b...\b anchoring of the original pattern.
    return [
        word
        for word in re.findall(r"\w+", summary)
        if word[0].isupper() and all(ch.islower() for ch in word[1:])
    ]

# Fetch the text of a book
def fetch_book_text(book_title):
    """Placeholder for downloading a book's text from Gutenberg.

    Not implemented yet: ``book_title`` is ignored and ``None`` is returned.
    """
    # TODO: download the text from Gutenberg
    return None

# Example interaction: send one question through the QA chain
user_question = "Quels sujets traite Paradise Regained ?"
chain_inputs = {"query": user_question}
result = qa_chain.invoke(chain_inputs)
print(result)

# Extract character names from a summary (the text below is a placeholder)
summary = "Résumé du livre ici."
characters = extract_characters(summary)
print(characters)