# Utilisation du LLM préinstallé dans Ollama, de l'Embedding, d'une base FAISS et suivi dans MLflow

## Lien avec MLflow

In [1]:
import mlflow

# Tracking server address and experiment name used by this notebook.
TRACKING_URI = "http://mlflow-serveur:8080/"
EXPERIMENT_NAME = "rag"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)
# Enable MLflow's LangChain autologging before any chain is built or called.
mlflow.langchain.autolog()


## Lecture du texte en entrée

In [2]:
from langchain_community.document_loaders import TextLoader

# Load the essay into `documents`. The encoding is given explicitly so that
# decoding does not depend on the default locale of the machine running the
# notebook.
loader = TextLoader("data/paul_graham_essay.txt", encoding="utf-8")
documents = loader.load()

## Découpage du texte

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: target size of each chunk and overlap between
# consecutive chunks.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Cut the loaded documents into chunks and report how many were produced.
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks")

Split into 347 chunks


## Lien avec l'embedding

In [4]:
# Import from the packages that host these classes; the
# `langchain.embeddings.*` and `langchain.docstore.*` paths are deprecated
# re-exports.
from langchain_community.embeddings import InfinityEmbeddings
# NOTE(review): `Document` is not used in the cells visible here; the import
# is kept in case another cell relies on it.
from langchain_core.documents import Document

# Embedding client: model name and URL of the Infinity embedding service.
embeddings = InfinityEmbeddings(
    model="intfloat/multilingual-e5-base",
    infinity_api_url="http://embedding:7997",
)

## Transformation des bouts de texte en vecteurs (pour en déduire une direction)

In [6]:
# FAISS lives in langchain_community; the `langchain.vectorstores` path is a
# deprecated re-export.
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the text chunks using the embedding client.
vector_index = FAISS.from_documents(chunks, embeddings)
# The misspelled name is kept as an alias because other cells of this
# notebook refer to `vecotr_index`.
vecotr_index = vector_index

## Lien avec le LLM

In [7]:
from langchain_core.prompts import PromptTemplate
# Ollama is provided by langchain_community; `langchain.llms` is a deprecated
# re-export.
from langchain_community.llms import Ollama

# LLM client pointing at the Ollama server and the model it should use.
model = Ollama(
    verbose=True,
    base_url="http://ollama:11434",
    model="llama3",
)

## Fabrication d'une chaîne Question / Réponse (QA) + RAG avec les éléments définis au-dessus

In [8]:
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain

# Expose the FAISS index as a retriever, then wire it to the LLM in a
# question-answering chain with the library's default settings.
qa_retriever = vecotr_index.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=qa_retriever,
)

In [9]:
# Evaluate the bare name so the notebook displays the chain's repr.
chain

RetrievalQA(combine_documents_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=Ollama(verbose=True, base_url='http://ollama:11434', model='llama3')), document_variable_name='context'), retriever=VectorStoreRetriever(tags=['FAISS', 'InfinityEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f57e122e5d0>))

## Test avec une question, puis remontée dans MLflow

In [None]:
# Question sent to the QA chain.
query = "What were the two main things the author worked on before college?"

# Run the chain on the question and show only its outputs.
first_answer = chain.invoke({"query": query}, return_only_outputs=True)
print(first_answer)

# Hand the chain object to MLflow through mlflow.models.set_model.
mlflow.models.set_model(model=chain)

# Personnalisation de l'instruction générative (Prompt)

In [None]:
# Custom prompt: `{context}` and `{question}` are the two variables filled in
# by the chain.
template = '''
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know.
Don't try to make up an answer.
{context}

Question: {question}
Answer:
'''
prompt = PromptTemplate(
    template=template,
    input_variables=[
        'context',
        'question',
    ],
)

# Rebuild the RetrievalQA chain with the custom prompt, and ask it to return
# the source documents alongside the answer.
chain = RetrievalQA.from_chain_type(
    model,
    retriever=vecotr_index.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

# Use `invoke`, as the earlier query cell does; calling the chain object
# directly (`chain({...})`) is deprecated.
response = chain.invoke({"query": query})

print(response)