In [1]:
import os
import logging
import psutil
import chromadb
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [4]:
class RAGPipeline:
    """Retrieval-augmented question answering over a folder of PDF/TXT files.

    Constructing the pipeline loads and chunks every supported document in
    ``docs_folder``, indexes the chunks in a persistent Chroma collection and
    builds a LangChain chain that retrieves the closest chunks for a question
    and asks an Ollama chat model to answer from them.
    """

    # Tunables live on the class so a subclass can override them without
    # changing the constructor signature.
    CHUNK_SIZE = 500
    CHUNK_OVERLAP = 50
    N_RESULTS = 3                # chunks retrieved per question
    INDEX_BATCH_SIZE = 256       # chunks embedded/written per Chroma call
    MIN_FREE_MEMORY_GB = 4.0     # below this a low-memory warning is logged
    PERSIST_PATH = "chromadb_index"
    COLLECTION_NAME = "rag_collection"

    def __init__(self, model_name="llama3.2", embedding_model="sentence-transformers/all-MiniLM-L6-v2", docs_folder="documents"):
        """Build the whole pipeline eagerly.

        Args:
            model_name: Ollama model tag passed to ``ChatOllama``.
            embedding_model: Hugging Face model name used to embed both the
                indexed chunks and the incoming questions.
            docs_folder: Directory scanned (non-recursively) for ``.pdf`` and
                ``.txt`` files.

        Raises:
            FileNotFoundError: If ``docs_folder`` does not exist (raised by
                ``os.listdir`` in ``load_documents``).
        """
        self.setup_logging()
        self.check_memory()
        self.model_name = model_name
        self.embedding_model = embedding_model
        self.docs_folder = docs_folder

        self.embeddings = HuggingFaceEmbeddings(model_name=self.embedding_model)
        self.llm = ChatOllama(model=self.model_name)

        self.documents = self.load_documents()
        self.vectorstore = self.create_vectorstore()
        self.rag_chain = self.setup_rag_chain()

    #############################

    def setup_logging(self):
        """Configure root logging at INFO level and create ``self.logger``."""
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    #############################

    def check_memory(self):
        """Log the available system memory (GB) and warn when it is low."""
        available_memory = psutil.virtual_memory().available / (1024 ** 3)
        self.logger.info(f"Memoria disponibile: {available_memory:.1f} GB")
        if available_memory < self.MIN_FREE_MEMORY_GB:
            self.logger.warning("Bassa memoria! Potrebbero esserci rallentamenti.")

    #############################

    def load_documents(self):
        """Load every supported file in ``docs_folder`` and split it into chunks.

        Files are visited in sorted order so the positional IDs assigned later
        in ``create_vectorstore`` are stable between runs. Extension matching
        is case-insensitive; directories and unsupported files are skipped.

        Returns:
            list: Chunk documents, each tagged with ``file`` (source file name)
            and ``type`` (always ``"text"``) in its metadata.
        """
        # The splitter is stateless with respect to the files, so build it once.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.CHUNK_SIZE, chunk_overlap=self.CHUNK_OVERLAP
        )

        all_documents = []
        loaded_files = 0
        for file in sorted(os.listdir(self.docs_folder)):
            file_path = os.path.join(self.docs_folder, file)
            if not os.path.isfile(file_path):
                continue  # sub-directories are not scanned

            lowered = file.lower()
            if lowered.endswith(".pdf"):
                loader = PyPDFLoader(file_path)
            elif lowered.endswith(".txt"):
                loader = TextLoader(file_path)
            else:
                continue  # ignore unsupported files

            chunks = text_splitter.split_documents(loader.load())

            # Tag each chunk with its source file name and data type.
            for chunk in chunks:
                chunk.metadata["file"] = file
                chunk.metadata["type"] = "text"

            all_documents.extend(chunks)
            loaded_files += 1

        # Report only the files that were actually loaded, not every entry
        # found in the folder.
        self.logger.info(f"Caricati {len(all_documents)} chunk da {loaded_files} file.")
        if not all_documents:
            self.logger.warning(f"Nessun documento supportato trovato in '{self.docs_folder}'.")
        return all_documents

    #############################

    def create_vectorstore(self):
        """Index ``self.documents`` in the persistent Chroma collection.

        Chunks are embedded with ``self.embeddings`` (so the ``embedding_model``
        constructor argument is the one actually used) and written in batches.
        ``upsert`` is used instead of ``add`` so that re-running against an
        existing on-disk index refreshes the stored content for each ID.

        NOTE: IDs are positional (``"0"``, ``"1"``, ...). If the corpus shrinks
        between runs, entries with higher IDs from the previous run remain in
        the collection; delete the index folder to rebuild from scratch.

        Returns:
            The Chroma collection holding the indexed chunks.
        """
        chroma_client = chromadb.PersistentClient(path=self.PERSIST_PATH)
        collection = chroma_client.get_or_create_collection(name=self.COLLECTION_NAME)

        batch_size = self.INDEX_BATCH_SIZE
        for start in range(0, len(self.documents), batch_size):
            batch = self.documents[start:start + batch_size]
            texts = [doc.page_content for doc in batch]
            collection.upsert(
                ids=[str(start + offset) for offset in range(len(batch))],
                documents=texts,
                metadatas=[doc.metadata for doc in batch],
                embeddings=self.embeddings.embed_documents(texts),
            )

        return collection

    #############################

    @staticmethod
    def _format_context(results):
        """Turn a Chroma query result into one plain-text context string.

        ``results["documents"]`` holds one list of passages per query; only a
        single query is issued, so the first list is used. Passages are joined
        with blank lines. Missing or empty results yield ``""``.
        """
        groups = (results or {}).get("documents") or []
        passages = groups[0] if groups else []
        return "\n\n".join(text for text in passages if text)

    def setup_rag_chain(self):
        """Build the retrieval -> prompt -> LLM -> string chain.

        Returns:
            A runnable that takes the question string and returns the model's
            answer as a string.
        """
        def chromadb_retriever(query_text):
            # Embed the question with the same model used for indexing.
            results = self.vectorstore.query(
                query_embeddings=[self.embeddings.embed_query(query_text)],
                n_results=self.N_RESULTS,
            )
            return self._format_context(results)

        template = """
        You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question:
        If you don't know the answer, then do not answer from your own knowledge.
        Keep the answer concise.
        
        #### Retrieved Context ####
        {context}
        
        #### Question ####
        {question}
        
        #### LLM Response ####
        """

        prompt = ChatPromptTemplate.from_template(template)

        # The chain input is the raw question string: it feeds the retriever
        # and is passed through unchanged as ``question``.
        return (
            {"context": chromadb_retriever, "question": lambda x: x}
            | prompt
            | self.llm
            | StrOutputParser()
        )

    #############################

    def query(self, question):
        """Answer ``question`` via the RAG chain, logging process memory first.

        Args:
            question: The user's question as a string.

        Returns:
            str: The model's answer.
        """
        memory_usage = psutil.Process(os.getpid()).memory_info().rss / (1024 ** 2)
        self.logger.info(f"Uso memoria: {memory_usage:.1f} MB")
        return self.rag_chain.invoke(question)

In [5]:
# Interactive question loop: builds the pipeline once, then answers questions
# typed at the prompt until the user enters 'exit' or input ends.
if __name__ == "__main__":
    rag = RAGPipeline()  # rely on the constructor defaults instead of repeating them

    while True:
        try:
            query = input("Inserisci una domanda (o 'exit' per uscire): ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the kernel was interrupted at the prompt:
            # leave the loop without a traceback.
            break
        if query.lower() == "exit":
            break
        if not query:
            continue  # do not spend an LLM call on a blank line
        print(rag.query(query))

INFO:__main__:Memoria disponibile: 1.1 GB
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
INFO:__main__:Caricati 1674 chunk da 4 file.
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:__main__:Uso memoria: 669.5 MB
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


I don't have any context for the question "Hi!". Can you please provide more information or clarify what you are asking?


INFO:__main__:Uso memoria: 23.1 MB
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


I don't have enough context to answer this question.


INFO:__main__:Uso memoria: 92.7 MB
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Shapley values were first introduced by L. Shapley in the context of game theory and are used to explain the predictions of ML models.


INFO:__main__:Uso memoria: 94.2 MB
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


I don't have enough context to provide an answer. Please provide more information or clarify the question so I can assist you better.
