In [None]:
# chatbot.py

def greet():
    return "Hello! How can I help you today?"

def respond(user_input):
    # Simple rule-based responses
    if "hello" in user_input.lower() or "hi" in user_input.lower():
        return "Hello there!"
    elif "how are you" in user_input.lower():
        return "I'm doing well, thank you!"
    elif "bye" in user_input.lower() or "goodbye" in user_input.lower():
        return "Goodbye! Have a great day."
    else:
        return "I'm sorry, I don't understand.  Can you rephrase?"

def main():
    print(greet())
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            break
        response = respond(user_input)
        print("Bot:", response)

if __name__ == "__main__":
    main()


In [None]:
import nltk
from nltk.chat.util import Chat, reflections

# Defina pares de padrões e respostas
pairs = [
    [r'hello|hi', ['Hello there!', 'Hi! How can I assist you?']],
    [r'how are you?', ['I am good, thank you!', 'Doing well, and you?']],
    [r'bye|goodbye', ['Goodbye! Have a great day!', 'See you later!']],
    [r'(.*)', ['I am sorry, I do not understand. Can you rephrase?']]
]

# Crie um chatbot usando NLTK
chatbot = Chat(pairs, reflections)

# Inicie o chatbot
print('Hello! I am a chatbot. Type exit to end the conversation.')
while True:
    user_input = input('You: ')
    if user_input.lower() == 'exit':
        break
    response = chatbot.respond(user_input)
    print('Bot:', response)


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def initialize_dialogpt(model_name="microsoft/DialoGPT-medium"):
    """
    Inicializa o tokenizer e o modelo DialoGPT.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

def generate_response(tokenizer, model, chat_history_ids, user_input, max_length=1000):
    """
    Gera uma resposta baseada no histórico da conversa e na entrada do usuário.
    
    Parâmetros:
        tokenizer: O tokenizer do modelo.
        model: O modelo de linguagem.
        chat_history_ids: Histórico de tokens da conversa.
        user_input: A mensagem do usuário.
        max_length: Tamanho máximo da resposta.
    
    Retorna:
        response: Resposta do chatbot.
        chat_history_ids: Histórico atualizado da conversa.
    """
    # Codifica a entrada do usuário
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
    
    # Concatena o histórico anterior com a nova entrada
    bot_input_ids = new_input_ids if chat_history_ids is None else torch.cat([chat_history_ids, new_input_ids], dim=-1)
    
    # Gera a resposta considerando o contexto
    chat_history_ids = model.generate(bot_input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id)
    
    # Extrai apenas a parte da resposta gerada (exclui a entrada do usuário)
    response_ids = chat_history_ids[:, bot_input_ids.shape[-1]:]
    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    
    return response, chat_history_ids

def chat():
    """
    Função principal para interagir com o chatbot.
    
    Comandos:
        "exit"  -> Finaliza a conversa.
        "reset" -> Limpa o contexto da conversa.
    """
    print("Inicializando o chatbot...")
    tokenizer, model = initialize_dialogpt()
    print("Chatbot inicializado!\nDigite 'exit' para sair ou 'reset' para limpar o contexto.\n")
    
    chat_history_ids = None

    while True:
        user_input = input("Você: ")
        if user_input.lower() == "exit":
            print("Chatbot: Até logo!")
            break
        if user_input.lower() == "reset":
            chat_history_ids = None
            print("Chatbot: Contexto da conversa limpo.")
            continue
        
        response, chat_history_ids = generate_response(tokenizer, model, chat_history_ids, user_input)
        print("Chatbot:", response)

if __name__ == "__main__":
    chat()


Inicializando o chatbot...
Chatbot inicializado!
Digite 'exit' para sair ou 'reset' para limpar o contexto.

Chatbot: Contexto da conversa limpo.


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Chatbot: I'm not sure if you're being serious or not, but I'm going to assume you're being serious.


In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def initialize_dialogpt(model_name="microsoft/DialoGPT-medium"):
    """
    Initializes the tokenizer and the DialoGPT model.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

def generate_response(tokenizer, model, chat_history_ids, user_input, max_length=1000):
    """
    Generates a response based on the chat history and user input.
    
    Parameters:
        tokenizer: The tokenizer of the model.
        model: The language model.
        chat_history_ids: Chat history token IDs.
        user_input: User input message.
        max_length: Maximum length of the response.
    
    Returns:
        response: Chatbot response.
        chat_history_ids: Updated chat history.
    """
    # Encode the user input
    new_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors="pt")
    
    # Concatenate the previous history with the new input
    bot_input_ids = new_input_ids if chat_history_ids is None else torch.cat([chat_history_ids, new_input_ids], dim=-1)
    
    # Generate a response considering the context
    chat_history_ids = model.generate(bot_input_ids, max_length=max_length, pad_token_id=tokenizer.eos_token_id, no_repeat_ngram_size=2)
    
    # Extract only the part of the response generated (excluding the user input)
    response_ids = chat_history_ids[:, bot_input_ids.shape[-1]:]
    response = tokenizer.decode(response_ids[0], skip_special_tokens=True)
    
    return response.strip(), chat_history_ids

def chat():
    """
    Main function to interact with the chatbot.
    
    Commands:
        "exit"  -> Ends the conversation.
        "reset" -> Resets the conversation history.
    """
    print("Inicializando o chatbot...")
    tokenizer, model = initialize_dialogpt()
    print("Chatbot inicializado!\nDigite 'exit' para sair ou 'reset' para limpar o contexto.\n")
    
    chat_history_ids = None

    while True:
        try:
            user_input = input("Você: ").strip()
            if not user_input:
                continue  # Skip empty inputs
            
            if user_input.lower() == "exit":
                print("Chatbot: Até logo!")
                break
            
            if user_input.lower() == "reset":
                chat_history_ids = None
                print("Chatbot: Contexto da conversa limpo.")
                continue
            
            response, chat_history_ids = generate_response(tokenizer, model, chat_history_ids, user_input)
            print("Chatbot:", response)
        
        except KeyboardInterrupt:
            print("\nChatbot: Interação interrompida pelo usuário.")
            break
        except Exception as e:
            print(f"Chatbot: Ops! Ocorreu um erro: {e}")
            chat_history_ids = None  # Reset chat history in case of error

if __name__ == "__main__":
    chat()


Inicializando o chatbot...
Chatbot inicializado!
Digite 'exit' para sair ou 'reset' para limpar o contexto.



The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Chatbot: I'm not sure if you're being serious or not, but I'm going to assume you are.
Chatbot: Hi, I am not being sarcastic. I have no idea what you mean. What is your favorite color?
Chatbot: Oh, that's a good one. Purple is my favorite colour. It's so pretty. And it's not too bright. So pretty!
Chatbot: That's my favourite colour too! I love it. :D
Chatbot: Yes, yes I do. That's why I like it so much. lt 3
Chatbot: Contexto da conversa limpo.
Chatbot: I am a Soul.

Chatbot: Interação interrompida pelo usuário.


In [None]:
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from functools import lru_cache # For caching
import gc

# --- Configuration ---
PDF_PATH = "WEB-Livro-dos-Espíritos-Guillon.pdf"
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_NAME = "gpt2" # Or try EleutherAI/gpt-neo-125M if you have resources

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# --- 1. Load and Split PDF ---
print(f"Loading PDF: {PDF_PATH}...")
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()
if not documents:
    print(f"Error loading {PDF_PATH}.")
    exit()
print(f"Loaded {len(documents)} pages from PDF.")

print("Splitting into chunks...")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=100)
texts = text_splitter.split_documents(documents)
del documents
gc.collect()
print(f"Created {len(texts)} chunks.")

# --- 2. Create Embeddings and Vector Store ---
print(f"Loading embeddings: {EMBEDDING_MODEL_NAME}...")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={'device': DEVICE}
)
print("Creating FAISS index...")
vectorstore = FAISS.from_documents(texts, embeddings)
del texts
del embeddings
gc.collect()
print("Vector store created.")

# --- 3. Load LLM ---
print(f"Loading LLM: {LLM_MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_NAME).to(DEVICE)
model.eval()
print("LLM loaded.")

# --- 4. Create Pipeline ---
print("Creating pipeline...")
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float32,
    trust_remote_code=True,
    device=0 if DEVICE == "cuda" else -1,
    max_new_tokens=150,
    temperature=0.7,
    top_p=0.95
)
print("Pipeline created.")

# --- 5. Caching & Chat History ---
# Simple dictionary cache (you could replace with LRU cache)
query_cache = {} #query : result

# Chat History (Optional, but helps for follow-up questions)
chat_history = [] #list of tuples (question, answer)

def generate_response(query, conversation_history=None, use_cache=True):
    """Generates a response, checking cache and incorporating chat history."""
    # --- 5a. Check Cache ---
    if use_cache and query in query_cache:
        print("Using cached result...")
        return query_cache[query]

    # --- 5b. Incorporate Chat History ---
    augmented_query = query # start with the user's query
    if conversation_history:
        #Add previous turns to the query to give context to the LLM
        history_context = "\n".join(f"User: {q}\nChatbot: {a}" for q, a in conversation_history[-2:]) #Last two turns only
        augmented_query = f"{history_context}\nUser question: {query}" #add it to the existing query
        print(f"Augmented query with history: {augmented_query}")

    # --- 5c. Retrieval and Generation ---
    retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

    retrieved_docs = retriever.invoke(augmented_query) #Langchain 0.1.x or later

    context_text = "\n".join([doc.page_content for doc in retrieved_docs])
    prompt = f"Context: {context_text}\nUser: {augmented_query}\nChatbot:"

    # Generation (use pipeline directly for simplicity)
    input_ids = tokenizer.encode(prompt, return_tensors="pt", max_length=1024, truncation=True).to(DEVICE)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=1024,
            num_beams=4,
            early_stopping=True,
        )
    answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # --- 5d. Update Cache ---
    if use_cache:
        query_cache[query] = answer

    return answer

# --- 6. Chat Loop ---
print("\n--- Chat with your PDF ---")
print("Type 'sair' to exit.")
while True:
    query = input("Você: ")
    if query.lower() == 'sair':
        print("Até logo!")
        break
    if not query.strip():
        continue

    print("Pensando...")
    try:
        #Generate a response considering history and cache
        answer = generate_response(query, chat_history)
        print("\nChatbot:", answer)

        #Update chat history
        chat_history.append((query, answer))
    except Exception as e:
        print(f"Error: {e}")
    finally:
        gc.collect()

print("Done.")


In [7]:
import ollama
from sentence_transformers import SentenceTransformer, util
import PyPDF2

class OllamaPDFChatbot:
    def __init__(self, pdf_path):
        self.pdf_text = self.load_pdf(pdf_path)
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

    def load_pdf(self, pdf_path):
        # Extract text from PDF and split into chunks (e.g., paragraphs)
        text_chunks = []
        try:
            with open(pdf_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    text = page.extract_text()
                    if text:
                        # Split by double newlines or paragraphs
                        chunks = [chunk.strip() for chunk in text.split('\n\n') if chunk.strip()]
                        text_chunks.extend(chunks)
            return text_chunks
        except Exception as e:
            print(f"Error reading PDF: {e}")
            return []

    def find_best_context(self, query):
        # Context retrieval using embeddings
        if not self.pdf_text:
            return "Nenhum texto carregado do PDF."
        embeddings = self.embedding_model.encode([query] + self.pdf_text)
        similarities = util.pytorch_cos_sim(embeddings[0], embeddings[1:])
        best_match_idx = similarities.argmax()
        return self.pdf_text[best_match_idx]

    def chat(self):
        while True:
            query = input("Você: ")
            if query.lower() == 'sair':
                break

            context = self.find_best_context(query)

            # Use Ollama for generation
            response = ollama.chat(
                model='llama2',
                messages=[
                    {'role': 'system', 'content': 'You are a helpful assistant answering questions about a book.'},
                    {'role': 'user', 'content': f'Context: {context}\nQuestion: {query}'}
                ]
            )

            print("Chatbot:", response['message']['content'])

# Usage:
# bot = OllamaPDFChatbot("WEB-Livro-dos-Espíritos-Guillon.pdf")
# bot.chat()

In [11]:
bot = OllamaPDFChatbot("/Users/ds/Documents/PromptEng/pdfs/WEB-Livro-dos-Espíritos-Guillon.pdf")
bot.chat()

Chatbot: Bem-vindo! I'm here to help you with any questions you may have about "O Livro dos Espíritos" (The Book of Spirits) by Carlos Castaneda. Please go ahead and ask your question, and I'll do my best to provide a helpful answer.


KeyboardInterrupt: 