# Dependencies

In [9]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings, GooglePalmEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder
from langchain.retrievers.document_compressors import EmbeddingsFilter
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pickle
import os
from pathlib import Path

# vectorstore

In [10]:
LOCAL_VECTOR_STORE_DIR = Path("../data/vectorstore")

# Function to create a vectorstore

In [11]:
def create_vectorstore(embeddings, documents, vectorstore_name):
    """Create a Chroma vectorstore with persistence."""
    persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/" + vectorstore_name
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory
    )
    vector_store.persist()
    return vector_store

# Function to select embeddings provider

In [12]:
def select_embeddings(provider):
    if provider == "openai":
        return OpenAIEmbeddings()
    elif provider == "huggingface":
        return HuggingFaceEmbeddings(model_name="thenlper/gte-large") # "all-MiniLM-L6-v2" "thenlper/gte-large"
    elif provider == "google":
        return GooglePalmEmbeddings(google_api_key=os.getenv("GOOGLE_API_KEY"))
    else:
        raise ValueError("Invalid provider.")

# Function to create a vectorstore-based retriever

In [13]:
def create_retriever(vectorstore, search_type="similarity", k=10, score_threshold=None):
    """Create a retriever based on the vectorstore."""
    search_kwargs = {}
    if k is not None:
        search_kwargs['k'] = k
    if score_threshold is not None:
        search_kwargs['score_threshold'] = score_threshold

    retriever = vectorstore.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs
    )
    return retriever

# Function to create a contextual compression retriever

In [14]:
def create_compression_retriever(embeddings, base_retriever, chunk_size=3000, k=10, similarity_threshold=0.8):
    """Create a ContextualCompressionRetriever."""
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=100)
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    relevant_filter = EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold)
    reordering = LongContextReorder()

    pipeline_compressor = DocumentCompressorPipeline(
        transformers=[splitter, redundant_filter, relevant_filter, reordering]
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=pipeline_compressor, 
        base_retriever=base_retriever
    )
    return compression_retriever

# Remove duplicates

In [15]:
def remove_duplicates(results):
    seen = set()
    unique_results = []
    for result in results:
        if result[0] not in seen:  # result[0] é o conteúdo do fragmento
            seen.add(result[0])
            unique_results.append(result)
    return unique_results


# Retrieve documents along with similarity scores and sort them by relevance.

In [16]:
def retrieve_with_scores(retriever, query, embeddings, k=4):
    results = retriever.get_relevant_documents(query)
    query_embedding = embeddings.embed_query(query)

    # Calculate similarity scores manually
    results_with_scores = []
    for result in results:
        doc_embedding = embeddings.embed_query(result.page_content)
        similarity = cosine_similarity(
            np.array(query_embedding).reshape(1, -1),
            np.array(doc_embedding).reshape(1, -1)
        )[0][0]  # Extract the similarity score
        results_with_scores.append((result.page_content, result.metadata, similarity))

    # Sort results by similarity score
    sorted_results = sorted(results_with_scores, key=lambda x: x[2], reverse=True)

    # Remove duplicates
    unique_results = remove_duplicates(sorted_results)
    return unique_results[:k]


# Load document fragments

In [17]:
with open("../data/fragments.pkl", "rb") as f:
    fragments = pickle.load(f)

print(f"Fragments loaded: {len(fragments)}")

Fragments loaded: 9


# Configure embeddings provider

In [18]:
provider = "huggingface"  # Change to "openai" or "google"
embeddings = select_embeddings(provider)

  return HuggingFaceEmbeddings(model_name="thenlper/gte-large") # "all-MiniLM-L6-v2" "thenlper/gte-large"


# Create vectorstore

In [19]:
vectorstore_name = "my_vectorstore"
vectorstore = create_vectorstore(embeddings, fragments, vectorstore_name)

⚠️ It looks like you upgraded from a version below 0.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.
  vector_store.persist()


# Create a basic retriever

In [20]:
retriever = create_retriever(vectorstore, search_type="similarity", k=4)

# Create a contextual compression retriever

In [21]:
compression_retriever = create_compression_retriever(
    embeddings=embeddings, 
    base_retriever=retriever, 
    chunk_size=500, 
    k=16, 
    similarity_threshold=0.8
)


# Test retrieval with the basic retriever


In [22]:
query = "Which course is best for beginners?"
results = retriever.get_relevant_documents(query)

print("Search results (Basic Retriever):")
for result in results:
    print(f"- Text: {result.page_content[:100]}...")
    print(f"  Metadata: {result.metadata}")


Search results (Basic Retriever):
- Text: Course 1: Blockchain Fundamentals What You'll Learn ● Understand the basic concepts of blockchain, i...
  Metadata: {'page': 0, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals What You'll Learn ● Understand the basic concepts of blockchain, i...
  Metadata: {'page': 0, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals What You'll Learn ● Understand the basic concepts of blockchain, i...
  Metadata: {'page': 0, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals What You'll Learn ● Understand the basic concepts of blockchain, i...
  Metadata: {'page': 0, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}


  results = retriever.get_relevant_documents(query)


In [23]:
query = "How much does the Blockchain Fundamentals course cost?"
results = retriever.get_relevant_documents(query)

print("Search results (Basic Retriever):")
for result in results:
    print(f"- Text: {result.page_content[:100]}...")
    print(f"  Metadata: {result.metadata}")

Search results (Basic Retriever):
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: B...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: B...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: B...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: B...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}


In [24]:
# Testar a recuperação com similaridade calculada manualmente
query = "What are the discounted prices for the courses?"
sorted_results = retrieve_with_scores(retriever, query, embeddings)

# Exibir resultados
print("Sorted results with calculated similarity:")
for text, metadata, score in sorted_results:
    print(f"- Text: {text[:100]}...")
    print(f"  Metadata: {metadata}")
    print(f"  Similarity Score: {score:.4f}")


Sorted results with calculated similarity:
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: B...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
  Similarity Score: 0.8672


In [25]:
# Exibir todos os resultados sem limitar o número
query = "What are the discounted prices for the courses?"
results = retrieve_with_scores(retriever, query, embeddings, k=10)

print("All sorted results:")
for i, (text, metadata, score) in enumerate(results):
    print(f"Result {i+1}:")
    print(f"- Text: {text[:200]}...")
    print(f"  Metadata: {metadata}")
    print(f"  Similarity Score: {score:.4f}\n")


All sorted results:
Result 1:
- Text: Course 1: Blockchain Fundamentals ○ Original Price: $50.00 ○ Discounted Price: $45.00 2. Course 2: Blockchain in Practice with XRPL ○ Original Price: $80.00 ○ Discounted Price: $72.00 3. Course 3: Int...
  Metadata: {'page': 4, 'source': '..\\data\\docs\\Informações dos cursos.pdf'}
  Similarity Score: 0.8672



In [26]:
import chromadb
from chromadb.utils import embedding_functions

# Configurar o ChromaDB
DB_PATH = "./chroma.sqlite3"
COLLECTION_NAME = "blockchain_courses"

# Inicializar cliente e função de embedding
client = chromadb.PersistentClient(path=DB_PATH)
embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="thenlper/gte-large")

# Verificar ou criar a coleção
collection = client.get_or_create_collection(COLLECTION_NAME, embedding_function=embedding_function)

# Definir cursos disponíveis
courses = [
    {
        "course_name": "Blockchain Fundamentals",
        "description": "Learn the basics of blockchain, its history, and how it is transforming industries.",
        "price": 50.00,
        "discounted_price": 45.00,  # Para pagamentos em XRP
        "duration": "5 weeks"
    },
    {
        "course_name": "Blockchain in Practice with XRPL",
        "description": "Hands-on course on using the XRP Ledger for practical applications.",
        "price": 80.00,
        "discounted_price": 72.00,
        "duration": "4 weeks"
    },
    {
        "course_name": "Cryptocurrency Trading",
        "description": "Learn trading strategies and risk management in the cryptocurrency market.",
        "price": 100.00,
        "discounted_price": 90.00,
        "duration": "6 weeks"
    },
    {
        "course_name": "Trading on Blockchain with XRPL",
        "description": "Develop trading strategies on blockchain using decentralized tools on the XRPL.",
        "price": 120.00,
        "discounted_price": 108.00,
        "duration": "5 weeks"
    }
]

# Dados para adicionar
documents = [f"{course['course_name']}: {course['description']}" for course in courses]
metadatas = [
    {
        "course_name": course["course_name"],
        "price": course["price"],
        "discounted_price": course["discounted_price"],
        "duration": course["duration"]
    } for course in courses
]
ids = [f"course_{i}" for i in range(len(courses))]

# Adicionar dados à coleção
collection.add(documents=documents, metadatas=metadatas, ids=ids)
print("Cursos adicionados com sucesso!")

# Validar o conteúdo da coleção
print("Conteúdo armazenado:")
print(collection.peek())


Cursos adicionados com sucesso!
Conteúdo armazenado:
{'ids': ['course_0', 'course_1', 'course_2', 'course_3'], 'embeddings': array([[ 0.01514417,  0.02171502, -0.01358613, ..., -0.0090631 ,
        -0.01939752,  0.00750446],
       [ 0.0339702 ,  0.02459448, -0.0189376 , ..., -0.01415251,
        -0.00678542,  0.02603018],
       [ 0.03041758,  0.01606825, -0.01833036, ...,  0.00618336,
         0.00104055,  0.00809577],
       [ 0.01893288,  0.02563498, -0.04354222, ..., -0.02083695,
         0.00204131,  0.01926755]]), 'documents': ['Blockchain Fundamentals: Learn the basics of blockchain, its history, and how it is transforming industries.', 'Blockchain in Practice with XRPL: Hands-on course on using the XRP Ledger for practical applications.', 'Cryptocurrency Trading: Learn trading strategies and risk management in the cryptocurrency market.', 'Trading on Blockchain with XRPL: Develop trading strategies on blockchain using decentralized tools on the XRPL.'], 'uris': None, 'data': N

### XRPL Testnet Wallet Configuration

In [28]:

from xrpl.wallet import Wallet

# Generate a Testnet Wallet
wallet = Wallet.create()
print("Testnet Wallet Address:", wallet.classic_address)
print("Testnet Wallet Seed:", wallet.seed)


Testnet Wallet Address: rKE2GpnKgQjjSyAApaxxHsoY4cG64xjsvP
Testnet Wallet Seed: sEd7QtfAeLcc2muuftZv6nrDSi3K6pw


### Display Payment Instructions

In [29]:

def show_payment_instructions(course_name, price_xrp):
    response_message = f"""To purchase the course **{course_name}**, please follow these steps:
1. Send **{price_xrp} XRP** to the following XRPL Testnet wallet address:
   **{wallet.classic_address}**

2. Use any XRPL-compatible wallet like [XRP Faucet](https://xrpl.org/xrp-testnet-faucet.html) to get Testnet XRP.

3. After completing the transaction, provide the **transaction hash** here for verification.
"""
    print(response_message)

# Example call
show_payment_instructions("Blockchain Fundamentals", 10.0)


To purchase the course **Blockchain Fundamentals**, please follow these steps:
1. Send **10.0 XRP** to the following XRPL Testnet wallet address:
   **rKE2GpnKgQjjSyAApaxxHsoY4cG64xjsvP**

2. Use any XRPL-compatible wallet like [XRP Faucet](https://xrpl.org/xrp-testnet-faucet.html) to get Testnet XRP.

3. After completing the transaction, provide the **transaction hash** here for verification.



### Verify Payment on XRPL Testnet

In [30]:
import httpx

# XRPL Testnet RPC URL
TESTNET_RPC = "https://s.altnet.rippletest.net:51234/"

def verify_payment(transaction_hash, expected_amount):
    try:
        # Chamada manual ao XRPL RPC usando httpx
        payload = {
            "method": "tx",
            "params": [
                {"transaction": transaction_hash, "binary": False}
            ]
        }
        response = httpx.post(TESTNET_RPC, json=payload)
        result = response.json()

        # Verificar se a transação foi validada
        if "result" in result and result["result"]["validated"]:
            delivered_amount = int(result["result"].get("meta", {}).get("delivered_amount", 0)) / 1_000_000
            if delivered_amount >= expected_amount:
                print("Payment verified successfully!")
                return True
            else:
                print(f"Insufficient payment. Expected {expected_amount} XRP but received {delivered_amount} XRP.")
        else:
            print("Transaction not yet validated on the ledger.")
    except Exception as e:
        print(f"Error verifying the transaction: {e}")
    return False

# Solicitação do hash da transação
transaction_hash = input("Enter the transaction hash: ")
verify_payment(transaction_hash, 10.0)


Error verifying the transaction: 'validated'


False

### Main Chatbot Logic with XRP Payment Integration

In [31]:

while True:
    user_input = input("User: ").lower()
    
    if "buy course" in user_input:
        # Purchase course flow
        course_name = "Blockchain Fundamentals"
        price_xrp = 10.0
        show_payment_instructions(course_name, price_xrp)
    
    elif "transaction hash" in user_input:
        # Verify payment
        transaction_hash = input("Please enter the transaction hash: ")
        if verify_payment(transaction_hash, 10.0):
            print("Access granted! You will receive the course materials shortly.")
        else:
            print("Payment verification failed. Please try again.")
    
    elif "exit" in user_input:
        print("Exiting chatbot. Goodbye!")
        break
    else:
        print("Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.")


Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.
Chatbot: I'm sorry, I didn't understand that. Try 'buy course' or 'exit'.


KeyboardInterrupt: Interrupted by user