# UMAP Visualizations for RAG using RAGxplorer

Reference: 

[1] Chua, G. (2024). RAGxplorer (Version 0.1.10) [Computer software]. https://github.com/gabrielchua/RAGxplorer

In [None]:
!pip install ragxplorer

In [None]:
import os
import requests
from urllib.parse import urlparse
from typing import Optional

from ragxplorer import RAGxplorer

### Downloading the graphene corpus

In [None]:
# Location of the graphene PDF corpus that is downloaded below
# (served over plain HTTP and addressed by IP).
pdf_url = "http://78.46.65.231/graphenepdfs_full.pdf"

In [None]:
def download_pdf(
    pdf_url: str,
    filename: Optional[str] = None,
    output_dir: Optional[str] = None,
    timeout: float = 30.0,
) -> str:
    """Download the file at ``pdf_url`` to disk and return its local path.

    Args:
        pdf_url: URL of the file to fetch.
        filename: Name to save the file under. Defaults to the last path
            segment of ``pdf_url``.
        output_dir: Directory to save into; it is created if it does not
            exist. Defaults to the current working directory.
        timeout: Seconds to wait for the server to connect or to send more
            data before giving up.

    Returns:
        The path of the saved file.

    Raises:
        ValueError: If no file name was given and none can be derived from
            ``pdf_url`` (e.g. the URL path ends with ``/``).
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    if output_dir is None:
        output_dir = os.getcwd()
    if filename is None:
        filename = os.path.basename(urlparse(pdf_url).path)
    if not filename:
        # Without this check the target path would be the directory itself
        # and the failure would only surface later as an opaque open() error.
        raise ValueError(
            f"Cannot derive a file name from {pdf_url!r}; pass `filename` explicitly."
        )

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)
    # Stream into a sibling temp file so a failed or interrupted transfer
    # never leaves a truncated file at the final location.
    partial_path = output_path + ".part"
    try:
        with requests.get(pdf_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                # Write in 1 MiB chunks instead of buffering the whole body.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, output_path)
    finally:
        # Only still present when the download did not complete.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Downloaded file saved at:\n{output_path}")

    return output_path

In [None]:
# Fetch the corpus and keep the local path for the load_pdf calls below.
pdf_path = download_pdf(pdf_url)

### Testing the all-MiniLM-L6-v2 embedding model

In [None]:
# Build a RAGxplorer client on the all-MiniLM-L6-v2 embedding model, load the
# downloaded PDF (chunk_size=200, chunk_overlap=50) and visualize the query.
client = RAGxplorer(embedding_model="all-MiniLM-L6-v2")
client.load_pdf(
    document_path=pdf_path,
    chunk_size=200,
    chunk_overlap=50,
    verbose=True,
)
client.visualize_query(
    query="Provide me a step-by-step process to produce high-quality monolayer graphene. Provide the source.",
)

### Testing the text-embedding-3-small (OpenAI) embedding model

In [None]:
# Do not hard-code the API key in the notebook: reuse OPENAI_API_KEY when it is
# already set in the environment, otherwise prompt for it without echoing.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Build a RAGxplorer client on the text-embedding-3-small model, load the
# downloaded PDF and visualize the query using the "HyDE" retrieval method.
# NOTE(review): presumably relies on OPENAI_API_KEY being set - confirm.
client_openai = RAGxplorer(embedding_model="text-embedding-3-small")
client_openai.load_pdf(document_path=pdf_path, chunk_size=200, chunk_overlap=50, verbose=True)
client_openai.visualize_query(
    query="Provide me a step-by-step process to produce high-quality monolayer graphene. Provide the source.",
    retrieval_method="HyDE",
    top_k=6,
    query_shape_size=10,
)