<a href="https://colab.research.google.com/github/karan-mudaliar/Structured-Abstracts/blob/dev/notebooks/copali_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core import Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache
from llama_index.llms.vllm import Vllm
from llama_index.llms.huggingface import HuggingFaceLLM

from llama_index.core import set_global_tokenizer
from transformers import AutoTokenizer








In [2]:
def load_pdf_document(pdf_path):
    try:
        docs = SimpleDirectoryReader(input_files=[pdf_path]).load_data()
        return docs
    except Exception as e:
        print(f"Error loading PDF: {e}")
        return None

In [3]:
doc = load_pdf_document('attention.pdf')

In [4]:
embedding_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=400, chunk_overlap=40),
    ]
)

In [5]:
nodes = pipeline.run(documents=doc)
index = VectorStoreIndex(nodes, embed_model=embedding_model)

In [6]:
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x2b7f142d8b90>

In [7]:
from llama_index.core import Settings
from llama_index.core.prompts import PromptTemplate
from typing import List
import torch

# Check GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

llm = HuggingFaceLLM(model_name="Qwen/Qwen2.5-0.5B-Instruct")

Using device: cuda


tokenizer_config.json:   0%|          | 0.00/264 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

The model `Qwen/Qwen2.5-0.5B-Instruct` and tokenizer `StabilityAI/stablelm-tuned-alpha-3b` are different, please ensure that they are compatible.


In [8]:
Settings.llm = llm
Settings.embed_model = embedding_model
Settings.chunk_size = 400
Settings.chunk_overlap = 40

# Create prompt template
rag_template = PromptTemplate(
    """You are a helpful AI assistant. Use the following context to answer the user's question. 
    If you're unsure or the context doesn't contain the relevant information, please say so.

    Context: {context}
    
    Question: {query}
    
    Answer: Let me help you with that."""
)

# Create query engine
query_engine = index.as_query_engine(
    text_qa_template=rag_template,
    similarity_top_k=3,
)

# Helper function for direct LLM testing
def test_llm(prompt: str) -> str:
        response = llm.complete(prompt)
        return response.text

# Function to perform RAG queries
def ask_question(question: str) -> str:
        response = query_engine.query(question)
        return response.response

# Function to perform RAG queries with source context
def ask_question_with_sources(question: str) -> tuple[str, List[str]]:
        response = query_engine.query(question)
        sources = [node.node.text for node in response.source_nodes]
        return response.response, sources

# Test the setup
if __name__ == "__main__":
    # First test direct LLM functionality
    print("Testing direct LLM:")
    test_response = test_llm("What is the capital of France?")
    print(f"Direct LLM response: {test_response}\n")
    
    # Then test RAG functionality
    test_question = "What is the main topic of the paper?"
    print("Testing RAG query:")
    print(f"Q: {test_question}")
    print(f"A: {ask_question(test_question)}")

Testing direct LLM:
Direct LLM response: *�/761, Cma Fa----ion wavesigionperson----ed）8 v IGF----
        upt matureover consequence---- viaeto 51 for Freud*�/1&2iff�105+13.rentY2[ recentlyB su of France?*ter, be/774=" Cma /*---- boundigion manufact----isation8 v became---- Dy monsterzedperson*�02&1 bet�201+212rentY3[ recentlyB su of France?*ter, be0..2=" C forperson8 open postp----ionfebovercedper,pt, ne46are my, \,ath Sox Ralphear be versionq hybridization Wilcoxon---- rou laboratory beough,�01+12="---iring8 ""istint an individ\[9 manufact9 statistically9 Pap an individ voor an Unitedistant 2 le eternal----ion Pap comion /*----ion boundigion manufact----edatively,](# {{ ched became----ion Pap comion /*----ion boundigion manufact----isationimert similarly*ockide depth forockide Wednesday,
        text die*ter, capital of France*ountinstight

Testing RAG query:
Q: What is the main topic of the paper?
A: &1*2*3ense me/. diameters�59 me0. diameters�69 me1. diameters�79 me2. diameters�/.9