In [6]:
!find /content -type d -name "__pycache__" -exec rm -rf {} +


In [7]:
!pip install -q sentence-transformers pinecone

In [8]:
import os

dirs = [
    "ragagentops",
    "ragagentops/app",
    "ragagentops/app/services"
]

for d in dirs:
    os.makedirs(d, exist_ok=True)


In [9]:
%%writefile ragagentops/app/services/embedding_agent.py
from sentence_transformers import SentenceTransformer

class EmbeddingAgent:
    def __init__(self):
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        print("✅ Embedding model loaded.")

    def embed_text(self, text):
        embedding = self.model.encode(text).tolist()
        return embedding


Overwriting ragagentops/app/services/embedding_agent.py


In [10]:
!find /content -type d -name "__pycache__" -exec rm -rf {} +

In [11]:
%%writefile ragagentops/app/services/vector_store.py
from pinecone import Pinecone

class VectorStore:
    def __init__(self, api_key, index_name):
        self.api_key = api_key
        self.index_name = index_name
        self.pc = None
        self.index = None

    def connect(self):
        """Connect to Pinecone and initialize index."""
        self.pc = Pinecone(api_key=self.api_key)
        self.index = self.pc.Index(self.index_name)
        print(f"✅ Connected to Pinecone index: {self.index_name}")

    def upsert_vectors(self, vectors, ids):
        """Upsert vectors into Pinecone index."""
        vectors_list = vectors.tolist() if hasattr(vectors, "tolist") else vectors
        records = list(zip(ids, vectors_list))
        self.index.upsert(vectors=records)
        print(f"✅ Upserted {len(records)} vectors.")

    def query_vector(self, vector, top_k=3):
        """Query Pinecone index with a vector."""
        vector_list = vector.tolist() if hasattr(vector, "tolist") else vector
        result = self.index.query(vector=vector_list, top_k=top_k)
        return result


Overwriting ragagentops/app/services/vector_store.py


In [12]:
import sys
sys.path.append("ragagentops")

from app.services.embedding_agent import EmbeddingAgent
from app.services.vector_store import VectorStore
from google.colab import userdata

pinecone_key=userdata.get("PINECONE_API_KEY")

embedding_agent = EmbeddingAgent()
vector_store = VectorStore(api_key=pinecone_key, index_name="ragagentops")
vector_store.connect()

doc_id = "doc1"
content = "This is a test document for Pinecone integration."

embedding = embedding_agent.embed_text(content)
vector_store.upsert_vectors([embedding], [doc_id])

query = "test document integration"
query_embedding = embedding_agent.embed_text(query)
result = vector_store.query_vector(query_embedding, top_k=1)

print("✅ Query Result:", result)


✅ Embedding model loaded.
✅ Connected to Pinecone index: ragagentops
✅ Upserted 1 vectors.
✅ Query Result: {'matches': [{'id': 'doc1', 'score': 0.428396761, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}


In [13]:
!pip install pypdf



In [14]:
!pip install requests



In [15]:
import requests
from pypdf import PdfReader

def download_pdf(url, save_path="downloaded.pdf"):
    """Download a PDF from a URL."""
    response = requests.get(url)
    if response.status_code == 200:
        with open(save_path, "wb") as f:
            f.write(response.content)
        print(f"✅ PDF downloaded and saved as {save_path}")
    else:
        raise Exception(f"Failed to download PDF. Status code: {response.status_code}")

def load_pdf_text(file_path):
    """Extract text from a local PDF file."""
    reader = PdfReader(file_path)
    text = ""
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            text += page_text
    print(f"✅ Extracted text from PDF ({len(text)} characters)")
    return text


In [16]:
# PDF URL (example)
pdf_url = "https://arxiv.org/pdf/2106.04554.pdf"  # You can replace this with your PDF link

# Step 1: Download
download_pdf(pdf_url, "my_paper.pdf")

# Step 2: Extract Text
pdf_text = load_pdf_text("my_paper.pdf")

# Preview extracted text
print(pdf_text[:500])  # First 500 chars


✅ PDF downloaded and saved as my_paper.pdf
✅ Extracted text from PDF (141239 characters)
A Survey of Transformers
TIANYANG LIN, YUXIN WANG, XIANGYANG LIU, and XIPENG QIU∗, School of Computer
Science, Fudan University, China and Shanghai Key Laboratory of Intelligent Information Processing, Fudan
University, China
Transformers have achieved great success in many artificial intelligence fields, such as natural language
processing, computer vision, and audio processing. Therefore, it is natural to attract lots of interest from
academic and industry researchers. Up to the present, a gre


In [17]:
# Ingest PDF text into pipeline
embedding = embedding_agent.embed_text(pdf_text)
vector_store.upsert_vectors([embedding], ["my-pdf-doc"])

# Query
query = "what is this about ?"
query_embedding = embedding_agent.embed_text(query)
result = vector_store.query_vector(vector=query_embedding, top_k=1)
print("Query Result:", result)


✅ Upserted 1 vectors.
Query Result: {'matches': [{'id': 'chunk-24', 'score': 0.16680631, 'values': []}],
 'namespace': '',
 'usage': {'read_units': 1}}


In [18]:
import os
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader
from textwrap import wrap
from google.colab import userdata

pinecone_key=userdata.get("PINECONE_API_KEY")


# ✅ Initialize embedding model
embedding_agent = SentenceTransformer("all-MiniLM-L6-v2")
print("✅ Embedding model loaded.")

# ✅ Connect Pinecone
pc = Pinecone(api_key=pinecone_key)
index = pc.Index("ragagentops")
print("✅ Connected to Pinecone index.")


✅ Embedding model loaded.
✅ Connected to Pinecone index.


In [19]:
# ✅ Load PDF
pdf_path = "/content/my_paper.pdf"  # Change this path to your uploaded PDF
reader = PdfReader(pdf_path)
pdf_text = " ".join(page.extract_text() for page in reader.pages)
print("✅ PDF Loaded.")

# ✅ Chunk PDF into ~1000 character chunks
chunks = wrap(pdf_text, width=1000)
print(f"✅ Split PDF into {len(chunks)} chunks.")


✅ PDF Loaded.
✅ Split PDF into 142 chunks.


In [20]:
# ✅ Embed & Upsert Chunks
for idx, chunk in enumerate(chunks):
    embedding = embedding_agent.encode(chunk).tolist()
    index.upsert(vectors=[(f"chunk-{idx}", embedding)])

print("✅ All chunks upserted.")


✅ All chunks upserted.


In [21]:
# ✅ Query Example
query = "Explain the main topic discussed."
query_embedding = embedding_agent.encode(query).tolist()

# ✅ Search Top 3 Relevant Chunks
result = index.query(vector=query_embedding, top_k=3)

print("Query Result:")
for match in result['matches']:
    chunk_id = match['id']
    score = match['score']
    chunk_num = int(chunk_id.split('-')[1])
    chunk_text = chunks[chunk_num]
    print(f"\n🔎 Chunk ID: {chunk_id} | Score: {score:.4f}")
    print(f"📄 Content:\n{chunk_text[:300]}...")  # Showing first 300 chars


Query Result:

🔎 Chunk ID: chunk-25 | Score: 0.2038
📄 Content:
corresponding memory block. Fig. 4(e) depicts a commonly used case where the memory blocks are identical to their corresponding query blocks. 4.1.1.2 Compound Sparse Attention. Existing sparse attentions are often composed of more than one of the above atomic patterns. Fig. 5 illustrates some repres...

🔎 Chunk ID: chunk-133 | Score: 0.1784
📄 Content:
ICML. 9438–9447. http://proceedings.mlr.press/v119/tay20a.html [133] Yao-Hung Hubert Tsai, Shaojie Bai, Makoto Yamada, Louis-Philippe Morency, and Ruslan Salakhutdinov. 2019. Transformer Dissection: An Unified Understanding for Transformer’s Attention via the Lens of Kernel. InProceedings of EMNLP-I...

🔎 Chunk ID: chunk-26 | Score: 0.1764
📄 Content:
internal global-node attention. The global nodes are chosen to be [CLS] token for classification and all question tokens 10 Lin et al. for Question Answering tasks. They also replace some of the band attention heads in upper layer

In [22]:
# ✅ Retrieve Full Text of Top Chunks
top_matches = result['matches']

for match in top_matches:
    chunk_id = match['id']
    score = match['score']
    chunk_index = int(chunk_id.split('-')[1])  # Assuming IDs like "chunk-25"
    chunk_text = chunks[chunk_index]

    print(f"\n🔎 Chunk ID: {chunk_id} | Score: {score:.4f}")
    print(f"📄 Content:\n{chunk_text}\n")



🔎 Chunk ID: chunk-25 | Score: 0.2038
📄 Content:
corresponding memory block. Fig. 4(e) depicts a commonly used case where the memory blocks are identical to their corresponding query blocks. 4.1.1.2 Compound Sparse Attention. Existing sparse attentions are often composed of more than one of the above atomic patterns. Fig. 5 illustrates some representative compound sparse attention patterns. k 9 q 8 (a) Star-Transformer k 9 q 8 (b) Longformer k 9 q 8 (c) ETC k 9 q 8 (d) BigBird Fig. 5. Some representative compound sparse attention patterns. The red boxes indicate sequence boundaries. Star-Transformer [43] uses a combination of band attention and global attention. Specifically, Star-Transformer just includes only a global node and a band attention with the width of 3, in which any pair of non-adjacent nodes are connected through a shared global node and adjacent nodes are connected directly with each other. This kind of sparse pattern forms a star-shaped graph among nodes. Longformer [ 1

In [23]:
from transformers import pipeline

# ✅ Load Summarizer (this will download model first time)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# ✅ Summarize First Match
summary = summarizer(chunk_text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
print("\n📝 Summary:\n", summary)


Device set to use cpu



📝 Summary:
  Extended Transformer Construction (ETC) utilizes combination of band attention and external global-node attention . ETC also includes a masking mechanism to handle structured inputs and adapt Contrastive Predictive Coding (CPC) for pre-training . BigBird uses additional random attention to approximate full attention .


In [24]:
from transformers import pipeline

# ✅ Load QA model (small but effective for demos)
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

# ✅ Use First Top Chunk for QA
context = chunk_text  # or join multiple chunks

question = "What is this section about?"
answer = qa_pipeline(question=question, context=context)
print(f"\n💬 Answer: {answer['answer']}")


Device set to use cpu



💬 Answer: Extended Transformer Construction


**picture-perfect till here ✅**  Any doubts run the cells in sequence

In [27]:
# ✅ Ingest Document (PDF or Text)
from google.colab import files
import fitz  # PyMuPDF

# ✅ Upload PDF file manually here if needed
uploaded = files.upload()

# ✅ Read uploaded PDF or Enter text manually
pdf_file = list(uploaded.keys())[0] if uploaded else None
if pdf_file:
    with fitz.open(pdf_file) as doc:
        pdf_text = ""
        for page in doc:
            pdf_text += page.get_text()
    content = pdf_text
    print("✅ PDF content extracted.")
else:
    content = input("Enter text content manually: ")

# ✅ Initialize agent
embedding_agent = EmbeddingAgent()

doc_id = input("Enter Document ID: ")
embedding = embedding_agent.embed_text(content)  # ✅ CORRECT
vector_store.upsert_vectors([embedding], [doc_id])
print(f"✅ Document '{doc_id}' ingested successfully.")


Saving trial_updating.pdf to trial_updating (1).pdf
✅ PDF content extracted.
✅ Embedding model loaded.
Enter Document ID: trial_updating_69
✅ Upserted 1 vectors.
✅ Document 'trial_updating_69' ingested successfully.


In [28]:
# ✅ Query the document
query = input("Enter your query: ")
query_embedding = embedding_agent.embed_text(query)
result = vector_store.query_vector(vector=query_embedding, top_k=3)

# ✅ Display results
print("🔎 Query Result:\n")
for match in result['matches']:
    doc_id = match['id']
    score = match['score']
    print(f"📄 Document ID: {doc_id}")
    print(f"🔢 Score: {score}\n")


Enter your query: what is this about ?
🔎 Query Result:

📄 Document ID: chunk-24
🔢 Score: 0.16680631

📄 Document ID: chunk-20
🔢 Score: 0.166210338

📄 Document ID: chunk-100
🔢 Score: 0.164624885



**Successfully done**

In [32]:
# ✅ Install packages (only once)
!pip install ipywidgets PyMuPDF --quiet

import ipywidgets as widgets
from IPython.display import display, clear_output
import fitz  # PyMuPDF

# ✅ Widgets
upload_widget = widgets.FileUpload(accept='.pdf', multiple=False)
query_widget = widgets.Text(description="Query:", placeholder='Enter your query here')
ingest_button = widgets.Button(description="Ingest PDF", button_style='success')
query_button = widgets.Button(description="Query", button_style='info')
output = widgets.Output()

# ✅ Helper functions
chunks_dict = {}  # Stores chunk_id → text mapping

def extract_pdf_chunks(file_bytes, chunk_size=500):
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        full_text = ""
        for page in doc:
            full_text += page.get_text()
    chunks = [full_text[i:i + chunk_size] for i in range(0, len(full_text), chunk_size)]
    return chunks

def on_ingest_clicked(b):
    output.clear_output()
    with output:
        if not upload_widget.value:
            print("⚠️ Please upload a PDF.")
            return
        uploaded_file = list(upload_widget.value.values())[0]
        chunks = extract_pdf_chunks(uploaded_file['content'])
        print(f"✅ PDF extracted into {len(chunks)} chunks.")
        for i, chunk in enumerate(chunks):
            embedding = embedding_agent.embed_text(chunk)
            chunk_id = f"chunk-{i}"
            vector_store.upsert_vectors([embedding], [chunk_id])
            chunks_dict[chunk_id] = chunk
        print(f"✅ {len(chunks)} chunks ingested successfully.")

def on_query_clicked(b):
    output.clear_output()
    with output:
        query = query_widget.value
        query_embedding = embedding_agent.embed_text(query)
        result = vector_store.query_vector(vector=query_embedding, top_k=3)
        print(f"🔍 Query: {query}\n")
        for match in result['matches']:
            chunk_id = match['id']
            score = match['score']
            chunk_text = chunks_dict.get(chunk_id, "[Chunk not found]")
            print(f"🔎 Document ID: {chunk_id} | Score: {score:.4f}\n📄 Content:\n{chunk_text}\n")

# ✅ Bind events
ingest_button.on_click(on_ingest_clicked)
query_button.on_click(on_query_clicked)

# ✅ Show UI
display(
    widgets.VBox([
        widgets.Label("📄 Upload PDF:"),
        upload_widget,
        ingest_button,
        widgets.Label("🔍 Query:"),
        query_widget,
        query_button,
        output
    ])
)

VBox(children=(Label(value='📄 Upload PDF:'), FileUpload(value={}, accept='.pdf', description='Upload'), Button…

**Perfectly Working**

In [33]:
!apt-get install git -y


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [34]:
!git config --global user.email "21831a6631@gniindia.org"
!git config --global user.name "ks-suraj"


In [35]:
!git clone https://github.com/ks-suraj/projects.git
%cd projects
!git checkout RAG-Agent-Ops


Cloning into 'projects'...
remote: Enumerating objects: 427, done.[K
remote: Counting objects: 100% (280/280), done.[K
remote: Compressing objects: 100% (167/167), done.[K
remote: Total 427 (delta 97), reused 236 (delta 59), pack-reused 147 (from 1)[K
Receiving objects: 100% (427/427), 389.17 KiB | 3.54 MiB/s, done.
Resolving deltas: 100% (134/134), done.
/content/projects
Branch 'RAG-Agent-Ops' set up to track remote branch 'RAG-Agent-Ops' from 'origin'.
Switched to a new branch 'RAG-Agent-Ops'


In [36]:
!cp -r /content/ragagentops ./ragagentops


In [37]:
!git add ragagentops
!git commit -m "Add RAG-Agent-Ops pipeline from Colab"


[RAG-Agent-Ops 943fa1a] Add RAG-Agent-Ops pipeline from Colab
 4 files changed, 37 insertions(+)
 create mode 100644 ragagentops/app/services/__pycache__/embedding_agent.cpython-311.pyc
 create mode 100644 ragagentops/app/services/__pycache__/vector_store.cpython-311.pyc
 create mode 100644 ragagentops/app/services/embedding_agent.py
 create mode 100644 ragagentops/app/services/vector_store.py


In [40]:
from google.colab import userdata
token = userdata.get('GITHUB_TOKEN')  # assuming you saved it in Colab secrets


In [44]:
token = userdata.get("GITHUB_TOKEN")
repo_url = f"https://{token}@github.com/ks-suraj/projects.git"

# Now pass it using shell command
!git remote set-url origin "{repo_url}"
!git push origin RAG-Agent-Ops


Enumerating objects: 11, done.
Counting objects:   9% (1/11)Counting objects:  18% (2/11)Counting objects:  27% (3/11)Counting objects:  36% (4/11)Counting objects:  45% (5/11)Counting objects:  54% (6/11)Counting objects:  63% (7/11)Counting objects:  72% (8/11)Counting objects:  81% (9/11)Counting objects:  90% (10/11)Counting objects: 100% (11/11)Counting objects: 100% (11/11), done.
Delta compression using up to 2 threads
Compressing objects:  12% (1/8)Compressing objects:  25% (2/8)Compressing objects:  37% (3/8)Compressing objects:  50% (4/8)Compressing objects:  62% (5/8)Compressing objects:  75% (6/8)Compressing objects:  87% (7/8)Compressing objects: 100% (8/8)Compressing objects: 100% (8/8), done.
Writing objects:  10% (1/10)Writing objects:  20% (2/10)Writing objects:  30% (3/10)Writing objects:  40% (4/10)Writing objects:  50% (5/10)Writing objects:  60% (6/10)Writing objects:  70% (7/10)Writing objects:  80% (8/10)Writing objects:  90% (9/10)Wri

**Everything is Done and Pushed**