<a href="https://colab.research.google.com/github/khaledaldhaheri91-maker/khaled1/blob/main/ENEC_rag_vector_stores_pineconeindexdemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pinecone Vector Store

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [1]:
# Install LlamaIndex and its Pinecone vector-store integration via the %pip magic.
%pip install llama-index llama-index-vector-stores-pinecone



In [5]:
import logging
import sys
import os

# Send INFO-level logs to stdout through a single root handler.
# `force=True` replaces any handlers already attached to the root logger;
# without it basicConfig() silently does nothing when handlers exist.
# No extra addHandler() call: a second stdout handler would print every
# log record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

#### Creating a Pinecone Index

In [6]:
from pinecone import Pinecone, ServerlessSpec

In [7]:
import openai
from google.colab import userdata

# Pull the OpenAI API key out of Colab's secret store and hand it to the SDK.
openai.api_key = userdata.get('openai')

# Mirror the key into the environment only when a value came back,
# so OPENAI_API_KEY is never assigned an empty/None value.
if openai.api_key:
    os.environ.update(OPENAI_API_KEY=openai.api_key)

In [8]:
# Fetch the Pinecone API key from Colab secrets (store it there under
# the name PINECONE_API_KEY; do not paste it into the notebook).
from google.colab import userdata

api_key = userdata.get('PINECONE_API_KEY')

# Export the key to the environment only when one was returned.
if api_key:
    os.environ.update(PINECONE_API_KEY=api_key)

# Client handle used by the later cells to create and open indexes.
pc = Pinecone(api_key=api_key)

In [9]:
# Confirm the key loaded without echoing the secret into the saved notebook output:
# only the first five characters are shown; nothing is displayed when the key is missing.
f"{api_key[:5]}…[redacted]" if api_key else None

'pcsk_…[redacted]'

In [10]:
# dimensions are for text-embedding-ada-002

# Create the index only when it does not exist yet, so re-running this cell
# does not fail on an already-existing "quickstart" index.
if "quickstart" not in [idx["name"] for idx in pc.list_indexes()]:
    pc.create_index(
        name="quickstart",
        dimension=1536,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Display the index description as the cell output in both cases.
pc.describe_index("quickstart")


{
    "name": "quickstart",
    "metric": "euclidean",
    "host": "quickstart-mb6dg4l.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 1536,
    "deletion_protection": "disabled",
    "tags": null
}

In [12]:
# Open a handle to the "quickstart" index through the Pinecone client `pc`.
pinecone_index = pc.Index("quickstart")

#### Load documents, build the PineconeVectorStore and VectorStoreIndex

In [13]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.pinecone import PineconeVectorStore
from IPython.display import Markdown, display

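Load Data: place the source documents (for example, the SOP PDF) in the `./data` folder before running the next cell.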

In [14]:
# load documents
# Reads every file found under ./data; the folder must already exist and contain the source files.
documents = SimpleDirectoryReader("./data").load_data()

In [15]:
# Inspect the loaded documents.
# NOTE(review): this prints the full repr of every Document, including its text;
# consider clearing the output before sharing the notebook.
documents

[Document(id_='e64f498b-cbd4-4c0c-b29b-c964e3ae3e31', embedding=None, metadata={'page_label': '1', 'file_name': 'SOP_ Troubleshooting Overheated Boiler Feed Pump (Pump P-201).pdf', 'file_path': '/content/data/SOP_ Troubleshooting Overheated Boiler Feed Pump (Pump P-201).pdf', 'file_type': 'application/pdf', 'file_size': 474758, 'creation_date': '2025-12-15', 'last_modified_date': '2025-12-15'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='SOP: Troubleshooting Overheated Boiler Feed Pump \n(Pump P-201) \nDocument Number: SOP-OPS-MAINT-2025-07 \nRevision: 1.0 \nAuthor: J. Smith (Maintenance Engineer) \nApproved: A. Khan (Plant Manager) ‚Ä

The previous error occurred because the variable `pinecone_index` was used before it was defined. To fix this, I have combined the code that defines `pinecone_index` and the code that uses it into a single cell.

In [None]:
from llama_index.core import StorageContext

# Open a handle to the "quickstart" Pinecone index.
pinecone_index = pc.Index("quickstart")

# Wrap the Pinecone index in a LlamaIndex vector store (no metadata filter configured)
# and expose it through a storage context.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the VectorStoreIndex from the loaded documents on top of that storage context.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

#### Query Index

May take a minute or so for the index to be ready!

In [17]:
import time

# Build a query engine with default settings on top of the index.
query_engine = index.as_query_engine()

# Time only the query call itself; rendering the answer is excluded from the
# measurement. perf_counter() is a monotonic clock intended for measuring durations.
start_time = time.perf_counter()
response = query_engine.query("what is this document about")
elapsed_seconds = time.perf_counter() - start_time

# Render the answer in bold, then report how long the query took.
display(Markdown(f"<b>{response}</b>"))
print(f"\nExecution Time: {elapsed_seconds:.2f} seconds")

<b>This document outlines the standard procedure for troubleshooting an overheated industrial pump, specifically the Boiler Feed Pump P-201 in a thermal power plant. It covers the purpose, safety precautions, step-by-step actions, and diagnostics to identify and resolve an overheating condition in the pump. The scope includes recognizing overheating symptoms, immediate safety measures, root cause investigation, corrective actions, and safe restart procedures to minimize downtime and equipment damage.</b>


Execution Time: 2.12 seconds


In [18]:
# Re-render the most recent query response in bold (no new query is issued).
display(Markdown(f"<b>{response}</b>"))

<b>This document outlines the standard procedure for troubleshooting an overheated industrial pump, specifically the Boiler Feed Pump P-201 in a thermal power plant. It covers the purpose, safety precautions, step-by-step actions, and diagnostics to identify and resolve an overheating condition in the pump. The scope includes recognizing overheating symptoms, immediate safety measures, root cause investigation, corrective actions, and safe restart procedures to minimize downtime and equipment damage.</b>

In [19]:
pip install gradio



In [None]:
import os
import logging
import sys
import gradio as gr
from IPython.display import Markdown, display

from pinecone import Pinecone, ServerlessSpec
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.openai import OpenAI

# --- Pinecone index (recreated from scratch on every run of this cell) ---

index_name = "quickstart"
dimension = 1536

# Drop an existing index with the same name before it is created again below.
if any(idx["name"] == index_name for idx in pc.list_indexes()):
    pc.delete_index(index_name)

# LLM used by LlamaIndex (OpenAI is already imported at the top of this cell).
Settings.llm = OpenAI(model="gpt-5.2-2025-12-11", temperature=0)

# Create the serverless index and open a handle to it.
pc.create_index(
    name=index_name,
    dimension=dimension,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)
pinecone_index = pc.Index(index_name)

# --- Load Data ---
# Reads every file under ./data; the folder must already exist and be populated.
documents = SimpleDirectoryReader("./data").load_data()

# --- Create Index ---
# Pinecone index -> vector store -> storage context -> VectorStoreIndex over the documents.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

# --- System Prompt (polite + answer-from-document constraint) ---
# Instruction text that query_doc() prepends to every user question.
# The final paragraph carries the answer-from-document constraint named above.
SYSTEM_PROMPT = """You are a power plant maintenance assistant.

Help troubleshoot an overheated Boiler Feed Pump (P-201).

Safety first: LOTO, depressurize, cool down.

Check common causes: oil, cooling, bearings, alignment, cavitation, low/high flow.

Give clear, step-by-step actions.

Recommend safe restart only after issues are fixed.

Answer only from the provided document context. If the answer is not in the document, politely say so.
"""

# --- Query Engine ---
# Query engine with default settings over `index`; query_doc() sends every UI question through it.
query_engine = index.as_query_engine()

def query_doc(user_question: str) -> str:
    """Answer a user question through the module-level query engine.

    Args:
        user_question: Raw text from the UI textbox. Empty, whitespace-only,
            or None input is rejected with a prompt to enter a question.

    Returns:
        The stripped answer text; a fixed fallback sentence when the engine
        returns an empty answer; or "Error: <message>" when the query raises.
        Errors are returned as text rather than raised so the UI can show them.
    """
    if not user_question or not user_question.strip():
        return "Please enter a question."

    # NOTE(review): the system prompt is sent as part of the query text itself,
    # so it presumably also influences retrieval — confirm this is intended.
    full_query = f"""{SYSTEM_PROMPT}

User question:
{user_question.strip()}
"""
    try:
        response = query_engine.query(full_query)
        text = str(response).strip()
        # Fall back to a polite message when the engine returns nothing.
        return text if text else "I couldn’t find that in the document."
    except Exception as e:
        return f"Error: {str(e)}"

# --- Gradio UI (Professional look with logo, centered title) ---
# Use the raw GitHub URL for proper image rendering.
LOGO_URL = "https://raw.githubusercontent.com/khaledaldhaheri91-maker/khaled1/main/ENEC%20logo.png"

# Stylesheet passed to gr.Blocks(css=...).
# - .header-wrap: three-column grid (120px | flexible | 120px) for the header row.
# - .header-logo / .header-title: left-aligned 48px-high logo and centered title.
# - .section / .footer-note: spacing for the input group and the small centered footer text.
# NOTE(review): `label.svelte-1ipelgc` looks like a build-generated class name and may
# not match other Gradio versions — confirm.
CUSTOM_CSS = """
.gradio-container { font-family: Inter, ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, 'Helvetica Neue', Arial; }
.header-wrap {
    display: grid;
    grid-template-columns: 120px 1fr 120px;
    align-items: center;
    gap: 12px;
    padding: 12px 0 8px;
    border-bottom: 1px solid #eaeaea;
}
.header-logo { display:flex; align-items:center; justify-content:flex-start; }
.header-logo img { height: 48px; object-fit: contain; }
.header-title { text-align:center; }
.header-title h1 {
    margin: 0; font-weight: 700; font-size: 1.5rem; line-height: 1.2;
}
.header-spacer { height: 1px; }
.section { padding-top: 8px; }
.footer-note { text-align:center; font-size: 12px; color:#667085; padding: 8px 0 0; }
label.svelte-1ipelgc, .label-wrap label { font-weight: 600; }
"""

# Gradio app: header row (logo + centered title), a question box, and an answer box.
# Labels describe the document that is actually indexed (the Boiler Feed Pump P-201
# troubleshooting SOP) instead of the leftover "Insurance" wording.
with gr.Blocks(css=CUSTOM_CSS, title="Boiler Feed Pump SOP QA (LlamaIndex + Pinecone)") as demo:
    # Header with logo (left) and centered title
    with gr.Row(elem_classes="header-wrap"):
        with gr.Column(scale=0, elem_classes="header-logo"):
            gr.HTML(f'<img src="{LOGO_URL}" alt="ENEC" />')
        with gr.Column(scale=1, elem_classes="header-title"):
            gr.HTML("<h1>Boiler Feed Pump SOP QA</h1>")
        with gr.Column(scale=0):
            gr.HTML("")  # right-side spacer

    gr.Markdown(
        "Ask questions about the Boiler Feed Pump (P-201) troubleshooting SOP. "
        "**Answers come only from the document**. If not found, I’ll say so."
    )

    with gr.Group(elem_classes="section"):
        inp = gr.Textbox(
            label="Your question",
            placeholder="e.g., What should I check first when pump P-201 overheats?",
            lines=2,
        )
        btn = gr.Button("Submit", variant="primary")
        out = gr.Textbox(label="Answer", lines=8)

    # Both the button click and pressing Enter in the textbox run the same handler.
    btn.click(fn=query_doc, inputs=inp, outputs=out)
    inp.submit(fn=query_doc, inputs=inp, outputs=out)

    gr.Markdown('<div class="footer-note">LlamaIndex + Pinecone • Demo</div>')

demo.launch()


Replicate the deployment on Hugging Face Spaces:
https://huggingface.co/spaces/decodingdatascience/ddsinsurance1
An OpenAI API key is required.


In [21]:
# Inspect the documents loaded by the app cell.
# NOTE(review): this prints the full repr of every Document, including its text;
# consider clearing the output before sharing the notebook.
documents

[Document(id_='35b1fe53-64ad-4058-bd6b-843e317903ce', embedding=None, metadata={'page_label': '1', 'file_name': 'SOP_ Troubleshooting Overheated Boiler Feed Pump (Pump P-201).pdf', 'file_path': '/content/data/SOP_ Troubleshooting Overheated Boiler Feed Pump (Pump P-201).pdf', 'file_type': 'application/pdf', 'file_size': 474758, 'creation_date': '2025-12-15', 'last_modified_date': '2025-12-15'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='SOP: Troubleshooting Overheated Boiler Feed Pump \n(Pump P-201) \nDocument Number: SOP-OPS-MAINT-2025-07 \nRevision: 1.0 \nAuthor: J. Smith (Maintenance Engineer) \nApproved: A. Khan (Plant Manager) ‚Ä