In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored in Drive can be opened by path from later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# Install the third-party packages this notebook imports; -q keeps pip's output short.
# NOTE(review): no versions are pinned, so a fresh runtime may install newer
# releases than the ones this notebook was written against — consider pinning.
!pip install langchain langchain-community chromadb pypdf sentence-transformers openai -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m42.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.4/20.4 MB[0m [31m137.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.5/323.5 kB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m99.6 MB/s[0m eta [36m0:00:

In [10]:
import os
from google.colab import userdata
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import Document

# --- Step 3: Set up the API Key and Environment ---
# Prerequisite: the OpenRouter API key must be stored in Colab's secrets.
#   1. Open the 'Key' icon in the left sidebar.
#   2. Create a secret called "OPENROUTER_API_KEY" whose value is your key.
# The secret is copied into the process environment so later code can read it
# from os.environ.
try:
    os.environ["OPENROUTER_API_KEY"] = userdata.get("OPENROUTER_API_KEY")
except Exception as secret_error:
    # Any failure while reading or storing the secret is fatal for this
    # notebook: explain how to fix it, then stop execution.
    print(
        "ERROR: Could not find the OPENROUTER_API_KEY secret.",
        "Please add your OpenRouter API key to Colab's secrets (on the left sidebar) and try again.",
        sep="\n",
    )
    raise SystemExit(secret_error)


# --- Step 4: Load and Process the PDF Document ---
# The report is read from Google Drive, so Drive has to be mounted at
# /content/drive before this cell runs.
pdf_path = "/content/drive/MyDrive/RIL-IAR-2025.pdf"
if not os.path.exists(pdf_path):
    # Nothing else in this cell can work without the PDF; report and fall through.
    print(f"ERROR: The file '{pdf_path}' was not found.")
    print(
        "Please make sure Google Drive is mounted and "
        f"'{os.path.basename(pdf_path)}' is in the top level of MyDrive."
    )
else:
    print("Loading and processing the PDF... this may take a moment.")
    # Load the PDF. load() is used instead of load_and_split() because the
    # latter already chunks the text with a default splitter, and the text is
    # chunked explicitly (with the sizes chosen below) in the next step.
    loader = PyPDFLoader(pdf_path)
    pages = loader.load()

    # Split the document into smaller, overlapping chunks for retrieval.
    pdf_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    docs = pdf_splitter.split_documents(pages)
    if not docs:
        # E.g. a scanned PDF with no text layer: there is nothing to index,
        # so stop with a clear message instead of failing inside the vector store.
        print(f"ERROR: No text could be extracted from '{pdf_path}'.")
        raise SystemExit(1)

    # Re-wrap each chunk keeping only its text; any metadata attached by the
    # loader/splitter is not carried into the vector store.
    documents = [Document(page_content=doc.page_content) for doc in docs]

    # --- Step 5: Create Text Embeddings and Vector Store ---
    # This converts the text chunks into numerical vectors for similarity searching.
    print("Creating text embeddings and vector store...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    vector_db = Chroma.from_documents(
        documents,
        embedding=embeddings
    )

    # --- Step 6: Set Up the Conversational AI Model ---
    # This configures the chatbot's "brain" and memory.
    print("Setting up the conversational AI...")
    # Conversational memory: stores the chat history under the "chat_history"
    # key, as message objects, and is handed to the chain below.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True
    )

    # Initialize the Language Model (LLM) through OpenRouter's
    # OpenAI-compatible endpoint, using the key exported to the environment.
    llm = ChatOpenAI(
        model="openai/gpt-3.5-turbo",
        temperature=0.2,
        openai_api_base="https://openrouter.ai/api/v1",
        max_tokens=500,
        openai_api_key=os.environ["OPENROUTER_API_KEY"]
    )

    # Combine the retriever (from the vector store) and the LLM into a conversational chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(),
        memory=memory
    )

    print("\n✅ Setup complete! The chatbot is ready.")
    print("You can now ask questions about the Reliance Industries 2024-25 Annual Report.")
    print("Type 'Exit' to end the chat.")
    print("-" * 50)

    # --- Step 7: Start the Real-time Interaction Loop ---
    while True:
        try:
            # Strip once so the exit check, the blank-input check and the
            # query sent to the chain all see the same text.
            question = input("User: ").strip()
            if question.lower() == "exit":
                print("Bot: Thank you for chatting. Goodbye!")
                break
            if not question:
                # Ignore blank input and prompt again.
                continue

            # Get the answer from the QA chain. invoke() is the supported
            # entry point; calling the chain object directly is deprecated.
            answer = qa_chain.invoke({"question": question})
            print("Bot:", answer["answer"])

        except Exception as e:
            # Any failure (input or model call) ends the chat session.
            print(f"An error occurred: {e}")
            break


Loading and processing the PDF... this may take a moment.
Creating text embeddings and vector store...
Setting up the conversational AI...

✅ Setup complete! The chatbot is ready.
You can now ask questions about the Reliance Industries 2024-25 Annual Report.
Type 'Exit' to end the chat.
--------------------------------------------------
User: what is reliance company is all about?
Bot: Reliance Industries Limited is India's largest private sector enterprise and a Fortune Global 500 leader. It operates across various sectors such as energy, retail, telecom, media, and green technologies, impacting millions of lives every day. Reliance is known for its contribution to India's growth momentum and its belief that 'Growth is Life'.
User: is reliance a good company?
Bot: I don't have enough information to provide an opinion on whether Reliance is a good company or not.
User: exit
Bot: Thank you for chatting. Goodbye!
Loading and processing the PDF... this may take a moment.
