In [1]:
print('Hello World!')

Hello World!


In [4]:
!pip install langchain_community pypdf2 requests beautifulsoup4 -qU

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
import os
import re
import requests
from bs4 import BeautifulSoup
from PyPDF2 import PdfReader

# ======================================================
# 1) Create download folder
# ======================================================
DOWNLOAD_DIR = "pdf_downloads"
os.makedirs(DOWNLOAD_DIR, exist_ok=True)


# ======================================================
# 2) SCRAPE PDF URLs FROM SCREENER
# ======================================================
def scrape_screener_pdfs(company_url):
    print(f"Scraping: {company_url}")
    headers = {"User-Agent": "Mozilla/5.0"}

    html = requests.get(company_url, headers=headers).text
    soup = BeautifulSoup(html, "html.parser")

    links = soup.select(".documents a")

    pdf_links = []

    for a in links:
        href = a.get("href", "")
        if href.endswith(".pdf"):
            text = a.text.strip().replace("\n", "_").replace(" ", "_")
            if not text:
                text = "Document"
            pdf_links.append((href, text + ".pdf"))

    print(f"Found {len(pdf_links)} PDF links.\n")
    return pdf_links


# ======================================================
# 3) PDF TYPE CLASSIFIER (Transcript / Presentation check)
# ======================================================
def classify_transcript_or_ppt(pdf_path):
    try:
        reader = PdfReader(pdf_path)
        text = reader.pages[0].extract_text()[:800].lower()
    except:
        return None

    if "transcript" in text or "earnings call" in text:
        return "transcript"
    if "presentation" in text or "investor presentation" in text:
        return "presentation"

    return None


# ======================================================
# 4) RENAME if transcript/presentation ‚Üí Add numbering
# ======================================================
def maybe_rename_transcript_or_ppt(saved_path):
    doc_type = classify_transcript_or_ppt(saved_path)

    if not doc_type:
        print("   ‚Üí Not transcript/presentation. Keeping original filename.\n")
        return saved_path

    # Original filename
    folder = os.path.dirname(saved_path)
    base = os.path.basename(saved_path)
    name, ext = os.path.splitext(base)

    # Make new filename with incremental number
    counter = 1
    new_name = f"{name}_{counter}{ext}"
    new_path = os.path.join(folder, new_name)

    while os.path.exists(new_path):
        counter += 1
        new_name = f"{name}_{counter}{ext}"
        new_path = os.path.join(folder, new_name)

    try:
        os.rename(saved_path, new_path)
        print(f"   ‚úî Transcript/PPT detected ‚Üí renamed to {new_name}\n")
        return new_path
    except:
        print("   ‚ö† Rename failed. Keeping original.\n")
        return saved_path


# ======================================================
# 5) DOWNLOAD FUNCTIONS
# ======================================================
def download_bse_annpdf(url, download_dir, filename):
    print(f"URL: {url}")
    print(f"[BSE-ANNPDF] Requesting: {url}")

    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    savepath = os.path.join(download_dir, filename)

    with open(savepath, "wb") as f:
        f.write(r.content)

    print(f"‚úî Saved BSE AnnPdf: {savepath}")
    return savepath


def download_bse_iframe_pdf(url, download_dir, filename):
    print(f"URL: {url}")
    print("   [BSE-IFRAME] Requesting main page‚Ä¶")

    html = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}).text
    soup = BeautifulSoup(html, "html.parser")
    iframe = soup.find("iframe")

    if not iframe:
        print("   ‚ùå No iframe found. Cannot download.")
        return None

    real_pdf = iframe.get("src")
    if not real_pdf.startswith("http"):
        real_pdf = "https://www.bseindia.com" + real_pdf

    print(f"   ‚Üí PDF Source: {real_pdf}")

    r = requests.get(real_pdf, headers={"User-Agent": "Mozilla/5.0"})
    savepath = os.path.join(download_dir, filename)

    with open(savepath, "wb") as f:
        f.write(r.content)

    print(f"   ‚úî Saved BSE iframe PDF: {savepath}")
    return savepath


def download_direct_pdf(url, download_dir, filename):
    print(f"[DIRECT] Downloading: {url}")
    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
    savepath = os.path.join(download_dir, filename)

    with open(savepath, "wb") as f:
        f.write(r.content)

    print(f"‚úî Saved direct PDF: {savepath}")
    return savepath


# ======================================================
# Clean filename
# ======================================================
def clean_filename(name):
    name = re.sub(r'[\/:*?"<>|;,]', '_', name)
    name = re.sub(r"_+", "_", name)
    name = name.strip(" .")
    if len(name) > 180:
        name = name[:180]
    return name


# ======================================================
# 6) DECIDE WHICH DOWNLOADER TO USE
# ======================================================
def download_pdf(url, filename):

    filename = clean_filename(filename)

    if "AnnPdfOpen.aspx" in url:
        saved = download_bse_annpdf(url, DOWNLOAD_DIR, filename)
        return maybe_rename_transcript_or_ppt(saved)

    if "xml-data/corpfiling" in url:
        saved = download_bse_iframe_pdf(url, DOWNLOAD_DIR, filename)
        return maybe_rename_transcript_or_ppt(saved)

    saved = download_direct_pdf(url, DOWNLOAD_DIR, filename)
    return maybe_rename_transcript_or_ppt(saved)


# ======================================================
# 7) MAIN EXECUTION
# ======================================================
def run(company_url):
    pdfs = scrape_screener_pdfs(company_url)

    for url, filename in pdfs:
        print("-----------------------------------------")
        print(f"Downloading: {filename}")
        download_pdf(url, filename)


# ======================================================
# Run
# ======================================================
run("https://www.screener.in/company/TCS/consolidated/")


Scraping: https://www.screener.in/company/TCS/consolidated/
Found 66 PDF links.

-----------------------------------------
Downloading: Press_Release_-_TCS,_Sybyl,_And_Ixafrica_Partner_To_Accelerate_Sovereign_Cloud_Adoption_In_East_Africa_________________________2d_-_TCS,_Sybyl_and_iXAfrica_partner_to_deploy_sovereign_cloud_in_East_Africa,_announced_Nov_13,_2025..pdf
URL: https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=cc97a7a7-6326-4b41-9f1d-57cb58ccf31f.pdf
[BSE-ANNPDF] Requesting: https://www.bseindia.com/stockinfo/AnnPdfOpen.aspx?Pname=cc97a7a7-6326-4b41-9f1d-57cb58ccf31f.pdf
‚úî Saved BSE AnnPdf: pdf_downloads/Press_Release_-_TCS_Sybyl_And_Ixafrica_Partner_To_Accelerate_Sovereign_Cloud_Adoption_In_East_Africa_2d_-_TCS_Sybyl_and_iXAfrica_partner_to_deploy_sovereign_cloud_in_East_Africa_an
   ‚Üí Not transcript/presentation. Keeping original filename.

-----------------------------------------
Downloading: Press_Release_-_Lion_Selects_TCS_To_Drive_AI-Powered_Transformation_

In [79]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader('https://www.screener.in/company/TCS/consolidated/')

docs = loader.load()

split_documents = text_splitter.split_documents(docs)

vector_store.add_documents(splits)
    

['01245ab1-febd-4888-9f28-b35fd09cd392',
 '42351ac9-59b8-46a0-b64f-52b758eb57ee',
 '379eac9d-3429-44c5-8d87-e025be9893da',
 '285872c2-551c-4edd-98fc-1e5ee0889981',
 '699373b3-4cdb-4cf6-af8b-27cb260f24ea',
 '1931d259-90f2-4b16-a1f8-578725c706bb',
 '11b20ebf-e953-4dc9-bf45-e53d0ee7d040',
 '6c30b227-f86d-418c-a088-5fbb103abbaa',
 'ec3e3d5b-664a-4cf6-9fdc-2abfa8e03349',
 '0d7b5751-c916-4f2b-83fe-8b1049e7f308',
 'db5b6240-61e6-4441-8253-1ca259931d99',
 '58de4de4-cb0f-48da-92c8-fa41fbd479a1',
 'a51a7334-8ccc-49c2-8300-f0aaffc113a9',
 '39208a6b-89fc-4c3b-9ef8-53a1cbe251c6',
 '2356279d-2f29-4258-9466-4cbbb3fb547f',
 '1c6959a1-ed16-427d-9544-3fe938d6696a',
 '5bac26b5-66d0-4492-a601-c5576b2456c9',
 '9b5392b6-430d-4850-ae4c-f5c7c7d12300',
 '5e3a6732-1ccb-436c-9cef-224956485e28',
 '3cb22a87-e944-44e8-a00b-094ed1f2292c',
 'ef36de39-ac21-46c7-9df6-2624f49096c5',
 '67ec7e5c-7212-4ff1-afd3-12ef69868ed8',
 'db7e2aa9-6c53-48f6-b3e1-ae1f23a979bc',
 'e6df0e6a-49f9-4984-b821-016bd26a431e',
 '7a4ce2ee-2321-

In [48]:
!pip install -qU rapidocr-onnxruntime langchain_community langchain_text_splitters faiss-cpu langchain_huggingface langchain_groq


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [12]:
with open('pdf_downloads/html.txt', 'w') as f:
    f.write(raw_text)


In [42]:
from langchain_community.embeddings import FakeEmbeddings
embeddings = FakeEmbeddings(size=1352)


In [None]:
import faiss
from langchain_community.vectorstores import FAISS
# from langchain_huggingface import HuggingFaceEmbeddings

# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# vector_store = FAISS.from_documents(chunks, embedding=embeddings)
# vector_store.save_local("faiss_index")

NameError: name 'chunks' is not defined

In [33]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  
    chunk_overlap=200, 
    separators=["\n\n", "\n", " ", ""] 
)

In [71]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, PDFMinerLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

vector_store = None
count = 0

for filename in os.listdir("pdf_downloads/"):
    file_path = f"pdf_downloads/{filename}"
    print(f"\nüìÑ Processing: {filename}")

    # Robust PDF loading
    try:
        try:
            loader = PyPDFLoader(file_path)
            docs = loader.load()
        except:
            print("‚ö† PyPDF failed, trying PDFMiner...")
            loader = PDFMinerLoader(file_path)
            docs = loader.load()
    except Exception as e:
        print(f"‚ùå Skipping {filename}: cannot load PDF -> {e}")
        continue

    # Ensure text exists
    splits = text_splitter.split_documents(docs)
    if not splits:
        print(f"‚ö† No extractable text in {filename}, skipping.")
        continue

    # Build or update FAISS index
    if vector_store is None:
        vector_store = FAISS.from_documents(splits, embedding=embeddings)
    else:
        vector_store.add_documents(splits)

    count += 1
    print(f"‚úî Added {len(splits)} chunks from {filename}")

# Save final index
if vector_store:
    vector_store.save_local("faiss_index")
    print("\nüéâ FAISS index saved successfully.")
else:
    print("\n‚ùå No valid PDFs processed; FAISS index not created.")


invalid pdf header: b'Tata '
EOF marker not found



üìÑ Processing: html.txt
‚ö† PyPDF failed, trying PDFMiner...
‚ùå Skipping html.txt: cannot load PDF -> pdfminer package not found, please install it with `pip install pdfminer.six`

üìÑ Processing: Announcement_under_Regulation_30_(LODR)-Newspaper_Publication_12_Nov_-_Reminder_Letter_to_shareholders-_Unclaimed_Dividend_(Third_Interim_Dividend_2018-19).pdf
‚úî Added 15 chunks from Announcement_under_Regulation_30_(LODR)-Newspaper_Publication_12_Nov_-_Reminder_Letter_to_shareholders-_Unclaimed_Dividend_(Third_Interim_Dividend_2018-19).pdf

üìÑ Processing: Financial_Year_2025_from_bse.pdf
‚úî Added 1413 chunks from Financial_Year_2025_from_bse.pdf

üìÑ Processing: Press_Release_-_ABB_Extends_18-Year_Partnership_With_TCS_To_Implement_AI-Driven_IT_Operations_5_Nov_-_TCS_expands_18-year_relationship_multi-year_AI-driven_hosting_modernization_en
‚úî Added 10 chunks from Press_Release_-_ABB_Extends_18-Year_Partnership_With_TCS_To_Implement_AI-Driven_IT_Operations_5_Nov_-_TCS_expands_18-y

In [None]:
# from langchain_community.document_loaders.parsers import TesseractBlobParser
# from langchain_community.document_loaders import PyPDFLoader


# loader = PyPDFLoader(
#     "pdf_downloads/Financial_Year_2017_from_bse.pdf",
# )

# docs = loader.load()

# len(docs)
            

262

In [None]:
# split_documents = text_splitter.split_documents(docs)

# vector_store = FAISS.from_documents(split_documents, embeddings)

# vector_store.save_local("faiss_index")

In [None]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    api_key=''
    # other params...
)

llm.invoke('what is full form TCS? answer me in one sentence')

AIMessage(content='TCS stands for Tata\u202fConsultancy\u202fServices.', additional_kwargs={'reasoning_content': 'The user asks: "what is full form TCS? answer me in one sentence". Provide answer in one sentence. Should be "TCS stands for Tata Consultancy Services." That\'s one sentence.'}, response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 83, 'total_tokens': 143, 'completion_time': 0.124245021, 'prompt_time': 0.003079659, 'queue_time': 0.048031821, 'total_time': 0.12732468, 'completion_tokens_details': {'reasoning_tokens': 40}}, 'model_name': 'openai/gpt-oss-120b', 'system_fingerprint': 'fp_1d1727abc9', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--8ac1f282-244b-446c-a810-860427df6063-0', usage_metadata={'input_tokens': 83, 'output_tokens': 60, 'total_tokens': 143, 'output_token_details': {'reasoning': 40}})

In [50]:
base_prompt = '''
You are an intelligent financial analysis agent specialized in reviewing company quarterly reports and earnings call transcripts.
Your primary function is to extract accurate financial metrics, analyze qualitative management commentary, and generate clear, structured insights.

During execution, you will receive:

{context}: Extracted text chunks from financial reports and transcripts

{question}: A specific analytical task (e.g., trends, risks, sentiment, outlook)

Guidelines for responding to {question} using {context}:

Accurate & Grounded: Use only the information found in the provided context‚Äîno guessing or fabricating data.

Financially Insightful: Provide concise explanations about revenue trends, margin movements, demand commentary, risks, and opportunities.

Forecast-Oriented: Highlight management sentiment and forward-looking statements relevant to future performance.

Structured JSON: Always respond in a predictable JSON format suitable for downstream processing.

No Investment Advice: Do not provide stock buy/sell/hold recommendations or personal financial advice.

Example JSON Output:
{{
  "reply": "Revenue grew 5% YoY driven by cloud and BFSI demand, while margins remained stable. Management highlighted healthy deal wins and improving client budgets.",
  "guidance_caution": "This summary is based solely on the provided financial context and does not constitute investment advice.",
  "follow_up_prompt": "Would you like insights on risks, opportunities, or the outlook for the next quarter?"
}}

'''

In [57]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

prompt = PromptTemplate(template=base_prompt, input_variables=["context", "question"])


In [80]:
user_question_2 = 'What risks, challenges, or opportunities did management highlight during the earnings call?'
extracted_chunks = vector_store.similarity_search(user_question_2,k=4)
chain = prompt | llm | JsonOutputParser()
response = chain.invoke({"context": extracted_chunks, "question": user_question_2})

In [81]:
print(response['reply'])

Management pointed to two main themes.  
1. **Tax‚Äërelated risks** ‚Äì The company disclosed tax contingencies of ‚Çπ318‚ÄØcrore for both FY‚ÄØ2025 and FY‚ÄØ2024.  It noted that it is entitled to an indemnification from the seller of TCS‚ÄØe‚ÄëServe Limited and that, after reviewing notices and inquiries from tax authorities in India, the United States and the United Kingdom, it believes any subsequent tax claims or demands are unlikely to succeed.  The discussion highlighted the variability of assessment periods across jurisdictions and the ongoing exposure to tax audits as a material risk.  
2. **Growth opportunities through intellectual capital** ‚Äì Management emphasized the firm‚Äôs expanding research and innovation ecosystem: >6,000 researchers, 4,820 patents granted/8,816 filed, 239 top‚Äëtier publications, 40+ research centres, 51 academic partners, and 3,000+ start‚Äëup collaborations.  It described ‚ÄúPurposeful AI‚Äù and ‚ÄúComputing Futures‚Äù as strategic focus areas, pos

In [82]:
extracted_chunks

[Document(id='812c62d2-0ee7-49ce-8fbb-1448c46ff884', metadata={'producer': 'Microsoft¬Æ Word for Microsoft 365', 'creator': 'Microsoft¬Æ Word for Microsoft 365', 'creationdate': '2025-05-27T20:18:10+05:30', 'author': 'Divyani Sawana', 'moddate': '2025-05-27T23:14:00+05:30', 'source': 'pdf_downloads/Financial_Year_2025_from_bse.pdf', 'total_pages': 337, 'page': 50, 'page_label': '51'}, page_content='page.\n 5. Click on options available against Company name or e-Voting service provider - NSDL and you will be re-directed to NSDL \ne-voting website for casting your vote during the remote e-voting period or joining virtual meeting and e-voting during \nthe meeting.\n If you are not registered, follow the below steps\n a.  Option to register is available at https://eservices.nsdl.com.\n b.  Select ‚ÄúRegister Online for IDeAS‚Äù Portal or click at https://on.tcs.com/NSDLRegn.\n c.  Please follow steps given above in points 1-5.\nFor ease of understanding, the Members may refer the chart for

In [70]:
import os
import datetime
from pypdf import PdfReader

FOLDER = "pdf_downloads/"
ONE_YEAR_DAYS = 365

def parse_pdf_date(date_str):
    # Format type 1: D:20181012055359+05'30'
    if date_str.startswith("D:"):
        try:
            dt = datetime.datetime.strptime(date_str[2:16], "%Y%m%d%H%M%S")
            return dt
        except:
            pass

    # Format type 2: Fri 12 Oct 2018 05:53:59 PM +05:30
    try:
        dt = datetime.datetime.strptime(date_str, "%a %d %b %Y %I:%M:%S %p %z")
        return dt.replace(tzinfo=None)
    except:
        return None


now = datetime.datetime.now()
deleted_files = 0

for filename in os.listdir(FOLDER):
    if not filename.lower().endswith(".pdf"):
        continue
    
    file_path = os.path.join(FOLDER, filename)

    try:
        reader = PdfReader(file_path)
        metadata = reader.metadata
        
        if "/CreationDate" in metadata:
            pdf_date = parse_pdf_date(metadata["/CreationDate"])
        elif "/ModDate" in metadata:
            pdf_date = parse_pdf_date(metadata["/ModDate"])
        else:
            print(f"‚ö† No metadata date found for {filename}, skipping.")
            continue

        if not pdf_date:
            print(f"‚ö† Could not parse date for {filename}, skipping.")
            continue

        age_days = (now - pdf_date).days

        if age_days > ONE_YEAR_DAYS:
            print(f"üóë Deleting: {filename} (Age: {age_days} days)")
            os.remove(file_path)
            deleted_files += 1
        else:
            print(f"‚úî Keeping: {filename} (Age: {age_days} days)")

    except Exception as e:
        print(f"‚ùå Error reading {filename}: {e}")

print(f"\n‚úÖ Done. Deleted {deleted_files} old PDF(s).")


üóë Deleting: Transcript_26.pdf (Age: 2227 days)
üóë Deleting: Transcript_23.pdf (Age: 1952 days)
üóë Deleting: Financial_Year_2015_from_bse.pdf (Age: 3432 days)
üóë Deleting: Transcript_30.pdf (Age: 2591 days)
üóë Deleting: Transcript_34.pdf (Age: 2955 days)
üóë Deleting: Transcript_15.pdf (Age: 1222 days)
üóë Deleting: Financial_Year_2014_from_bse.pdf (Age: 4188 days)
üóë Deleting: Transcript_27.pdf (Age: 2320 days)
üóë Deleting: Transcript_16.pdf (Age: 1309 days)
üóë Deleting: Transcript_32.pdf (Age: 2764 days)
üóë Deleting: PPT.pdf (Age: 3595 days)
üóë Deleting: Financial_Year_2017_from_bse.pdf (Age: 3068 days)
‚úî Keeping: Announcement_under_Regulation_30_(LODR)-Newspaper_Publication_12_Nov_-_Reminder_Letter_to_shareholders-_Unclaimed_Dividend_(Third_Interim_Dividend_2018-19).pdf (Age: 3 days)
‚úî Keeping: Financial_Year_2025_from_bse.pdf (Age: 172 days)
üóë Deleting: Transcript_6.pdf (Age: 397 days)
üóë Deleting: Transcript_22.pdf (Age: 1864 days)
üóë Deleting: Tra

In [55]:
!pip install nse-live-stocks -qU


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [56]:
from nse_live_stocks import Nse

stock = Nse()

result = stock.get_current_price('TCS')

print(result)

{'error': False, 'nse_symbol': 'TCS', 'current_value': '3112', 'date': '14-Nov-2025 16:00:00'}


In [84]:
docs = vector_store.docstore._dict

print(f"Stored documents: {len(docs)}")

# Print first document‚Äôs content
first_key = list(docs.keys())[0]
print(docs[first_key].page_content[:500])


Stored documents: 2973
9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Tel 91 22 6778 9595 Fax 91 22 6630 3672 e-mail corporate.office@tcs.com website www.tcs.com 
Registered Office 9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Corporate Identity No. (CIN): L22210MH1995PLC084781 
TCS/SE/143/2025-26 
November 12, 2025 
National Stock Exchange of India Limited  BSE Limited 
Exchange Plaza, C-1, Block G,  P. J. Towers,   
Bandra Kurla Complex, Bandra (East)  Dalal Street, 
Mumbai - 400051 Mumbai - 40000


In [85]:
for k, v in list(docs.items())[:3]:
    print("\nDocument ID:", k)
    print(v.page_content[:300])



Document ID: cc7da7a9-b234-43ac-9771-bc5cd1a0b6df
9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Tel 91 22 6778 9595 Fax 91 22 6630 3672 e-mail corporate.office@tcs.com website www.tcs.com 
Registered Office 9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Corporate Identity No. (CIN): L22210MH1995PLC084781 
TCS/SE/143/2025-26 
Nove

Document ID: f76bf6ed-9fde-4525-baf2-ccbba75b03ca
1. Copies of newspaper advertisement published in Business Standard (English),
Free Press Journal (English) and Navshakti (Marathi).
2. Copy of reminder letter to shareholders.
The above reminder letters were sent to the shareholders who have not claimed their dividends for 
seven or more consecutiv

Document ID: 45814ee1-b12f-450c-8150-d0d5a0aaf0f3
9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Tel 91 22 6778 9595 Fax 91 22 6630 3672 e-mail corporate.office@tcs.com website www.tcs.com 
Registered Office 9th Floor Nirmal Building Nariman Point Mumbai 400 021 
Corporate Identity No.

In [88]:
query = 'What were the key financial metrics (Revenue, Net Profit, Operating Margin, and Segment Performance) reported for this quarter?'

results = vector_store.similarity_search(query, k=3)

for r in results:
    print("\nResult chunk:\n", r.page_content)



Result chunk:
 of the Company. The annual accounts of the said subsidiaries will also be available for inspection, as above, at 
the head ofÔ¨Å ces/registered ofÔ¨Å ces of the respective subsidiary companies. The Company shall furnish a copy of 
the details of annual accounts of subsidiaries to any member on demand.
14. Fixed Deposits
 The Company has not accepted any public deposits and as such, no amount on account of principal or interest 
on public deposits was outstanding as on the date of the balance sheet.
15. Directors
 Mr. O. P. Bhatt and Mr. Cyrus Mistry have been appointed as Additional Directors on April 2, 2012. 
Mr. O. P. Bhatt is an Independent Director. As per the provisions of Section 260 of the Act, both the Directors 
hold ofÔ¨Å ce only up to the date of the forthcoming Annual General Meeting (AGM) of the Company and are 
eligible for appointment as Directors. The Company has received notices under Section 257 of the Act, in respect

Result chunk:
 committee, CSR Po

In [91]:
split_documents

[Document(metadata={'source': 'https://www.screener.in/company/TCS/consolidated/', 'title': 'Tata Consultancy Services Ltd share price | About TCS | Key Insights - Screener', 'description': 'TCS ¬∑ Mkt Cap: 11,23,778 Crore (down -25.1% in 1 year) ¬∑ Revenue: 2,57,688 Cr ¬∑ Profit: 49,687 Cr ¬∑ Stock is trading at 10.6 times its book value ¬∑ The company has delivered a poor sales growth of 10.2% over past five years. ¬∑ Promoter Holding: 71.8%', 'language': 'en'}, page_content='Tata Consultancy Services Ltd share price | About TCS | Key Insights - Screener\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n        Home\n      \n\n\n\n\n\n      Screens\n    \n\n\n\n\n\n\n\n\n\n\n      Tools\n    \n\n\n\n\n\n        Login\n      \n\n\n\n\n\n\n\n\n\n\n\n\n          \n            Home\n          \n        \nScreens\n\n\n            Tools\n            \n\n\n\n\n\n\n\n\nCreate a stock screen\nRun queries on 10 years of financial data\n\n\n\n\n\n\nPremium features\n\n\n\

In [90]:
from langchain_community.retrievers import BM25Retriever

# bm25_retriever = BM25Retriever.from(
#     corpus, metadatas=[{"source": 1}] * len(corpus)
# )
# bm25_retriever.k = 3

In [94]:
from langchain_community.retrievers import BM25Retriever

bm25_retriever = BM25Retriever.from_documents(
    split_documents
)
bm25_retriever.k = 3

In [100]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever

ModuleNotFoundError: No module named 'langchain.retrievers'

In [96]:
!pip install rank_bm25 langchain -qU


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
