In [284]:
import numpy as np
import json, re, os, requests, uuid, base64, io
from openai import AzureOpenAI
import logging
from azure.storage.blob import BlobServiceClient
import tempfile
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor, as_completed
import ast
import requests
from azure.search.documents import SearchClient
from azure.core.pipeline.transport import RequestsTransport
from azure.core.credentials import AzureKeyCredential

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [285]:
import os
import json
import logging
from typing import Any, Dict, List, Optional, Set

import requests
from openai import AzureOpenAI

def setup_logging():
    """Configure the root logger for this notebook.

    Sets the level to INFO and the record layout to
    ``timestamp | level | message``. ``force=True`` makes ``basicConfig``
    replace any handlers already attached to the root logger, so re-running
    the cell re-applies the configuration instead of being a no-op.
    """
    log_format = "%(asctime)s | %(levelname)s | %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format, force=True)


setup_logging()

In [286]:
# -------------------------
# SERVICE SETTINGS (read from the environment)
# -------------------------
# Secrets must not live in the notebook: anything typed here ends up in version
# control, shared copies and exported HTML. The API keys are therefore read from
# environment variables. Keys that were previously embedded in this cell should
# be treated as exposed and rotated.


def _env_setting(name: str, default: str = "") -> str:
    """Return the stripped value of environment variable ``name``.

    Falls back to ``default`` when the variable is unset. A warning is logged
    when the resulting value is empty so a missing secret is visible at
    configuration time rather than surfacing later as an authentication error.
    """
    value = os.environ.get(name, default).strip()
    if not value:
        logging.warning("Environment variable %s is not set; requests that need it will fail.", name)
    return value


# Non-secret settings keep their previous values as defaults and can be
# overridden through the environment.
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT", "https://hrtransformation.openai.azure.com/")
AZURE_OPENAI_API_KEY = _env_setting("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-05-01-preview")
AZURE_OPENAI_DEPLOYMENT = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "gpt-4.1")

AZURE_SEARCH_ENDPOINT = os.environ.get("AZURE_SEARCH_ENDPOINT", "https://hrtransformation.search.windows.net")
AZURE_SEARCH_API_KEY = _env_setting("AZURE_SEARCH_API_KEY")

# -------------------------
# INDEX MAPPING
# -------------------------

# Maps each classification category (the labels classify_query may return) to
# the name of the search index that run_pipeline queries for that category.
INDEX_MAP = {
    "Act": "taxation-acts",
    "Rule": "taxation-rules",
    "Circular": "taxation-circular-notification",
}

# Default number of hits requested per index (sent as "top" in the search body).
DEFAULT_TOP_K = 10
# TLS certificate verification setting for outbound search requests. False
# means server certificates are not checked.
# NOTE(review): disabling verification is insecure; confirm whether it is still
# required (e.g. a corporate proxy) and switch to True / a CA bundle if not.
VERIFY_TLS = False 
# Defaults for search_index's search_fields / select_fields parameters. While
# they are None the request body carries no "searchFields" / "select" entries.
SEARCH_FIELDS = None
SELECT_FIELDS = None

In [287]:
def classify_query(user_query: str) -> List[str]:
    """Ask the LLM which document categories a query should be searched in.

    Args:
        user_query: The user's question, inserted verbatim into the prompt.

    Returns:
        The categories named by the model that are keys of ``INDEX_MAP``, in
        the model's order and without duplicates. When the model output cannot
        be parsed, is not the expected JSON object, or names no known category
        (which includes the ``"{}"`` that ``gpt_client`` returns after an API
        error), every category in ``INDEX_MAP`` is returned so the search is
        broadened rather than skipped.
    """
    prompt = f"""
Classify the query into categories: [Act, Rule, Circular].
Query: {user_query}

Return JSON:
{{
  "category": ["Act", "Rule"]
}}
"""
    all_categories = list(INDEX_MAP.keys())
    raw = gpt_client(prompt)

    try:
        obj = json.loads(raw)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError; TypeError covers a non-string reply.
        logging.warning("classify_query: unparseable model output (%s); using all categories", exc)
        return all_categories

    cats = obj.get("category", []) if isinstance(obj, dict) else []
    if isinstance(cats, str):
        # The model sometimes returns a single label instead of a list.
        cats = [cats]
    elif not isinstance(cats, list):
        cats = []

    selected: List[str] = []
    for cat in cats:
        if isinstance(cat, str) and cat in INDEX_MAP and cat not in selected:
            selected.append(cat)

    if not selected:
        # An empty result would make the pipeline search nothing at all.
        logging.warning("classify_query: no valid category in model output; using all categories")
        return all_categories
    return selected


def rewrite_query(user_query: str) -> str:
    """Ask the LLM to rephrase a query for better search relevance.

    Args:
        user_query: The user's original question.

    Returns:
        The stripped ``"rewritten_query"`` string from the model's JSON reply.
        The original ``user_query`` is returned unchanged when the reply cannot
        be parsed, is not a JSON object, or carries no non-empty string under
        that key.
    """
    prompt = f"""
Rewrite this query to maximize search relevance:
User query: {user_query}

Return:
{{
  "rewritten_query": "<your text>"
}}
"""
    raw = gpt_client(prompt)

    try:
        obj = json.loads(raw)
    except (TypeError, ValueError) as exc:
        # Only parsing problems are handled here; anything else (including
        # KeyboardInterrupt) is allowed to propagate.
        logging.warning("rewrite_query: unparseable model output (%s); keeping original query", exc)
        return user_query

    rewritten = obj.get("rewritten_query") if isinstance(obj, dict) else None
    if not isinstance(rewritten, str):
        return user_query
    return rewritten.strip() or user_query

In [288]:
def gpt_client(prompt: str, contents: Optional[List[Dict[str, Any]]] = None) -> str:
    """
    Send a prompt to the Azure OpenAI chat deployment in JSON-object mode.

    The word 'json' is placed in both the system message and an extra user
    part because the request asks for response_format={"type": "json_object"}.

    Args:
        prompt: Text of the main user message part.
        contents: Optional additional message parts appended after the prompt
            and the JSON reminder.

    Returns:
        The content of the first completion choice, or "{}" when that content
        is empty or when any exception is raised while calling the service
        (the exception is logged with its traceback).
    """
    extra_parts = contents or []

    system_msg = (
        "You are a helpful assistant. Always return ONLY valid JSON (json). "
        "Do not include any explanatory text outside the JSON."
    )

    try:
        client = AzureOpenAI(
            azure_endpoint=AZURE_OPENAI_ENDPOINT,
            api_key=AZURE_OPENAI_API_KEY,
            api_version=AZURE_OPENAI_API_VERSION
        )

        # User message: the prompt, the JSON-only reminder, then any extra parts.
        user_parts = [
            {"type": "text", "text": prompt},
            {"type": "text", "text": "IMPORTANT: Output must be valid json only. No prose."},
            *extra_parts,
        ]
        conversation = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_parts},
        ]

        completion = client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=conversation,
            response_format={"type": "json_object"},
            timeout=300
        )
        reply = completion.choices[0].message.content
        return reply or "{}"

    except Exception as e:
        logging.exception(f"OpenAI Error: {e}")
        return "{}"

In [289]:
def search_index(
    search_keywords: str,
    index_name: str,
    top_k: int = DEFAULT_TOP_K,
    search_fields: Optional[List[str]] = SEARCH_FIELDS,
    select_fields: Optional[List[str]] = SELECT_FIELDS,
) -> Dict[str, Any]:
    """Run a keyword search against one Azure AI Search index over REST.

    Args:
        search_keywords: Search text sent as the ``search`` field of the body.
        index_name: Name of the index to query.
        top_k: Maximum number of hits to request (``top``).
        search_fields: Optional field names to search in. A list is joined
            into the comma-separated string the REST API expects; a string is
            passed through unchanged.
        select_fields: Optional field names to return, handled the same way.

    Returns:
        The decoded JSON response with an added ``"_index_name"`` key holding
        ``index_name`` so later steps can tell which index a hit came from.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        requests.RequestException: On connection problems or timeout (30 s).
    """

    def _as_csv(fields):
        # The Search Documents REST body takes searchFields/select as a
        # comma-separated string, not a JSON array.
        return fields if isinstance(fields, str) else ",".join(fields)

    # rstrip guards against a configured endpoint that ends with "/".
    base_url = AZURE_SEARCH_ENDPOINT.rstrip("/")
    url = f"{base_url}/indexes/{index_name}/docs/search?api-version=2024-07-01"
    headers = {"Content-Type": "application/json", "api-key": AZURE_SEARCH_API_KEY}
    body: Dict[str, Any] = {"search": search_keywords, "top": top_k}

    if search_fields:
        body["searchFields"] = _as_csv(search_fields)
    if select_fields:
        body["select"] = _as_csv(select_fields)

    # Honour the notebook-level VERIFY_TLS setting instead of a literal so TLS
    # verification can be switched on in one place.
    resp = requests.post(url, headers=headers, json=body, timeout=30, verify=VERIFY_TLS)
    resp.raise_for_status()
    data = resp.json()
    data["_index_name"] = index_name
    return data

In [290]:
def collect_chunks(responses: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Flatten search responses into a list of unique chunk records.

    Walks every document under each response's ``"value"`` list, skips
    documents whose ``"chunk"`` is missing or empty, and keeps only the first
    occurrence of each chunk text (first response / first position wins).

    Returns:
        One dict per kept chunk with the keys ``chunk``, ``score`` (from
        ``"@search.score"``), ``id`` (first truthy of ``id``, ``key``,
        ``document_id``), ``index`` (the response's ``"_index_name"``),
        ``title`` and ``section``.
    """
    unique_texts = set()
    collected = []

    # Pair every hit with the index name of the response it came from.
    hits = (
        (response.get("_index_name"), hit)
        for response in responses
        for hit in response.get("value", [])
    )

    for source_index, hit in hits:
        text = hit.get("chunk")
        if not text or text in unique_texts:
            continue
        unique_texts.add(text)

        record = {
            "chunk": text,
            "score": hit.get("@search.score"),
            "id": hit.get("id") or hit.get("key") or hit.get("document_id"),
            "index": source_index,
            "title": hit.get("title"),
            "section": hit.get("section"),
        }
        collected.append(record)

    return collected

In [291]:
def run_pipeline(user_query: str, top_k: int = DEFAULT_TOP_K) -> List[Dict[str, Any]]:
    """Classify, rewrite and search a user query, returning unique chunks.

    Steps: classify the query into categories, rewrite it for search, query
    the index mapped to each category with the rewritten text, then merge the
    responses through ``collect_chunks``. A failing search for one index is
    logged and skipped so the remaining indexes are still queried.

    Args:
        user_query: The user's question.
        top_k: Number of hits requested from each index.

    Returns:
        The de-duplicated chunk records produced by ``collect_chunks``.
    """
    categories = classify_query(user_query)
    logging.info(f"Categories: {categories}")

    rewritten_query = rewrite_query(user_query)
    logging.info(f"Rewritten Query: {rewritten_query}")

    # Lazily resolve each category to its index; unmapped ones yield None and
    # are dropped by filter().
    candidate_indexes = (INDEX_MAP.get(category) for category in categories)

    responses = []
    for index_name in filter(None, candidate_indexes):
        try:
            responses.append(search_index(rewritten_query, index_name, top_k))
        except Exception as e:
            logging.error(f"Search error for {index_name}: {e}")

    return collect_chunks(responses)

In [292]:
# Example run: classify and rewrite the question, search the mapped indexes and
# collect the de-duplicated chunks (top_k hits requested per index).
query = "Tax rates for domestic company  for FY 2023-24, FY 2024-25 and FY 2025-26"
chunks = run_pipeline(query, top_k=10)

# Report how many unique chunks came back, then print each one's source index,
# search score and the first 200 characters of its text.
print("Total Unique Chunks:", len(chunks))
for i, c in enumerate(chunks, start=1):
    print(f"{i}. [{c['index']}] (score={c['score']})")
    print(c["chunk"][:200], "...\n")

2026-01-29 13:40:34,145 | INFO | HTTP Request: POST https://hrtransformation.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2026-01-29 13:40:34,149 | INFO | Categories: ['Act', 'Rule']
2026-01-29 13:40:36,532 | INFO | HTTP Request: POST https://hrtransformation.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"
2026-01-29 13:40:36,535 | INFO | Rewritten Query: Latest tax rates applicable to domestic companies in India for financial years 2023-24, 2024-25, and 2025-26


Total Unique Chunks: 19
1. [taxation-acts] (score=40.45608)
Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002
Sick Industrial Companies (Special Provisions) Act, 1985
SICK INDUSTRIAL COMPANIES (SPECIAL PROVISIONS) REPEAL ACT, 20 ...

2. [taxation-acts] (score=30.515665)
Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002
Sick Industrial Companies (Special Provisions) Act, 1985
SICK INDUSTRIAL COMPANIES (SPECIAL PROVISIONS) REPEAL ACT, 20 ...

3. [taxation-acts] (score=30.126259)
Act: Income-tax Act, 1961
Section: Section - 43D
nSpecial provision in case of income of public financial institutions, 23[***] etc.
43D. Notwithstanding anything to the contrary contained in any othe ...

4. [taxation-acts] (score=29.127035)
Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002
Sick Industrial Companies (Special Provisions) Act, 1985
SICK INDUSTRIAL COMPANIES (SPECIAL PROVISIONS) REPEAL ACT, 20 ...

5. [t

In [293]:
# Recompute rewritten query (keeps run_pipeline unchanged)
# NOTE(review): this is a second, independent LLM call, so the text produced
# here can differ from the rewritten query run_pipeline actually searched with
# (the two logged rewrites in this notebook's output are not identical).
rewritten_query = rewrite_query(query)
print("Rewritten query:", rewritten_query)

from typing import Tuple

def select_context_chunks(
    chunks: List[Dict[str, Any]],
    top_n: int = 20,
    max_chars_per_chunk: int = 1200
) -> List[Dict[str, Any]]:
    """
    Choose the highest-scoring chunks and shorten their text for prompting.

    Chunks are ordered by descending "score"; entries whose score is not a
    number sort after all scored ones. The first ``top_n`` are kept. Each kept
    chunk's text is stripped and, when longer than ``max_chars_per_chunk``,
    cut to that many characters with " ..." appended.

    Returns:
        A list of dicts with the keys "id", "index", "score" and "text".
    """
    def rank(entry):
        # Negated so that an ascending sort puts the largest score first.
        score = entry.get("score")
        if isinstance(score, (int, float)):
            return -score
        return float("inf")

    def trimmed_text(entry):
        body = (entry.get("chunk") or "").strip()
        if len(body) <= max_chars_per_chunk:
            return body
        return body[:max_chars_per_chunk] + " ..."

    ranked = sorted(chunks, key=rank)
    return [
        {
            "id": entry.get("id"),
            "index": entry.get("index"),
            "score": entry.get("score"),
            "text": trimmed_text(entry),
        }
        for entry in ranked[:top_n]
    ]

# Choose context (tweak top_n / max_chars_per_chunk for your model context window)
context_chunks = select_context_chunks(chunks, top_n=20, max_chars_per_chunk=1200)
print(f"Context chunks selected: {len(context_chunks)}")
# One summary line per selected chunk: rank, source index, id, score and the
# first 140 characters of its text with newlines (chr(10)) turned into spaces.
for i, c in enumerate(context_chunks, 1):
    print(f"{i:02d}. [idx={c['index']}] [id={c['id']}] (score={c['score']}) -> {c['text'][:140].replace(chr(10),' ')}...")

2026-01-29 13:40:40,847 | INFO | HTTP Request: POST https://hrtransformation.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


Rewritten query: Latest and official tax rates applicable to domestic companies in India for financial years 2023-24, 2024-25, and 2025-26, including breakdowns of corporate tax, surcharge, and cess.
Context chunks selected: 19
01. [idx=taxation-acts] [id=None] (score=40.45608) -> Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002 Sick Industrial Companies (Special Provisions) Act, 1985 S...
02. [idx=taxation-acts] [id=None] (score=30.515665) -> Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002 Sick Industrial Companies (Special Provisions) Act, 1985 S...
03. [idx=taxation-acts] [id=None] (score=30.126259) -> Act: Income-tax Act, 1961 Section: Section - 43D nSpecial provision in case of income of public financial institutions, 23[***] etc. 43D. No...
04. [idx=taxation-acts] [id=None] (score=29.127035) -> Reconstruction of Financial Assets And Enforcement of Security Interest Act, 2002 Sick Industrial Companies (Special Prov

In [294]:
# --- TEXT-ONLY FINAL ANSWER (does not require JSON parsing) ---

from typing import List, Dict, Any
from openai import AzureOpenAI

def build_context_block(context_chunks: List[Dict[str, Any]]) -> str:
    """
    Format the selected chunks as one numbered text block for the prompt.

    Each chunk becomes a header line ``[n] (index=..., id=..., score=...)``
    followed by its text; entries are numbered from 1 and separated by a
    blank line.
    """
    return "\n\n".join(
        f"[{position}] (index={entry.get('index')}, id={entry.get('id')}, score={entry.get('score')})\n{entry.get('text')}"
        for position, entry in enumerate(context_chunks, 1)
    )


def gpt_client_text(prompt: str) -> str:
    """
    Plain-text chat call to the Azure OpenAI deployment.

    Unlike the JSON client, no response_format is requested, so the model may
    answer in free-form text. Uses the module-level endpoint, key, API version
    and deployment settings. Exceptions from the SDK are not caught here.

    Returns:
        The content of the first completion choice, or "" when it is empty.
    """
    system_instruction = (
        "You are a taxation expert specializing in Indian Income Tax laws. "
        "Be accurate, concise, and pragmatic. Structure your answer with headings and bullets where helpful."
    )
    conversation = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": [{"type": "text", "text": prompt}]},
    ]

    client = AzureOpenAI(
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
        api_key=AZURE_OPENAI_API_KEY,
        api_version=AZURE_OPENAI_API_VERSION
    )
    completion = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,
        messages=conversation,
        timeout=300
    )
    answer = completion.choices[0].message.content
    return answer or ""


def ask_llm_for_final_answer(rewritten_query: str, context_chunks: List[Dict[str, Any]]) -> str:
    """
    Produce the final human-readable answer for a query.

    Renders ``context_chunks`` with ``build_context_block``, embeds the result
    and ``rewritten_query`` in a prompt that tells the model to combine the
    retrieved context with its own knowledge and to cite context entries as
    [1], [2], ..., then returns whatever ``gpt_client_text`` replies.
    """
    rendered_context = build_context_block(context_chunks)

    final_prompt = f"""
Answer the user's question using BOTH:
1) the context provided below (if relevant), and
2) your own domain knowledge.

Use the provided context when it helps clarify the answer, and rely on your broader knowledge when the context is incomplete. If something is unclear or not directly supported, you may mention your interpretation or typical understanding. When referring to a specific part of the context, cite it using bracketed numbers like [1], [2]. Present the response as a clear, well‑structured text with helpful headings and bullet points.

# Question
{rewritten_query}

# Context (optional; may be partial or noisy)
{rendered_context}

# Writing rules
- If context conflicts with your general knowledge, prefer the context.
- If you cannot find a part of the answer in context, you may rely on your expertise; say so briefly.
"""

    answer = gpt_client_text(final_prompt)
    return answer


# --- Run the final answer generation and display it as text ---
# Uses the rewritten_query and context_chunks globals produced by the previous
# cell, so that cell must have been executed first.

final_text = ask_llm_for_final_answer(rewritten_query, context_chunks)
print(final_text)


2026-01-29 13:40:55,485 | INFO | HTTP Request: POST https://hrtransformation.openai.azure.com/openai/deployments/gpt-4.1/chat/completions?api-version=2024-05-01-preview "HTTP/1.1 200 OK"


# Corporate Tax Rates for Domestic Companies in India: FY 2023-24, 2024-25, and 2025-26

There have been **no major changes** in the headline corporate tax rates in the Finance Acts 2023, 2024, and 2025 for domestic companies. The same rates and regimes continue as per the **Income-tax Act, 1961**, as amended by recent Finance Acts ([1], [6], [7], [8]).

Below is a concise and accurate breakdown.

---

## 1. **Standard Tax Regimes for Domestic Companies**

| FY (AY)                    | 2023-24 (AY 2024-25) | 2024-25 (AY 2025-26) | 2025-26 (AY 2026-27) |
|----------------------------|----------------------|----------------------|----------------------|
| Applicable rates           | Same for all years, unless amended in future finance acts |

### (A) **Normal Provisions**

- **Basic Tax Rate**:  
  - **25%**: If total turnover or gross receipts of the company in FY 2021-22 ≤ ₹400 crore  
  - **30%**: For all other domestic companies

#### Surcharge
- 7%: Where total income > ₹1 crore b