In [1]:
pip install requests pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import requests
import pandas as pd
import time

API_KEY = "579b464db66ec23bdd00000108116827059c448a4869b2777966c12b"
BASE_URL = "https://api.data.gov.in/resource/8b68ae56-84cf-4728-a0a6-1be11028dea7"
LIMIT = 100

STATES = ["MAHARASHTRA"]

MAX_PER_STATE = 100000  # ✅ This controls how many per state

headers = {"User-Agent": "Mozilla/5.0", "Accept": "application/json"}

for state in STATES:
    print(f"\n⬇️ Fetching data for: {state}...")
    OFFSET = 0
    all_records = []
    retry_count = 0

    while True:
        params = {
            "api-key": API_KEY,
            "format": "json",
            "limit": LIMIT,
            "offset": OFFSET,
            "filters[State]": state
        }

        try:
            response = requests.get(BASE_URL, params=params, headers=headers, timeout=60)
            response.raise_for_status()
            data = response.json()
        except Exception as e:
            retry_count += 1
            if retry_count > 5:
                print(f"❌ Skipping {state} after multiple failures.")
                break
            wait_time = 5 * retry_count
            print(f"⚠️ Timeout/error. Retrying in {wait_time}s...")
            time.sleep(wait_time)
            continue

        retry_count = 0  

        if "records" not in data or not data["records"]:
            print(f"✅ Completed {state} with {len(all_records)} records.")
            break

        all_records.extend(data["records"])
        OFFSET += LIMIT
        print(f"  → {len(all_records)} records fetched...", end="\r")

        # ✅ STOP IF REACHED LIMIT FOR THIS STATE
        if len(all_records) >= MAX_PER_STATE:
            print(f"\n✅ Reached {MAX_PER_STATE} records for {state}. Moving to next state.")
            break

        time.sleep(1)

    if all_records:
        df = pd.DataFrame(all_records)
        filename = f"msme_{state.replace(' ', '_')}.csv"
        df.to_csv(filename, index=False)
        print(f"📁 Saved: {filename}")
    else:
        print(f"⚠️ No data saved for {state}")



⬇️ Fetching data for: MAHARASHTRA...
⚠️ Timeout/error. Retrying in 5s...
⚠️ Timeout/error. Retrying in 5s...
⚠️ Timeout/error. Retrying in 10s...
⚠️ Timeout/error. Retrying in 15s...
⚠️ Timeout/error. Retrying in 20s...
⚠️ Timeout/error. Retrying in 25s...
❌ Skipping MAHARASHTRA after multiple failures.
📁 Saved: msme_MAHARASHTRA.csv
