In [1]:
import os
import json
import time
import requests
from pathlib import Path
import gzip, json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, Iterable

In [2]:
def get_states():
    url = "https://enam.gov.in/web/ajax_ctrl/states_name"
    response = requests.get(url)
    return response.json()

In [None]:
get_states()

In [3]:
headers = {
    "User-Agent": "Mozilla/5.0",
    "Content-Type": "application/x-www-form-urlencoded"
}

In [4]:
def get_apmcs():
    apmcs = []
    url = "https://enam.gov.in/web/Ajax_ctrl/apmc_list"
    states = get_states()['data']
    # Extract all state_ids
    state_ids = [item['state_id'] for item in states]
    for state_id in state_ids:
        payload = { 'state_id': state_id}
        response = requests.post(url, data=payload, headers=headers)
        apmcs.extend(response.json()['data'])
    return apmcs

In [None]:
get_apmcs()

In [5]:
def date_range(start_date: str, end_date: str):
    """
    Yields date strings (YYYY-MM-DD) from start_date to end_date inclusive.
    """
    start = datetime.strptime(start_date, "%Y-%m-%d").date()
    end = datetime.strptime(end_date, "%Y-%m-%d").date()

    current = start
    while current <= end:
        yield current.strftime("%Y-%m-%d")
        current += timedelta(days=1)

In [None]:
date_range("2025-08-01", "2025-08-12")

In [6]:
def save_jsonl_gz(data: list, jsonl_path: str) -> None:
    """
    Append DataFrame rows to a JSONL file (one JSON object per line).
    """
    if not data:
        return
    with gzip.open(jsonl_path, "wt", encoding="utf-8") as f:
        for row in data:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")

In [7]:
def to_date(s: str) -> datetime:
    """
    Parse a date string into a datetime object.
    Supports common formats like:
    - YYYY-MM-DD
    - DD-MM-YYYY
    - DD/MM/YYYY
    - YYYY/MM/DD
    - MM/DD/YYYY
    """
    try:
        return datetime.fromisoformat(s)  # handles YYYY-MM-DD
    except ValueError:
        for fmt in ("%d-%m-%Y", "%d/%m/%Y", "%Y/%m/%d", "%m/%d/%Y"):
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
    raise ValueError(f"Unrecognized date format: {s}")

In [8]:
def get_trade_data(date_str: str) -> list:
    url = "https://enam.gov.in/web/Ajax_ctrl/trade_data_list"
    payload = {
        "language": "en",
        "stateName": "-- All --",
        "apmcName": "-- Select APMCs --",
        "commodityName": "-- Select Commodity --"    
    }
    payload["fromDate"] = date_str
    payload["toDate"] = date_str
    response = requests.post(url, data=payload, headers=headers)
    return response.json()['data']

In [None]:
get_trade_data("2025-08-12")

In [9]:
def get_trade_date_range(start_date: str, end_date: str, jsonl_path: str) -> list:
    all_data = []
    for d in date_range(start_date, end_date):
        dt = datetime.strptime(d, "%Y-%m-%d")
        date_str = dt.strftime("%Y-%m-%d")
        year_str = dt.strftime("%Y") 
        month_str = dt.strftime("%b") 
        data = get_trade_data(date_str)
        day_dir = Path(jsonl_path)
        out_path = day_dir / year_str / month_str / f"{date_str}.jsonl.gz"
        out_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"Processing for date: {date_str}")
        save_jsonl_gz(data, out_path)
        all_data.extend(data)
    return all_data

In [10]:
output_location = "../data/enam/"
trade_data_list = get_trade_date_range("2025-01-01", "2025-12-31", output_location)

Processing for date: 2025-01-01
Processing for date: 2025-01-02
Processing for date: 2025-01-03
Processing for date: 2025-01-04
Processing for date: 2025-01-05
Processing for date: 2025-01-06
Processing for date: 2025-01-07
Processing for date: 2025-01-08
Processing for date: 2025-01-09
Processing for date: 2025-01-10
Processing for date: 2025-01-11
Processing for date: 2025-01-12
Processing for date: 2025-01-13
Processing for date: 2025-01-14
Processing for date: 2025-01-15
Processing for date: 2025-01-16
Processing for date: 2025-01-17
Processing for date: 2025-01-18
Processing for date: 2025-01-19
Processing for date: 2025-01-20
Processing for date: 2025-01-21
Processing for date: 2025-01-22
Processing for date: 2025-01-23
Processing for date: 2025-01-24
Processing for date: 2025-01-25
Processing for date: 2025-01-26
Processing for date: 2025-01-27
Processing for date: 2025-01-28
Processing for date: 2025-01-29
Processing for date: 2025-01-30
Processing for date: 2025-01-31
Processi

KeyError: 'data'

In [None]:
len(trade_data_list)

In [None]:
output_file = 'enam.jsonl'
# Save as JSONL
with open(output_file, "w", encoding="utf-8") as f:
    for record in trade_data_list:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

In [None]:
url = "https://enam.gov.in/web/Ajax_ctrl/commodity_list"
payload = {
    "language": "en",
    "stateName": "ANDHRA PRADESH",
    "apmcName": "-- Select APMCs --",
    "fromDate": "2025-08-13",
    "toDate": "2025-08-13"
}
response = requests.post(url, data=payload, headers=headers)
commodity_list = response.json()['data']
commodity_list

In [None]:
len(commodity_list)