In [15]:
import os
import json
import time
import requests
from pathlib import Path
import gzip, json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, Iterable

In [2]:
def get_states(timeout: float = 30.0):
    """
    Fetch the list of states from the eNAM ``states_name`` endpoint.

    Args:
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response. Callers in this notebook read
        its ``'data'`` key, a list of dicts carrying ``state_name`` and
        ``state_id``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://enam.gov.in/web/ajax_ctrl/states_name"
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors directly instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()

In [3]:
# Quick check of the endpoint: display the parsed response from get_states().
get_states()

{'data': [{'state_name': 'ANDAMAN AND NICOBAR ISLANDS', 'state_id': '37'},
  {'state_name': 'ANDHRA PRADESH', 'state_id': '276'},
  {'state_name': 'ASSAM', 'state_id': '38'},
  {'state_name': 'BIHAR', 'state_id': '33'},
  {'state_name': 'CHANDIGARH', 'state_id': '526'},
  {'state_name': 'CHHATTISGARH', 'state_id': '100'},
  {'state_name': 'GOA', 'state_id': '34'},
  {'state_name': 'GUJARAT', 'state_id': '22'},
  {'state_name': 'HARYANA', 'state_id': '32'},
  {'state_name': 'HIMACHAL PRADESH', 'state_id': '43'},
  {'state_name': 'JAMMU AND KASHMIR', 'state_id': '696'},
  {'state_name': 'JHARKHAND', 'state_id': '47'},
  {'state_name': 'KARNATAKA', 'state_id': '695'},
  {'state_name': 'KERALA', 'state_id': '694'},
  {'state_name': 'MADHYA PRADESH', 'state_id': '20'},
  {'state_name': 'MAHARASHTRA', 'state_id': '296'},
  {'state_name': 'NAGALAND', 'state_id': '35'},
  {'state_name': 'ODISHA', 'state_id': '384'},
  {'state_name': 'PUDUCHERRY', 'state_id': '599'},
  {'state_name': 'PUNJAB', 

In [4]:
# HTTP headers passed along with the POST requests made in this notebook.
headers = dict(
    [
        ("User-Agent", "Mozilla/5.0"),
        ("Content-Type", "application/x-www-form-urlencoded"),
    ]
)

In [5]:
def get_apmcs(timeout: float = 30.0):
    """
    Collect the APMC entries for every state returned by ``get_states()``.

    One POST per state is sent to the ``apmc_list`` endpoint, and the
    ``'data'`` list of each response is appended to a single flat list, in
    the order the states were returned.

    Args:
        timeout: Seconds to wait on each per-state request before giving up.
            Without a timeout a single stalled connection would block the
            whole loop.

    Returns:
        A flat list of the entries from all states. The entries themselves do
        not record which state they came from.

    Raises:
        requests.HTTPError: If any per-state request returns a 4xx/5xx status.
        requests.Timeout: If a per-state request exceeds ``timeout``.
    """
    url = "https://enam.gov.in/web/Ajax_ctrl/apmc_list"
    apmcs = []
    for state in get_states()['data']:
        response = requests.post(
            url,
            data={'state_id': state['state_id']},
            headers=headers,
            timeout=timeout,
        )
        # Stop on an HTTP error rather than JSON-decoding an error page.
        response.raise_for_status()
        apmcs.extend(response.json()['data'])
    return apmcs

In [6]:
# Display the combined APMC list; get_apmcs() issues one POST per state.
get_apmcs()

[{'apmc_id': '5038', 'apmc_name': 'ANDAMAN HI TECH KISAN MANDI'},
 {'apmc_id': '316', 'apmc_name': 'ADONI'},
 {'apmc_id': '314', 'apmc_name': 'ANAKAPALLI'},
 {'apmc_id': '289', 'apmc_name': 'ANANTHAPURAMU'},
 {'apmc_id': '792', 'apmc_name': 'BOBBILI APMC'},
 {'apmc_id': '793', 'apmc_name': 'CHILAKALURIPET APMC'},
 {'apmc_id': '291', 'apmc_name': 'DENDULURU'},
 {'apmc_id': '312', 'apmc_name': 'DUGGIRALA'},
 {'apmc_id': '311', 'apmc_name': 'ELURU'},
 {'apmc_id': '282', 'apmc_name': 'GOPALAPURAM'},
 {'apmc_id': '313', 'apmc_name': 'GUNTUR'},
 {'apmc_id': '293', 'apmc_name': 'GURRAMKONDA'},
 {'apmc_id': '318', 'apmc_name': 'HINDUPUR'},
 {'apmc_id': '794', 'apmc_name': 'ICHAPURAM APMC'},
 {'apmc_id': '795', 'apmc_name': 'JAGGAYYAPET APMC'},
 {'apmc_id': '320', 'apmc_name': 'KADAPA'},
 {'apmc_id': '292', 'apmc_name': 'KALIKIRI'},
 {'apmc_id': '319', 'apmc_name': 'Kalyandurg'},
 {'apmc_id': '315', 'apmc_name': 'KURNOOL'},
 {'apmc_id': '288', 'apmc_name': 'MADANAPALLE'},
 {'apmc_id': '290', 'a

In [7]:
def date_range(start_date: str, end_date: str):
    """
    Generate every calendar day between two dates as ``YYYY-MM-DD`` strings.

    Both bounds are given as ``YYYY-MM-DD`` strings and both are included in
    the output. Nothing is yielded when ``end_date`` precedes ``start_date``.
    """
    iso_fmt = "%Y-%m-%d"
    one_day = timedelta(days=1)
    day, last_day = (
        datetime.strptime(text, iso_fmt).date() for text in (start_date, end_date)
    )

    while not day > last_day:
        yield day.strftime(iso_fmt)
        day = day + one_day

In [8]:
# date_range is a generator function, so this cell displays only the generator
# object, not the dates; wrap the call in list(...) to see the values.
date_range("2025-08-01", "2025-08-12")

<generator object date_range at 0x111635e70>

In [9]:
def save_jsonl_gz(data: list, jsonl_path: str) -> None:
    """
    Write records to a gzip-compressed JSONL file (one JSON object per line).

    The target is opened in text-write mode ("wt"), so an existing file at
    ``jsonl_path`` is replaced rather than appended to. When ``data`` is
    empty, nothing is written and no file is created.

    Non-ASCII characters are written as-is (UTF-8), not ``\\u``-escaped.
    """
    if data:
        with gzip.open(jsonl_path, "wt", encoding="utf-8") as handle:
            handle.writelines(
                json.dumps(record, ensure_ascii=False) + "\n" for record in data
            )

In [10]:
def to_date(s: str) -> datetime:
    """
    Convert a date string to a ``datetime``, trying several layouts in order.

    ISO format (e.g. YYYY-MM-DD) is attempted first via
    ``datetime.fromisoformat``; if that fails, these patterns are tried in
    this order:
    - DD-MM-YYYY
    - DD/MM/YYYY
    - YYYY/MM/DD
    - MM/DD/YYYY

    Because DD/MM/YYYY comes before MM/DD/YYYY, an ambiguous value such as
    "01/02/2025" is read as 1 February; the month-first pattern only matches
    when the day-first one cannot (e.g. "08/13/2025").

    Raises:
        ValueError: If none of the layouts match.
    """
    fallback_formats = ("%d-%m-%Y", "%d/%m/%Y", "%Y/%m/%d", "%m/%d/%Y")

    try:
        parsed = datetime.fromisoformat(s)
    except ValueError:
        parsed = None
    if parsed is not None:
        return parsed

    for pattern in fallback_formats:
        try:
            return datetime.strptime(s, pattern)
        except ValueError:
            pass
    raise ValueError(f"Unrecognized date format: {s}")

In [11]:
def get_trade_data(date_str: str, timeout: float = 60.0) -> list:
    """
    Fetch the trade records for a single day from the ``trade_data_list`` endpoint.

    The same date is sent as both ``fromDate`` and ``toDate``. The state, APMC
    and commodity fields are sent with fixed placeholder values (presumably the
    site's "no filter" selections; the sample output spans several states).

    Args:
        date_str: The day to query, e.g. "2025-08-12".
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The value of the response's ``'data'`` key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://enam.gov.in/web/Ajax_ctrl/trade_data_list"
    payload = {
        "language": "en",
        "stateName": "-- All --",
        "apmcName": "-- Select APMCs --",
        "commodityName": "-- Select Commodity --",
        "fromDate": date_str,
        "toDate": date_str,
    }
    response = requests.post(url, data=payload, headers=headers, timeout=timeout)
    # Surface HTTP errors directly instead of trying to JSON-decode an error page.
    response.raise_for_status()
    return response.json()['data']

In [12]:
# Sample call: display the records returned for a single day.
get_trade_data("2025-08-12")

[{'id': '32145869',
  'state': 'ODISHA',
  'apmc': 'SAKHIGOPAL',
  'commodity': 'COCONUT',
  'min_price': '7',
  'modal_price': '7',
  'max_price': '30',
  'commodity_arrivals': '183352',
  'commodity_traded': '183332',
  'created_at': '2025-08-12',
  'status': '1',
  'Commodity_Uom': 'Nos'},
 {'id': '32147785',
  'state': 'RAJASTHAN',
  'apmc': 'MANDAWARI',
  'commodity': 'MUSTARD',
  'min_price': '6249',
  'modal_price': '6934',
  'max_price': '7000',
  'commodity_arrivals': '3192',
  'commodity_traded': '16524',
  'created_at': '2025-08-12',
  'status': '1',
  'Commodity_Uom': 'Qui'},
 {'id': '32148596',
  'state': 'RAJASTHAN',
  'apmc': 'UDAIPUR F AND V',
  'commodity': 'COCONUT',
  'min_price': '15',
  'modal_price': '15',
  'max_price': '30',
  'commodity_arrivals': '16200',
  'commodity_traded': '16200',
  'created_at': '2025-08-12',
  'status': '1',
  'Commodity_Uom': 'Nos'},
 {'id': '32147782',
  'state': 'RAJASTHAN',
  'apmc': 'MANDAWARI',
  'commodity': 'WHEAT',
  'min_price

In [13]:
def get_trade_date_range(start_date: str, end_date: str, jsonl_path: str) -> list:
    """
    Download trade data day by day and archive each day as a gzipped JSONL file.

    For every date from ``start_date`` to ``end_date`` (inclusive) the records
    are fetched with ``get_trade_data`` and handed to ``save_jsonl_gz`` with the
    target ``<jsonl_path>/<YYYY>/<Mon>/<YYYY-MM-DD>.jsonl.gz``.

    Args:
        start_date: First day to fetch, as YYYY-MM-DD.
        end_date: Last day to fetch, as YYYY-MM-DD.
        jsonl_path: Root output directory (despite the name, not a file path);
            year and month sub-directories are created beneath it as needed.

    Returns:
        All records from all days in one list, in date order.
    """
    root_dir = Path(jsonl_path)  # loop-invariant, so built once
    all_data = []
    for date_str in date_range(start_date, end_date):
        # Announce the date before the request so a hang or failure is
        # attributed to the day that caused it.
        print(f"Processing for date: {date_str}")
        dt = datetime.strptime(date_str, "%Y-%m-%d")
        # "%b" is the locale's abbreviated month name, so the month directory
        # name depends on the active locale.
        out_path = root_dir / dt.strftime("%Y") / dt.strftime("%b") / f"{date_str}.jsonl.gz"
        data = get_trade_data(date_str)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        save_jsonl_gz(data, out_path)
        all_data.extend(data)
    return all_data

In [17]:
# Root directory for the per-day archives; the call below fetches every day of
# 2024, one request per day.
output_location = "../data/enam/"
trade_data_list = get_trade_date_range(
    start_date="2024-01-01",
    end_date="2024-12-31",
    jsonl_path=output_location,
)

Processing for date: 2024-01-01
Processing for date: 2024-01-02
Processing for date: 2024-01-03
Processing for date: 2024-01-04
Processing for date: 2024-01-05
Processing for date: 2024-01-06
Processing for date: 2024-01-07
Processing for date: 2024-01-08
Processing for date: 2024-01-09
Processing for date: 2024-01-10
Processing for date: 2024-01-11
Processing for date: 2024-01-12
Processing for date: 2024-01-13
Processing for date: 2024-01-14
Processing for date: 2024-01-15
Processing for date: 2024-01-16
Processing for date: 2024-01-17
Processing for date: 2024-01-18
Processing for date: 2024-01-19
Processing for date: 2024-01-20
Processing for date: 2024-01-21
Processing for date: 2024-01-22
Processing for date: 2024-01-23
Processing for date: 2024-01-24
Processing for date: 2024-01-25
Processing for date: 2024-01-26
Processing for date: 2024-01-27
Processing for date: 2024-01-28
Processing for date: 2024-01-29
Processing for date: 2024-01-30
Processing for date: 2024-01-31
Processi

In [None]:
# Total number of records collected across the whole date range.
len(trade_data_list)

In [None]:
# Dump every collected record into a single uncompressed JSONL file
# (one JSON object per line) in the current working directory.
output_file = 'enam.jsonl'
with open(output_file, mode="w", encoding="utf-8") as f:
    for record in trade_data_list:
        # print() appends the trailing newline that separates the records.
        print(json.dumps(record, ensure_ascii=False), file=f)

In [None]:
# Fetch the commodity list for one state (ANDHRA PRADESH) on a single day and
# display it.
url = "https://enam.gov.in/web/Ajax_ctrl/commodity_list"
payload = {
    "language": "en",
    "stateName": "ANDHRA PRADESH",
    "apmcName": "-- Select APMCs --",
    "fromDate": "2025-08-13",
    "toDate": "2025-08-13"
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(url, data=payload, headers=headers, timeout=30)
# Surface HTTP errors directly instead of trying to JSON-decode an error page.
response.raise_for_status()
commodity_list = response.json()['data']
commodity_list

In [None]:
# Number of entries in the fetched commodity list.
len(commodity_list)