In [None]:


import requests
import pandas as pd
from datetime import datetime
import time

# Selenium imports
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

def get_cme_settlements_robust(trade_date: str = None) -> pd.DataFrame:
    """
    Fetch CME settlement data for the Singapore FOB Marine Fuel 0.5% (Platts) future.

    The function works in two steps:

    1. A headless Chrome session (Selenium) loads the public settlements page so
       that any JavaScript challenge can run and set session cookies.
    2. Those cookies are copied into a ``requests`` session, which then calls the
       JSON settlements endpoint.

    Every failure mode handled here (browser error, HTTP error, non-JSON or
    unexpectedly shaped response, empty result) is reported via ``print`` and
    results in an empty DataFrame rather than an exception.

    Args:
        trade_date (str, optional): Date in 'YYYYMMDD' format. Defaults to today
            (local time of the machine running the code).

    Returns:
        pandas.DataFrame: One row per entry of the response's ``settlements``
        list, or an empty DataFrame on failure / when no data is returned.
    """
    main_page_url = "https://www.cmegroup.com/markets/energy/refined-products/singapore-fob-marine-fuel-05-platts.settlements.html"
    api_url = "https://www.cmegroup.com/CmeWS/mvc/Settlements/Future/Settlements/4286/FUT"
    # The browser and the follow-up HTTP request must present the same
    # User-Agent, so it is defined once and reused in both places.
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

    if not trade_date:
        trade_date = datetime.today().strftime('%Y%m%d')

    print("--- Step 1: Initializing automated browser to solve JS challenge ---")

    # Set up Chrome options for Selenium
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run in headless mode (no browser window opens)
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument(f'user-agent={user_agent}')

    # Sentinel so the cleanup below knows whether a browser was actually started
    # (webdriver.Chrome(...) itself may raise before `driver` is bound).
    driver = None
    try:
        # Use webdriver-manager to automatically handle the driver binary
        driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)

        # Navigate to the page and wait for JS to execute and set cookies
        driver.get(main_page_url)
        print("Waiting for page to load and security challenges to complete...")
        time.sleep(8)  # Fixed wait: gives the page's JS time to run and set cookies.

        # Extract cookies from the browser session
        browser_cookies = driver.get_cookies()
        print("Successfully extracted browser cookies.")

    except Exception as e:
        # Deliberately broad: any browser/driver problem maps to "no data".
        print(f"An error occurred during the Selenium browser session: {e}")
        return pd.DataFrame()
    finally:
        # Ensure the browser is closed, but only if it was started
        if driver is not None:
            driver.quit()

    print("\n--- Step 2: Using extracted cookies to fetch data with requests ---")

    params = {'tradeDate': trade_date, 'strategy': 'DEFAULT', 'pageSize': 500}

    # The context manager closes the session's connection pool on every exit path.
    with requests.Session() as session:
        # Add the cookies extracted from Selenium to our new session
        for cookie in browser_cookies:
            session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])

        # These headers are still important
        session.headers.update({
            'User-Agent': user_agent,
            'Referer': main_page_url
        })

        try:
            print(f"Fetching data for trade date: {trade_date}...")
            response = session.get(api_url, params=params, timeout=15)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"The final request to the API failed: {e}")
            return pd.DataFrame()
        except ValueError as e:
            # A non-JSON body (e.g. an HTML challenge page) makes .json() raise a
            # ValueError subclass that is not always a RequestException.
            print(f"The API response was not valid JSON: {e}")
            return pd.DataFrame()

    if not isinstance(data, dict):
        # Valid JSON but not an object: there is no 'settlements' key to read.
        print(f"Unexpected API response format: expected a JSON object, got {type(data).__name__}.")
        return pd.DataFrame()

    settlements_data = data.get('settlements', [])

    if not settlements_data:
        print(f"No settlement data found for {data.get('tradeDate', 'the specified date')}.")
        return pd.DataFrame()

    df = pd.DataFrame(settlements_data)
    print(f"Successfully retrieved data for trade date: {data.get('tradeDate')}")
    return df


if __name__ == "__main__":
    # Demo run: fetch one fixed historical date, then the default (today's) data,
    # printing the same summary columns for each non-empty result.
    summary_columns = ['month', 'settle', 'volume', 'openInterest']

    print("--- Getting historical data for May 15, 2024 ---")
    historical_df = get_cme_settlements_robust(trade_date='20240515')
    if not historical_df.empty:
        print("\nHistorical Data for 2024-05-15:")
        print(historical_df[summary_columns])

    # Visual separator between the two demo sections.
    print(f"\n{'=' * 50}\n")

    print("--- Getting latest data ---")
    latest_df = get_cme_settlements_robust()
    if not latest_df.empty:
        print("\nLatest Data:")
        print(latest_df[summary_columns])

Collecting selenium
  Downloading selenium-4.34.1-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting urllib3~=2.5.0 (from urllib3[socks]~=2.5.0->selenium)
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting certifi>=2025.6.15 (from selenium)
  Downloading certifi-2025.6.15-py3-none-any.whl.metadata (2.4 kB)
Collecting typing_extensions~=4.14.0 (from selenium)
  Downloading typing_extensions-4.14.1-py3-none-any.whl.metadata (3.0 kB)
Collecting websocket-client~=1.8.0 (from selenium)
  Downloading websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting attrs>=23.2.0 (from trio~=0.30.0->selenium)
  Downloading attrs-25.3.0-py3-none-any.wh