In [1]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

In [2]:
from datetime import datetime, timedelta

def get_date_range(start_date: str, end_date: str):
    """Return every calendar day from start_date through end_date.

    Both arguments are "YYYY-MM-DD" strings, and both endpoints are part of
    the result. The days come back as "YYYY-MM-DD" strings in ascending
    order. If start_date falls after end_date the result is an empty list.
    """
    fmt = "%Y-%m-%d"
    first_day = datetime.strptime(start_date, fmt)
    last_day = datetime.strptime(end_date, fmt)

    # Both values are parsed at midnight, so the difference is a whole number
    # of days; a negative span yields an empty range below.
    span_days = (last_day - first_day).days

    return [
        (first_day + timedelta(days=offset)).strftime(fmt)
        for offset in range(span_days + 1)
    ]

# Example usage: build the inclusive day list and print it in full
dates = get_date_range(start_date="2025-06-29", end_date="2026-01-28")
print(dates)


['2025-06-29', '2025-06-30', '2025-07-01', '2025-07-02', '2025-07-03', '2025-07-04', '2025-07-05', '2025-07-06', '2025-07-07', '2025-07-08', '2025-07-09', '2025-07-10', '2025-07-11', '2025-07-12', '2025-07-13', '2025-07-14', '2025-07-15', '2025-07-16', '2025-07-17', '2025-07-18', '2025-07-19', '2025-07-20', '2025-07-21', '2025-07-22', '2025-07-23', '2025-07-24', '2025-07-25', '2025-07-26', '2025-07-27', '2025-07-28', '2025-07-29', '2025-07-30', '2025-07-31', '2025-08-01', '2025-08-02', '2025-08-03', '2025-08-04', '2025-08-05', '2025-08-06', '2025-08-07', '2025-08-08', '2025-08-09', '2025-08-10', '2025-08-11', '2025-08-12', '2025-08-13', '2025-08-14', '2025-08-15', '2025-08-16', '2025-08-17', '2025-08-18', '2025-08-19', '2025-08-20', '2025-08-21', '2025-08-22', '2025-08-23', '2025-08-24', '2025-08-25', '2025-08-26', '2025-08-27', '2025-08-28', '2025-08-29', '2025-08-30', '2025-08-31', '2025-09-01', '2025-09-02', '2025-09-03', '2025-09-04', '2025-09-05', '2025-09-06', '2025-09-07', '2025

In [3]:
def download_file(url, output_dir):
    """Stream ``url`` to a file inside ``output_dir``.

    The local file is named after the last ``/``-separated segment of the URL.
    ``output_dir`` is created if it does not exist yet.

    The response body is first written to ``<name>.part`` and only renamed to
    its final name once the whole transfer succeeded, so an interrupted
    download never leaves a truncated file under the real name.

    This is a best-effort helper: any exception raised while downloading is
    caught and reported with ``print`` instead of being propagated.

    Args:
        url: Address of the file to fetch.
        output_dir: Directory the file is saved into.

    Returns:
        None. Success or failure is reported on stdout.
    """
    filename = url.split("/")[-1]
    file_path = os.path.join(output_dir, filename)
    partial_path = file_path + ".part"

    try:
        # An empty output_dir means "current directory"; makedirs("") would raise.
        # exist_ok=True keeps this safe when several threads create it at once.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty chunks
                        f.write(chunk)

        # Promote the staged file only after the body was fully written.
        os.replace(partial_path, file_path)
        print(f"Downloaded: {filename}")
    except Exception as e:
        # Drop whatever was staged so no half-written data is left behind.
        # The staging file may not exist (e.g. the request itself failed).
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Error downloading {filename}: {e}")

def download_for_date(date, output_dir, symbol="BTCUSDT"):
    """Download one day's spot-trades archive and its checksum file.

    Builds the data.binance.vision URL of the daily trades ``.zip`` for
    ``symbol`` and ``date``, then fetches that archive followed by its
    ``.CHECKSUM`` companion via ``download_file``. The checksum file is only
    saved here; it is not compared against the archive.

    Args:
        date: Day to fetch, as it appears in the file name (the notebook
            passes "YYYY-MM-DD" strings).
        output_dir: Directory both files are saved into.
        symbol: Trading pair used in the URL path and file name. Defaults to
            "BTCUSDT", which reproduces the previously hard-coded behaviour.
    """
    archive_url = (
        f"https://data.binance.vision/data/spot/daily/trades/{symbol}/"
        f"{symbol}-trades-{date}.zip"
    )

    # Archive first, then the checksum that sits next to it on the server.
    for url in (archive_url, archive_url + ".CHECKSUM"):
        download_file(url, output_dir)

In [4]:
%%time

# Params
download_dir = "binance_data"
dates = get_date_range("2025-06-30", "2026-01-28")

# Create the target directory up front. Without it, every write fails on a
# fresh checkout because nothing else in this cell creates the directory.
os.makedirs(download_dir, exist_ok=True)

# Use ThreadPoolExecutor to run multiple downloads in parallel
max_workers = 12  # Adjust based on your network and system
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = [executor.submit(download_for_date, date, download_dir) for date in dates]

    for future in as_completed(futures):
        # Re-raises anything a worker raised. download_file catches request
        # and I/O errors itself and only prints them, so those failures show
        # up in the cell output rather than here.
        future.result()

Downloaded: BTCUSDT-trades-2025-07-06.zip
Downloaded: BTCUSDT-trades-2025-07-04.zip
Downloaded: BTCUSDT-trades-2025-07-08.zip
Downloaded: BTCUSDT-trades-2025-07-07.zip
Downloaded: BTCUSDT-trades-2025-07-05.zip
Downloaded: BTCUSDT-trades-2025-07-09.zip
Downloaded: BTCUSDT-trades-2025-07-02.zip
Downloaded: BTCUSDT-trades-2025-07-04.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-01.zip
Downloaded: BTCUSDT-trades-2025-07-07.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-08.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-03.zip
Downloaded: BTCUSDT-trades-2025-06-30.zip
Downloaded: BTCUSDT-trades-2025-07-10.zip
Downloaded: BTCUSDT-trades-2025-07-05.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-06.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-09.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-02.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-11.zip
Downloaded: BTCUSDT-trades-2025-07-01.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-06-30.zip.CHECKSUM
Downloaded: BTCUSDT-trades-2025-07-03

In [None]:
# Earlier sequential version of the download loop, kept for reference
# (superseded by the threaded downloader above)

# for date in dates:

#     # Replace with your actual .zip URL
#     url_file = f"https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-{date}.zip"
#     url_checksum = url_file + ".CHECKSUM"
    
#     filename = url_file.split("/")[-1]  # Extracts filename from URL
#     file_path = os.path.join(download_dir, filename)
    
#     # Download the file
#     with requests.get(url_file, stream=True) as response:
#         response.raise_for_status()  # Raise an error for bad status codes
#         with open(file_path, "wb") as f:
#             for chunk in response.iter_content(chunk_size=8192):
#                 f.write(chunk)
    
#     print(f"Downloaded: {filename}")

#     # Download the checksum
#     filename = url_checksum.split("/")[-1]  # Extracts filename from URL
#     file_path = os.path.join(download_dir, filename)
    
#     with requests.get(url_checksum, stream=True) as response:
#         response.raise_for_status()  # Raise an error for bad status codes
#         with open(file_path, "wb") as f:
#             for chunk in response.iter_content(chunk_size=8192):
#                 f.write(chunk)
    
#     print(f"Downloaded: {filename}")

