In [None]:
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
from datetime import datetime, timedelta

def get_date_range(start_date: str, end_date: str):
    """Return every calendar day between two dates as ``YYYY-MM-DD`` strings.

    Both arguments are parsed with the ``%Y-%m-%d`` format, and both endpoints
    are included in the result. If ``end_date`` falls before ``start_date``
    the result is an empty list.
    """
    fmt = "%Y-%m-%d"
    first_day = datetime.strptime(start_date, fmt)
    last_day = datetime.strptime(end_date, fmt)

    # Both values are parsed without a time component, so the difference is a
    # whole number of days; a negative span yields an empty range below.
    span = (last_day - first_day).days
    return [
        (first_day + timedelta(days=offset)).strftime(fmt)
        for offset in range(span + 1)
    ]

# Quick sanity check: a short range that crosses a month boundary.
dates = get_date_range(start_date="2025-06-29", end_date="2025-07-2")
print(dates)


In [None]:
def download_file(url, output_dir):
    """Stream ``url`` into ``output_dir`` and report whether it succeeded.

    The saved file is named after the last ``/``-separated segment of ``url``.
    Data is first written to ``<name>.part`` and only renamed to its final
    name once the whole response has been received, so a failed or
    interrupted transfer never leaves a truncated file that looks complete.

    Args:
        url: Address of the file to fetch.
        output_dir: Directory to save into; created if it does not exist.

    Returns:
        True if the file was fully written, False otherwise. Errors are
        printed rather than raised so that one failed download does not
        abort a batch of downloads.
    """
    filename = url.split("/")[-1]
    file_path = os.path.join(output_dir, filename)
    partial_path = file_path + ".part"

    try:
        # An empty output_dir means "current directory"; makedirs("") would fail.
        if output_dir:
            # exist_ok=True keeps this safe when several threads get here at once.
            os.makedirs(output_dir, exist_ok=True)
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Promote the finished download to its final name in one step.
        os.replace(partial_path, file_path)
        print(f"Downloaded: {filename}")
        return True
    except Exception as e:
        print(f"Error downloading {filename}: {e}")
        # Drop whatever was written so far; there may be nothing to remove
        # if the failure happened before the temp file was opened.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return False

def download_for_date(date, output_dir, symbol="BTCUSDT"):
    """Download one day's spot trades archive and its checksum file.

    Args:
        date: Day to fetch, formatted the way it appears in the archive
            name (e.g. "2025-06-29").
        output_dir: Directory passed through to ``download_file``.
        symbol: Trading pair whose archive is fetched. Defaults to
            "BTCUSDT", which reproduces the previously hard-coded URL.
    """
    url_file = (
        "https://data.binance.vision/data/spot/daily/trades/"
        f"{symbol}/{symbol}-trades-{date}.zip"
    )
    # The checksum is requested from the archive URL with ".CHECKSUM" appended.
    for url in (url_file, url_file + ".CHECKSUM"):
        download_file(url, output_dir)

In [None]:
%%time

# Params
download_dir = "binance_data"
dates = get_date_range("2017-08-17", "2025-06-30")

# Files are opened inside download_dir, so it must exist before any worker
# starts; without this a fresh run fails on every single file.
os.makedirs(download_dir, exist_ok=True)

# Use ThreadPoolExecutor to run multiple downloads in parallel
max_workers = 12  # Adjust based on your network and system
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = [executor.submit(download_for_date, date, download_dir) for date in dates]

    for future in as_completed(futures):
        # Re-raises anything that escaped a worker. Download errors handled
        # (printed) inside download_file do not show up here.
        future.result()

In [None]:
# Earlier version, from when I was scraping sequentially (superseded by the threaded download above)

# for date in dates:

#     # Replace with your actual .zip URL
#     url_file = f"https://data.binance.vision/data/spot/daily/trades/BTCUSDT/BTCUSDT-trades-{date}.zip"
#     url_checksum = url_file + ".CHECKSUM"
    
#     filename = url_file.split("/")[-1]  # Extracts filename from URL
#     file_path = os.path.join(download_dir, filename)
    
#     # Download the file
#     with requests.get(url_file, stream=True) as response:
#         response.raise_for_status()  # Raise an error for bad status codes
#         with open(file_path, "wb") as f:
#             for chunk in response.iter_content(chunk_size=8192):
#                 f.write(chunk)
    
#     print(f"Downloaded: {filename}")

#     # Download the checksum
#     filename = url_checksum.split("/")[-1]  # Extracts filename from URL
#     file_path = os.path.join(download_dir, filename)
    
#     with requests.get(url_checksum, stream=True) as response:
#         response.raise_for_status()  # Raise an error for bad status codes
#         with open(file_path, "wb") as f:
#             for chunk in response.iter_content(chunk_size=8192):
#                 f.write(chunk)
    
#     print(f"Downloaded: {filename}")

