In [6]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta

BINANCE_BASE_URL = "https://api.binance.com/api/v3"

def fetch_candlestick_data(symbol, interval, start_time=None, end_time=None, limit=1000):
    url = f"{BINANCE_BASE_URL}/klines"
    params = {
        'symbol': symbol,
        'interval': interval,
        'limit': limit,
        'startTime': start_time,
        'endTime': end_time
    }
    response = requests.get(url, params=params)
    data = response.json()
    
    # Print the raw data to verify the structure
    if data:
        print(data[0])  # Print the first entry to check the structure

    # Define the columns based on the API response
    columns = [
        'OpenTime', 'Open', 'High', 'Low', 'Close', 'Volume',
        'CloseTime', 'QuoteAssetVolume', 'Trades', 'TakerBuyBase', 'TakerBuyQuote', 'Ignore'
    ]

    # Create DataFrame
    df = pd.DataFrame(data, columns=columns)

    # Check if 'CloseTime' is in the DataFrame
    if 'CloseTime' not in df.columns:
        print("Warning: 'CloseTime' column is missing from the DataFrame.")
        # Handle the missing column as needed

    return df[['Open', 'High', 'Low', 'Close', 'Volume']].astype(float)

def fetch_historical_data(symbol, interval, start_time, end_time):
    all_data = []
    while start_time < end_time:
        df = fetch_candlestick_data(symbol, interval, start_time=start_time, end_time=end_time)
        if df.empty:
            break
        all_data.append(df)
        # Update start_time to the last CloseTime in the fetched data
        start_time = int(df['CloseTime'].iloc[-1]) + 1
        time.sleep(1)  # Sleep to respect API rate limits
    return pd.concat(all_data, ignore_index=True)

# Calculate start and end times
end_time = int(datetime.now().timestamp() * 1000)  # Current time in milliseconds
start_time = int((datetime.now() - timedelta(days=5*365)).timestamp() * 1000)  # 5 years ago in milliseconds

# Example usage
symbol = "BTCUSDT"
interval = "1h"
data = fetch_historical_data(symbol, interval, start_time, end_time)

# Print the total number of data points returned
print(f"Total data points returned: {len(data)}")



[1576778400000, '7131.75000000', '7169.00000000', '7119.16000000', '7157.52000000', '1339.95962400', 1576781999999, '9583170.93923106', 14298, '763.86989400', '5463439.71573474', '0']


KeyError: 'CloseTime'