In [1]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time

# Download Historical Bitcoin Price Data

## Download historical Bitcoin price data from the Coinbase API

We will retrieve Bitcoin price information through the Coinbase API, capturing granular historical data at a minute-by-minute resolution.

### Key features:
1. Downloads minute-by-minute Bitcoin price data
2. Automatically handles API rate limits
3. Includes error handling and retries
4. Creates backup files automatically
5. Adds additional time columns for easier analysis

### Important notes:
1. It's recommended to download data in smaller time periods (e.g., 1-2 days at a time) due to the large volume of minute data
2. The code automatically pauses between requests to respect API rate limits
3. Backup files are created for each download
4. The code will retry up to 3 times on errors

The resulting CSV file will contain the following columns:
- timestamp: The exact time of the data point
- open: Opening price
- high: Highest price during the minute
- low: Lowest price during the minute
- close: Closing price
- volume: Trading volume
- date: Date part of the timestamp
- time: Time part of the timestamp

In [5]:
def get_historical_data_minutes(start_date, end_date, granularity = 60):
    """
    Downloads historical data from Coinbase API at minute intervals
    
    Parameters:
    start_date (str): Start date in 'YYYY-MM-DD' format
    end_date (str): End date in 'YYYY-MM-DD' format
    granularity (int): Interval in seconds (60 = 1 minute)
    
    Returns:
    pandas.DataFrame: DataFrame with historical data
    """
    
    # Convert dates to datetime objects
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')
    
    # Base URL for API requests
    base_url = "https://api.exchange.coinbase.com"
    
    # Empty list to store data
    all_data = []
    
    # For minute data, we'll get data in 300-minute intervals
    # to comply with API limitations
    current_start = start
    
    while current_start < end:
        # For minute data, we take smaller time windows
        current_end = min(current_start + timedelta(minutes = 300), end)
        
        # Create URL for the request
        endpoint = f"/products/BTC-USD/candles"
        params = {
            'start': current_start.isoformat(),
            'end': current_end.isoformat(),
            'granularity': granularity
        }
        
        # Send request with retries on error
        max_retries = 3
        retry_count = 0
        while retry_count < max_retries:
            try:
                response = requests.get(f"{base_url}{endpoint}", params=params)
                if response.status_code == 200:
                    data = response.json()
                    all_data.extend(data)
                    print(f"Downloaded data from {current_start} to {current_end}")
                    break
                elif response.status_code == 429:  # Too Many Requests
                    print("API rate limit reached. Waiting 30 seconds...")
                    time.sleep(30)
                else:
                    print(f"Request error: {response.status_code}")
                    time.sleep(5)
            except Exception as e:
                print(f"Error: {e}")
                time.sleep(5)
            retry_count += 1
        
        # Wait between requests to avoid hitting rate limits
        time.sleep(1)
        
        # Move to next period
        current_start = current_end

    if not all_data:
        raise Exception("No data received from API!")

    # Create DataFrame
    df = pd.DataFrame(all_data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    
    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    
    # Sort by time
    df = df.sort_values('timestamp')
    
    # Add additional time columns for easier analysis
    df['date'] = df['timestamp'].dt.date
    df['time'] = df['timestamp'].dt.time
    
    return df

def save_data_with_backup(df, filename='bitcoin_historical_data.csv'):
    """
    Saves data with a backup file
    """
    # Save main file
    df.to_csv(filename, index=False)
    
    # Create backup file with timestamp
    backup_filename = f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{filename}"
    df.to_csv(backup_filename, index=False)
    
    print(f"Data saved to {filename}")
    print(f"Backup created at {backup_filename}")

In [8]:
# Example usage:
if __name__ == "__main__":
    # Start date in 'YYYY-MM-DD' format
    start_date = '2024-11-01'
    # End date in 'YYYY-MM-DD' format
    end_date = '2024-11-02'  # For minute data, it's recommended to download smaller periods
    
    try:
        # Download the data
        btc_data = get_historical_data_minutes(start_date, end_date)
        
        # Display information about the data
        print("\nDownloaded data information:")
        print(f"Number of rows: {len(btc_data)}")
        print(f"Time period: from {btc_data['timestamp'].min()} to {btc_data['timestamp'].max()}")

        filename = 'test_bitcoin_historical_data_1_year.csv'
        # Save the data
        save_data_with_backup(btc_data, filename)
        
    except Exception as e:
        print(f"An error occurred: {e}")

Downloaded data from 2024-11-01 00:00:00 to 2024-11-01 05:00:00
Downloaded data from 2024-11-01 05:00:00 to 2024-11-01 10:00:00
Downloaded data from 2024-11-01 10:00:00 to 2024-11-01 15:00:00
Downloaded data from 2024-11-01 15:00:00 to 2024-11-01 20:00:00
Downloaded data from 2024-11-01 20:00:00 to 2024-11-02 00:00:00

Downloaded data information:
Number of rows: 1445
Time period: from 2024-11-01 00:00:00 to 2024-11-02 00:00:00
Data saved to test_bitcoin_historical_data_1_year.csv
Backup created at backup_20241205_001254_test_bitcoin_historical_data_1_year.csv
