## Fetch Spot Data 

In [1]:
import os
import ccxt
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime
import time
import natsort
import glob

import sys
from pathlib import Path

# Add the parent directory to the system path
parent_dir = Path().resolve().parent
sys.path.append(str(parent_dir))

from utils.general import check_missing_timestamps

# Load environment variables
load_dotenv()

True

In [2]:
# Print the ccxt library version
print(f"ccxt version: {ccxt.__version__}")

# Retrieve API credentials from environment variables
API_KEY = os.getenv('BITGET_API_KEY')
SECRET_KEY = os.getenv('BITGET_SECRET_KEY')
PASSWORD = os.getenv('BITGET_PASSWORD')

# Initialize the Bitget exchange with API credentials
exchange = ccxt.bitget({
    'apiKey': API_KEY,
    'secret': SECRET_KEY,
    'password': PASSWORD,
})

# Enable rate limiting to comply with API usage policies
exchange.enableRateLimit = True

ccxt version: 4.4.40


In [3]:
symbol = "SOL/USDT"
timeframe = "1h"
limit = 100
max_retries = 3  # Maximum number of retries for empty responses

# Convert start and end dates to timestamps
start_date = "2024-10-22T00:00:00"
end_date = "2025-01-03T09:00:00"
start_timestamp = int(pd.Timestamp(start_date).timestamp() * 1000)
end_timestamp = int(pd.Timestamp(end_date).timestamp() * 1000)
ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since=start_timestamp, limit=limit)
df = pd.DataFrame(ohlcv, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
df['date'] = pd.to_datetime(df['date'], unit='ms')
df

Unnamed: 0,date,open,high,low,close,volume


In [4]:
# Define parameters
symbol = "SOL/USDT"
timeframe = "1m"
limit = 100
max_retries = 3  # Maximum number of retries for empty responses

# Convert start and end dates to timestamps
start_date = "2024-12-01T00:00:00"
end_date = "2025-01-03T09:00:00"
start_timestamp = int(pd.Timestamp(start_date).timestamp() * 1000)
end_timestamp = int(pd.Timestamp(end_date).timestamp() * 1000)

# Fetch data in chunks of 'limit' timesteps
data = []
current_timestamp = start_timestamp

while current_timestamp < end_timestamp:
    retries = 0
    while retries < max_retries:
        try:
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe, since=current_timestamp, limit=limit)
            if not ohlcv:
                retries += 1
                print(f"No data returned. Retrying {retries}/{max_retries}...")
                time.sleep(1)  # Wait 1 second before retrying
                continue
            data.extend(ohlcv)
            current_timestamp = ohlcv[-1][0] + 1  # Update timestamp to the last candle + 1ms
            print(f"Fetched data up to {pd.to_datetime(current_timestamp, unit='ms')}")
            break
        except Exception as e:
            print(f"An error occurred: {e}")
            retries += 1
    if retries == max_retries:
        print(f"Max retries reached for timestamp {pd.to_datetime(current_timestamp, unit='ms')}. Skipping...")
        current_timestamp += 1 * 60  * 1000 
      
# Create DataFrame if data exists
if data:
    df = pd.DataFrame(data, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
    df['date'] = pd.to_datetime(df['date'], unit='ms')
    print(df)
else:
    print(f"No data fetched for {symbol} between {start_date} and {end_date}.")

Fetched data up to 2024-12-01 01:39:00.001000
Fetched data up to 2024-12-01 03:18:00.001000
Fetched data up to 2024-12-01 04:57:00.001000
Fetched data up to 2024-12-01 06:36:00.001000
Fetched data up to 2024-12-01 08:15:00.001000
Fetched data up to 2024-12-01 09:54:00.001000
Fetched data up to 2024-12-01 11:33:00.001000
Fetched data up to 2024-12-01 13:12:00.001000
Fetched data up to 2024-12-01 14:51:00.001000
Fetched data up to 2024-12-01 16:30:00.001000
Fetched data up to 2024-12-01 18:09:00.001000
Fetched data up to 2024-12-01 19:48:00.001000
Fetched data up to 2024-12-01 21:27:00.001000
Fetched data up to 2024-12-01 23:06:00.001000
Fetched data up to 2024-12-02 00:45:00.001000
Fetched data up to 2024-12-02 02:24:00.001000
Fetched data up to 2024-12-02 04:03:00.001000
Fetched data up to 2024-12-02 05:42:00.001000
Fetched data up to 2024-12-02 07:21:00.001000
Fetched data up to 2024-12-02 09:00:00.001000
Fetched data up to 2024-12-02 10:39:00.001000
Fetched data up to 2024-12-02 12:1

In [5]:
missing = check_missing_timestamps(df, freq='1min')

In [12]:
dir_path = "/home/ubuntu/project/finance/cex-market-analysis/src/data/spot/binance"
files_path = natsort.natsorted(glob.glob(os.path.join(dir_path, "*.csv"), recursive=False))
for file in files_path:
    missing = check_missing_timestamps(df, freq='1min')
    if not missing.empty:
        print("Missing timestamps:")
        print(file) 

In [13]:
df = pd.read_csv("/home/ubuntu/project/finance/cex-market-analysis/src/data/spot/binance/AAVE_USDT_1m.csv")
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
new_df = df.resample('1d').agg({
    'open': 'first',     # First price in the 1-hour window (Open)
    'high': 'max',       # Maximum price in the 1-hour window (High)
    'low': 'min',        # Minimum price in the 1-hour window (Low)
    'close': 'last',     # Last price in the 1-hour window (Close)
    'volume': 'sum'      # Total volume in the 1-hour window
})
new_df

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-10-01,156.76,164.32,143.30,144.81,307625.849
2024-10-02,144.80,150.23,136.82,139.14,248375.003
2024-10-03,139.16,146.80,134.70,143.73,343883.213
2024-10-04,143.73,152.95,141.52,151.25,297529.597
2024-10-05,151.29,151.89,146.07,148.46,69951.682
...,...,...,...,...,...
2024-12-28,324.01,362.29,318.74,354.40,225926.455
2024-12-29,354.53,355.83,326.45,332.74,164666.205
2024-12-30,332.77,344.86,318.99,322.06,288106.733
2024-12-31,322.08,328.50,306.00,308.82,251212.136


In [14]:
# from utils.general import OHLCVScraper
# exchange_id = "bitget"
# path_save = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/"
# scraper = OHLCVScraper(path_save=path_save, exchange_id=exchange_id)
# symbol = "SOL/USDT"
# timeframe = "1m"
# scraper.scrape_candles_to_csv(
#         symbol=symbol,
#         timeframe=timeframe,
#         start_date_str="2024-12-01 00:00:00",
#         end_date_str="2025-01-01 00:00:00",
#         limit=100
#     )