## Fetch Spot Data 

In [16]:
import os
import ccxt
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime
import time
import natsort
import glob


import sys
from pathlib import Path

# Add the parent of the current working directory to the module search path,
# presumably so the project-local `utils` package imported below can be found.
# NOTE(review): Path() is the kernel's working directory, so this depends on
# where the notebook was launched from — confirm it is the notebook's folder.
parent_dir = Path().resolve().parent
sys.path.append(str(parent_dir))

# Project helpers; these imports must stay after the sys.path tweak above.
from utils.general import check_missing_timestamps
from utils.general import OHLCVScraper, get_top_symbol_by_volume

# Load environment variables
# (presumably the BITGET_* values read with os.getenv in later cells — confirm).
load_dotenv()

# Print the ccxt library version so the environment of a run is recorded
print(f"ccxt version: {ccxt.__version__}")

ccxt version: 4.4.40


In [17]:
# --- Spot market configuration ------------------------------------------------
# Market / instrument selection
EXCHANGE_ID = "bitget"
MARKET_TYPE = "spot"
SYMBOL = "XRP/USDT"
TIMEFRAME = '1m'

# Scrape window
START_DATE_STR = "2025-01-03 00:00:00"
END_DATE_STR = "2025-01-04 00:00:00"

# Directory the scraped CSV files are written to
PATH_SAVE = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/{EXCHANGE_ID}/{MARKET_TYPE}"

# Credentials are read from the environment; each is None when the variable is unset
API_KEY = os.environ.get('BITGET_API_KEY')
SECRET_KEY = os.environ.get('BITGET_SECRET_KEY')
PASSWORD = os.environ.get('BITGET_PASSWORD')

# ccxt client for EXCHANGE_ID. Note that 'enableRateLimit' is a top-level
# setting, a sibling of 'options' rather than a member of it.
exchange = getattr(ccxt, EXCHANGE_ID)({
    'apiKey': API_KEY,
    'secret': SECRET_KEY,
    'password': PASSWORD,
    'options': {'defaultType': MARKET_TYPE},
    'enableRateLimit': True,
})

In [18]:
# Quick sanity check: fetch a single small page of candles and look at its tail.
# The preview start time gets its own name: reusing START_DATE_STR here would
# leave it later than END_DATE_STR and invert the window used by the scrape
# cell further down.
PREVIEW_START_STR = "2025-01-04 15:00:00"
# ccxt expects `since` as an integer number of milliseconds since the epoch
start_timestamp = int(pd.Timestamp(PREVIEW_START_STR).timestamp() * 1000)
ohlcv = exchange.fetch_ohlcv(SYMBOL, TIMEFRAME, since=start_timestamp, limit=100)
df = pd.DataFrame(ohlcv, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
df['date'] = pd.to_datetime(df['date'], unit='ms')
df.tail()

Unnamed: 0,date,open,high,low,close,volume
95,2025-01-04 16:36:00,2.4183,2.421,2.4182,2.421,38490.0271
96,2025-01-04 16:37:00,2.421,2.4235,2.4201,2.4235,40340.1931
97,2025-01-04 16:38:00,2.4235,2.4237,2.4221,2.4222,39439.9017
98,2025-01-04 16:39:00,2.4222,2.4227,2.4202,2.4209,36304.9992
99,2025-01-04 16:40:00,2.4209,2.4211,2.4194,2.4197,34908.7605


In [19]:
# Ask the project helper for up to 100 symbols matching the "/USDT" filter.
# NOTE(review): ranking by 24h volume is inferred from the helper's name and the
# `volume_24h` column in the displayed output — confirm in utils.general.
df_symbols = get_top_symbol_by_volume(exchange=exchange, pair_filter="/USDT", top_n=100)
df_symbols.head()

Unnamed: 0,symbol,volume_24h,price
14,ETH/USDT,375175300.0,3609.06
13,BTC/USDT,257393800.0,97591.16
51,BGB/USDT,157718400.0,6.021
6,DOGE/USDT,144416500.0,0.3824
697,X/USDT,92100260.0,0.000163


In [None]:
# NOTE(review): OHLCVScraper is already imported in the first cell, so this
# repeated import looks redundant — consider removing it.
from utils.general import OHLCVScraper

# Run the project scraper for SYMBOL / TIMEFRAME over the configured date window.
# NOTE(review): the scraper's behavior (paging, output file name, handling of
# end_date_str) lives in utils.general and is not visible here — confirm there.
scraper = OHLCVScraper(path_save=PATH_SAVE, exchange=exchange, exchange_id=EXCHANGE_ID)
scraper.scrape_candles_to_csv(
                symbol=SYMBOL,
                timeframe=TIMEFRAME,
                start_date_str=START_DATE_STR,
                end_date_str=END_DATE_STR,
                limit=100)

In [None]:
# Load the saved XRP_USDT_1m spot CSV and index it by its parsed 'date' column
df_spot = pd.read_csv("/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/spot/XRP_USDT_1m.csv")
df_spot = df_spot.assign(date=pd.to_datetime(df_spot['date'])).set_index('date')
df_spot.head()

In [15]:
# Check the spot frame for gaps using a 1-minute frequency.
# NOTE(review): the helper's return type is not visible here; a later cell reads
# `.empty` on its result, so it is presumably a pandas object — confirm.
missing = check_missing_timestamps(df_spot, freq='1min')

In [None]:
# Downsample the spot candles to 4-hour bars.
# Note: this rebinds df_spot, so the finer-grained frame loaded above is no
# longer available under that name after this cell runs.
df_spot = df_spot.resample('4h').agg({
    'open': 'first',     # First price in the 4-hour window (Open)
    'high': 'max',       # Maximum price in the 4-hour window (High)
    'low': 'min',        # Minimum price in the 4-hour window (Low)
    'close': 'last',     # Last price in the 4-hour window (Close)
    'volume': 'sum'      # Total volume in the 4-hour window
})
df_spot.head()

## Fetch Futures Data

In [9]:
# --- Futures market configuration ---------------------------------------------
# Market / instrument selection
EXCHANGE_ID = "bitget"
MARKET_TYPE = "future"
SYMBOL = "BTC/USDT:USDT"
TIMEFRAME = '1d'

# Scrape window
START_DATE_STR = "2024-01-01 00:00:00"
END_DATE_STR = "2025-01-01 00:00:00"

# Directory the scraped CSV files are written to
PATH_SAVE = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/{EXCHANGE_ID}/{MARKET_TYPE}"

# Credentials are read from the environment; each is None when the variable is unset
API_KEY = os.environ.get('BITGET_API_KEY')
SECRET_KEY = os.environ.get('BITGET_SECRET_KEY')
PASSWORD = os.environ.get('BITGET_PASSWORD')

# ccxt client for EXCHANGE_ID. Note that 'enableRateLimit' is a top-level
# setting, a sibling of 'options' rather than a member of it.
exchange = getattr(ccxt, EXCHANGE_ID)({
    'apiKey': API_KEY,
    'secret': SECRET_KEY,
    'password': PASSWORD,
    'options': {'defaultType': MARKET_TYPE},
    'enableRateLimit': True,
})

In [7]:
# Ask the project helper for up to 100 symbols matching the "/USDT:USDT" filter
# and renumber the rows from 0.
# NOTE(review): ranking by volume is inferred from the helper's name — confirm
# in utils.general.
df_symbols = get_top_symbol_by_volume(exchange=exchange, pair_filter="/USDT:USDT", top_n=100)
df_symbols = df_symbols.reset_index(drop=True)

# Print one symbol per line. (A bare `df_symbols.head(50)` in the middle of a
# cell is never displayed — only a cell's last expression is — so it is omitted.)
for symbol_name in df_symbols['symbol']:
    print(symbol_name)

BTC/USDT:USDT
ETH/USDT:USDT
XRP/USDT:USDT
SOL/USDT:USDT
DOGE/USDT:USDT
MOCA/USDT:USDT
HIVE/USDT:USDT
SUI/USDT:USDT
ADA/USDT:USDT
PEPE/USDT:USDT
UNI/USDT:USDT
STEEM/USDT:USDT
CHILLGUY/USDT:USDT
HBAR/USDT:USDT
BGB/USDT:USDT
XLM/USDT:USDT
BIO/USDT:USDT
ENA/USDT:USDT
BRETT/USDT:USDT
AGLD/USDT:USDT
DegenReborn/USDT:USDT
AI16Z/USDT:USDT
LINK/USDT:USDT
ACT/USDT:USDT
PENGU/USDT:USDT
ONDO/USDT:USDT
VIRTUAL/USDT:USDT
FARTCOIN/USDT:USDT
STG/USDT:USDT
KMNO/USDT:USDT
UXLINK/USDT:USDT
PHA/USDT:USDT
SHIB/USDT:USDT
PRCL/USDT:USDT
AIXBT/USDT:USDT
PNUT/USDT:USDT
WLD/USDT:USDT
AAVE/USDT:USDT
MOVE/USDT:USDT
FET/USDT:USDT
ARB/USDT:USDT
GIGA/USDT:USDT
TAO/USDT:USDT
AVAX/USDT:USDT
HYPE/USDT:USDT
SAND/USDT:USDT
GLM/USDT:USDT
DOT/USDT:USDT
WIF/USDT:USDT
DEEP/USDT:USDT
DF/USDT:USDT
SEI/USDT:USDT
FUEL/USDT:USDT
1000000MOG/USDT:USDT
GALA/USDT:USDT
USUAL/USDT:USDT
ALGO/USDT:USDT
NOT/USDT:USDT
L3/USDT:USDT
ENS/USDT:USDT
IO/USDT:USDT
VET/USDT:USDT
MEW/USDT:USDT
TRX/USDT:USDT
ZEREBRO/USDT:USDT
TIA/USDT:USDT
NEAR/USDT

In [8]:
# NOTE(review): OHLCVScraper is already imported in the first cell, so this
# repeated import looks redundant — consider removing it.
from utils.general import OHLCVScraper

# Run the project scraper for SYMBOL / TIMEFRAME over the configured date window.
# NOTE(review): the recorded output of this cell contains rows dated after
# END_DATE_STR (through 2025-01-05) — check whether the scraper is expected to
# trim its result to end_date_str.
scraper = OHLCVScraper(path_save=PATH_SAVE, exchange=exchange, exchange_id=EXCHANGE_ID)
scraper.scrape_candles_to_csv(
                symbol=SYMBOL,
                timeframe=TIMEFRAME,
                start_date_str=START_DATE_STR,
                end_date_str=END_DATE_STR,
                limit=100)

Fetched data up to 2024-03-30 00:00:00.001000
Fetched data up to 2024-06-28 00:00:00.001000
Fetched data up to 2024-09-26 00:00:00.001000
Fetched data up to 2024-12-25 00:00:00.001000
Fetched data up to 2025-01-05 00:00:00.001000
          date     open     high      low    close      volume
0   2024-01-01  42301.5  44251.2  42194.8  44219.0  143211.522
1   2024-01-02  44219.0  45924.4  44189.2  44967.0  204757.811
2   2024-01-03  44967.0  45545.5  40207.9  42839.3  245129.089
3   2024-01-04  42839.3  44844.0  42615.0  44135.8  181999.077
4   2024-01-05  44135.8  44395.9  42256.0  44143.2  183130.092
..         ...      ...      ...      ...      ...         ...
366 2025-01-01  93570.6  95164.0  92852.8  94576.7   96539.787
367 2025-01-02  94576.7  97797.5  94378.0  96957.1  147751.813
368 2025-01-03  96957.1  98944.0  96068.0  98146.0  107540.168
369 2025-01-04  98146.0  98759.4  97502.6  98173.2   57405.581
370 2025-01-05  98173.2  98492.0  97555.0  97672.1   21064.849

[371 rows x 6

In [4]:
# Load the saved DOGE_USDT:USDT_1m futures CSV, index it by its parsed 'date'
# column, then check it for gaps using a 1-minute frequency
df_future = pd.read_csv("/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/DOGE_USDT:USDT_1m.csv")
df_future = df_future.assign(date=pd.to_datetime(df_future['date'])).set_index('date')
missing = check_missing_timestamps(df_future, freq='1min')

In [None]:
# Downsample the futures candles to 4-hour bars.
# Note: this rebinds df_future, so the finer-grained frame loaded above is no
# longer available under that name after this cell runs.
df_future = df_future.resample('4h').agg({
    'open': 'first',     # First price in the 4-hour window (Open)
    'high': 'max',       # Maximum price in the 4-hour window (High)
    'low': 'min',        # Minimum price in the 4-hour window (Low)
    'close': 'last',     # Last price in the 4-hour window (Close)
    'volume': 'sum'      # Total volume in the 4-hour window
})
df_future.head()

In [None]:
# Element-wise difference of the two frames (pandas aligns on index and column labels).
# NOTE(review): the CSVs loaded in this notebook are DOGE futures and XRP spot —
# confirm that comparing two different symbols is intended.
df_future - df_spot

In [29]:
# Scan every CSV in the futures data directory (natural sort order) and report
# the files for which the gap check returns a non-empty result.
PATH_SAVE = "/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future"
files_path = natsort.natsorted(glob.glob(os.path.join(PATH_SAVE, "*.csv")))
for file in files_path:
    df = pd.read_csv(file)
    df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')
    missing = check_missing_timestamps(df, freq='1min')
    if missing.empty:
        # Nothing to report for this file
        continue
    print("Missing timestamps:")
    print(file)

In [13]:
import os
from pathlib import Path

import sys
import csv

# -----------------------------------------------------------------------------

# Directory three levels above the current working directory
# (os.path.abspath('') resolves to the working directory).
# NOTE(review): `root` is only referenced by the commented-out line below in the
# cells visible here — it looks like a leftover; confirm before removing.
root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(''))))
# sys.path.append(root + '/python')

import ccxt


def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Fetch one page of OHLCV candles, retrying when the call raises.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of additional attempts allowed after the first
            failure; the call is made at most ``max_retries + 1`` times.
        symbol: Market symbol passed through to ``fetch_ohlcv``.
        timeframe: Timeframe string passed through to ``fetch_ohlcv``.
        since: Start timestamp passed through to ``fetch_ohlcv``.
        limit: Page size passed through to ``fetch_ohlcv``.

    Returns:
        Whatever ``exchange.fetch_ohlcv`` returns on the first successful attempt.

    Raises:
        Exception: The error from the final attempt, re-raised unchanged once
            the number of attempts exceeds ``max_retries``.
    """
    num_retries = 0
    # Loop until a fetch succeeds or the retry budget is exhausted. Without the
    # loop a single failure would fall through and return None to the caller.
    while True:
        num_retries += 1
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            if num_retries > max_retries:
                raise


def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Collect OHLCV candles by paging backwards from the exchange's current time.

    Each iteration requests a page whose start is ``limit`` timeframes before
    the earliest candle collected so far and prepends it to the result.

    Paging stops when:
      * a page comes back empty (or as ``None``),
      * the first candle of a page is not older than what is already held, or
      * the start of the requested window is earlier than ``since``.

    Args:
        exchange: Object exposing ``milliseconds()``, ``parse_timeframe()``,
            ``iso8601()`` and ``fetch_ohlcv()``.
        max_retries: Retry budget forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol to fetch.
        timeframe: Timeframe string understood by ``exchange.parse_timeframe``.
        since: Checkpoint timestamp in milliseconds.
        limit: Number of candles requested per page.

    Returns:
        list: Candle rows in the order the pages were stitched together; empty
        when the very first page has no data.

    NOTE(review): every window is sized as ``limit * timeframe``. If the
    exchange returns fewer than ``limit`` candles per request, consecutive pages
    will presumably not be contiguous — confirm the per-request cap of the
    exchange before relying on large ``limit`` values.
    """
    earliest_timestamp = exchange.milliseconds()
    timeframe_duration_in_ms = exchange.parse_timeframe(timeframe) * 1000
    page_span_ms = limit * timeframe_duration_in_ms
    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - page_span_ms
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        # An empty (or missing) page cannot be indexed and means there is
        # nothing further to collect.
        if not ohlcv:
            break
        # if we have reached the beginning of history
        if ohlcv[0][0] >= earliest_timestamp:
            break
        earliest_timestamp = ohlcv[0][0]
        all_ohlcv = ohlcv + all_ohlcv
        print(len(all_ohlcv), symbol, 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        # if we have reached the checkpoint
        if fetch_since < since:
            break
    return all_ohlcv


def write_to_csv(filename, exchange, data):
    """Write ``data`` as CSV to ``./data/raw/<exchange>/<filename>``.

    The target directory is created when missing and an existing file of the
    same name is overwritten.

    Args:
        filename: Name of the CSV file; converted with ``str()``.
        exchange: Value whose ``str()`` form names the sub-directory.
        data: Iterable of rows, each row an iterable of cell values.
    """
    target_dir = Path("./data/raw/") / str(exchange)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir.joinpath(str(filename))
    # newline='' lets the csv module control line endings itself
    with open(destination, 'w+', newline='') as handle:
        writer = csv.writer(handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in data:
            writer.writerow(row)


def scrape_candles_to_csv(filename, exchange_id, max_retries, symbol, timeframe, since, limit):
    """Scrape OHLCV candles from an exchange and save them to a CSV file.

    Args:
        filename: Name of the CSV file handed to ``write_to_csv``.
        exchange_id: Name of the ccxt exchange class to instantiate.
        max_retries: Retry budget forwarded to ``scrape_ohlcv``.
        symbol: Market symbol to fetch.
        timeframe: Timeframe string (e.g. ``'1d'``).
        since: Checkpoint as a timestamp string or as integer milliseconds.
        limit: Number of candles requested per page.

    Raises:
        ValueError: If ``since`` is a string the exchange cannot parse.
    """
    # instantiate the exchange by id
    exchange = getattr(ccxt, exchange_id)({
        'enableRateLimit': True,  # required by the Manual
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since_ms = exchange.parse8601(since)
        # Fail early with a clear message instead of letting an unparsed value
        # reach the timestamp comparison inside scrape_ohlcv.
        if since_ms is None:
            raise ValueError(f"Could not parse 'since' timestamp: {since!r}")
        since = since_ms
    # preload all markets from the exchange
    exchange.load_markets()
    # fetch all candles
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    # save them to csv file
    write_to_csv(filename, exchange, ohlcv)
    if ohlcv:
        print('Saved', len(ohlcv), 'candles from', exchange.iso8601(ohlcv[0][0]), 'to', exchange.iso8601(ohlcv[-1][0]), 'to', filename)
    else:
        # No first/last candle to report when nothing was fetched
        print('Saved 0 candles to', filename)

In [15]:
# Scrape daily BTC/USDT:USDT candles back to the 2024-12-01 checkpoint.
# The checkpoint is written as a well-formed ISO 8601 timestamp (date and time
# separated by 'T').
# NOTE(review): the file name says "1m" while the timeframe is '1d' — confirm
# which one is intended.
# NOTE(review): the recorded output of this cell covers 2022-04-12 to 2022-07-09,
# well before the checkpoint; the exchange appears to return far fewer than 1000
# candles per request — consider a smaller `limit`.
scrape_candles_to_csv('btc_usdt_1m.csv', 'bitget', 3, 'BTC/USDT:USDT', '1d', '2024-12-01T00:00:00Z', 1000)

89 BTC/USDT:USDT candles in total from 2022-04-12T00:00:00.000Z to 2022-07-09T00:00:00.000Z
Saved 89 candles from 2022-04-12T00:00:00.000Z to 2022-07-09T00:00:00.000Z to btc_usdt_1m.csv
