## Fetch Spot Data 

In [1]:
import csv
import os
import sys
from pathlib import Path

import ccxt
from dotenv import load_dotenv

# Make the parent of the current working directory importable so the local
# `utils` package resolves. The membership check keeps sys.path free of
# duplicate entries when this cell is re-run.
parent_dir = Path().resolve().parent
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

from utils.general import (check_missing_timestamps,
                           get_top_symbol_by_volume)

# Load environment variables (read later through os.getenv)
load_dotenv()

# Print the ccxt library version
print(f"ccxt version: {ccxt.__version__}")

ccxt version: 4.4.40


In [13]:
def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Fetch one batch of OHLCV candles, retrying when the request fails.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of additional attempts allowed after the first
            one fails (``0`` means a single attempt).
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle timeframe forwarded to ``fetch_ohlcv``.
        since: Start timestamp forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``exchange.fetch_ohlcv`` returns on the first successful attempt.

    Raises:
        Exception: The error from the last attempt, re-raised unchanged once
            ``max_retries`` retries have been used up.
    """
    attempts = 0
    # Loop until a fetch succeeds or the retry budget is exhausted; without
    # the loop a single failure would fall through and return None.
    while True:
        attempts += 1
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            if attempts > max_retries:
                raise


def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Collect OHLCV candles by paging backwards in time from "now".

    Each iteration requests a window of `limit` candles that ends at the
    oldest timestamp collected so far and prepends the batch to the result.
    Paging stops when a request returns nothing, when a batch holds no candle
    older than what is already collected, or after the first window that
    starts before `since`. That last window is kept untrimmed, so the result
    may contain candles older than `since`.

    Args:
        exchange: Client providing ``milliseconds``, ``parse_timeframe`` and
            ``iso8601``; it is also handed to ``retry_fetch_ohlcv``.
        max_retries: Forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol to fetch.
        timeframe: Candle timeframe string understood by ``parse_timeframe``.
        since: Checkpoint timestamp in milliseconds.
        limit: Number of candles requested per window.

    Returns:
        list: The collected candle rows (presumably oldest first, assuming
        each batch comes back in ascending order; verify against the exchange).
    """
    oldest_ts = exchange.milliseconds()
    # Width of one request window in ms: `limit` candles of `timeframe` each.
    window_ms = limit * (exchange.parse_timeframe(timeframe) * 1000)
    candles = []
    while True:
        window_start = oldest_ts - window_ms
        batch = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, window_start, limit)
        # Beginning of history: nothing returned, or nothing older than before.
        if len(batch) == 0 or batch[0][0] >= oldest_ts:
            break
        oldest_ts = batch[0][0]
        candles = batch + candles
        print(len(candles), symbol, 'candles in total from', exchange.iso8601(candles[0][0]), 'to', exchange.iso8601(candles[-1][0]))
        # Checkpoint reached: the window just fetched starts before `since`.
        if window_start < since:
            break
    return candles


def write_to_csv(filename, path_save, data):
    """Write `data` (an iterable of rows) as CSV to `path_save`/`filename`.

    Missing parent directories are created and an existing file is
    overwritten.
    """
    target = Path(path_save).joinpath(filename)
    # Create the destination directory tree if it is not there yet
    target.parent.mkdir(parents=True, exist_ok=True)
    # newline='' leaves line-ending handling to the csv module
    with open(target, 'w+', newline='') as handle:
        writer = csv.writer(handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(data)



def scrape_candles_to_csv(filename, exchange, max_retries, symbol, timeframe, since, limit, path_save):
    """Download OHLCV candles for `symbol` and save them as a CSV file.

    Args:
        filename: Name of the CSV file to create inside `path_save`.
        exchange: Client used for ``parse8601``, ``load_markets`` and
            ``iso8601``; it is also handed to ``scrape_ohlcv``.
        max_retries: Forwarded to ``scrape_ohlcv``.
        symbol: Market symbol to download.
        timeframe: Candle timeframe string.
        since: Checkpoint as a millisecond timestamp, or a datetime string
            that ``exchange.parse8601`` can convert to one.
        limit: Number of candles requested per call.
        path_save: Directory the CSV is written to.

    Raises:
        ValueError: If `since` is a string that ``parse8601`` cannot parse.
    """
    # Convert `since` to milliseconds if needed
    if isinstance(since, str):
        since_ms = exchange.parse8601(since)
        # ccxt's parse8601 returns None for strings it cannot parse; fail here
        # with a clear message rather than later on a None comparison.
        if since_ms is None:
            raise ValueError(f"Could not parse `since` as an ISO 8601 datetime: {since!r}")
        since = since_ms
    # Preload all markets
    exchange.load_markets()
    # Fetch all candles
    ohlcv = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
    # Nothing fetched: skip the write so an existing file is not truncated,
    # and avoid indexing into an empty list in the summary message below.
    if not ohlcv:
        print(f"No candles returned for {symbol} ({timeframe}); nothing written to {filename}")
        return
    # Save them to CSV
    write_to_csv(filename, path_save, ohlcv)
    print(f"Saved {len(ohlcv)} candles from {exchange.iso8601(ohlcv[0][0])} to {exchange.iso8601(ohlcv[-1][0])} to {filename}")

## Fetch data from the futures market

In [14]:
# Credentials are read from the environment; a missing variable yields None.
API_KEY = os.environ.get('BITGET_API_KEY')
SECRET_KEY = os.environ.get('BITGET_SECRET_KEY')
PASSWORD = os.environ.get('BITGET_PASSWORD')

# Exchange and market type the client is configured for.
MARKET_TYPE = "future"
EXCHANGE_ID = "bitget"

# Look the exchange class up on the ccxt module by its id and instantiate it.
# 'enableRateLimit' is a top-level option, a sibling of 'options'.
exchange = getattr(ccxt, EXCHANGE_ID)(
    {
        'apiKey': API_KEY,
        'secret': SECRET_KEY,
        'password': PASSWORD,
        'options': {'defaultType': MARKET_TYPE},
        'enableRateLimit': True,
    }
)

In [None]:
# Ask the utils.general helper for the top 100 "/USDT:USDT" symbols, give the
# result a fresh 0..n-1 index, and preview the first ten rows.
df_symbols = get_top_symbol_by_volume(
    exchange=exchange,
    pair_filter="/USDT:USDT",
    top_n=100,
).reset_index(drop=True)
df_symbols.head(10)

Unnamed: 0,symbol,volume_24h,price
0,BTC/USDT:USDT,6572750000.0,97971.5
1,ETH/USDT:USDT,3224237000.0,3633.01
2,XRP/USDT:USDT,1395024000.0,2.3847
3,HIVE/USDT:USDT,364130500.0,0.5989
4,MOCA/USDT:USDT,233491700.0,0.31479
5,SOL/USDT:USDT,206075600.0,212.724
6,DOGE/USDT:USDT,165445000.0,0.38148
7,ADA/USDT:USDT,82531270.0,1.092
8,SUI/USDT:USDT,76437540.0,5.197
9,STEEM/USDT:USDT,72323790.0,0.30383


In [12]:
# Credentials and target market, read again here exactly as in the
# client-setup cell; the credentials themselves are not used in this cell.
API_KEY = os.environ.get('BITGET_API_KEY')
SECRET_KEY = os.environ.get('BITGET_SECRET_KEY')
PASSWORD = os.environ.get('BITGET_PASSWORD')
MARKET_TYPE = "future"
EXCHANGE_ID = "bitget"

# Download parameters.
# NOTE(review): PATH_SAVE is an absolute, machine-specific path; consider
# deriving it from a configurable data directory.
PATH_SAVE = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/{EXCHANGE_ID}/{MARKET_TYPE}/test"
TIMEFRAME = "1m"                       # candle size
FROM_DATE_STR = "2024-01-01 00:00:00"  # passed as `since` (the checkpoint)
LIMIT = 200                            # candles requested per call
SYMBOL = "VIRTUAL/USDT:USDT"
# "/" cannot appear in a file name, so it is replaced with "_"
FILENAME = f"{SYMBOL.replace('/', '_')}_{TIMEFRAME}.csv"

scrape_candles_to_csv(
    filename=FILENAME,
    exchange=exchange,
    max_retries=3,
    symbol=SYMBOL,
    timeframe=TIMEFRAME,
    since=FROM_DATE_STR,
    limit=LIMIT,
    path_save=PATH_SAVE,
)

In [7]:
import pandas as pd


# Load the saved BTC 1-minute candles; the CSV is read without a header row.
df = pd.read_csv(
    "/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/test/BTC_USDT:USDT_1m.csv",
    header=None,
)
# Rebuild the frame from the raw values with named columns
df = pd.DataFrame(df.values, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
# The first column holds epoch timestamps in milliseconds
df = df.assign(date=pd.to_datetime(df['date'], unit='ms'))
# Gap check against a 1-minute grid (the result is not inspected in this cell)
missing = check_missing_timestamps(df, freq='1min')
df = df.set_index('date')

In [8]:
# Display the candles frame, which the previous cell indexed by `date`.
df

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-24 23:52:00,98600.0,98613.8,98598.3,98608.8,55.554
2024-12-24 23:53:00,98608.8,98745.7,98608.8,98733.5,107.575
2024-12-24 23:54:00,98733.5,98792.9,98694.8,98792.6,86.779
2024-12-24 23:55:00,98792.6,98822.1,98754.0,98755.8,70.841
2024-12-24 23:56:00,98755.8,98755.8,98719.3,98719.6,79.707
...,...,...,...,...,...
2025-01-05 19:36:00,97980.6,97980.6,97950.6,97972.6,355.171
2025-01-05 19:37:00,97972.6,97974.5,97956.3,97971.7,74.326
2025-01-05 19:38:00,97971.7,97972.6,97958.2,97969.6,22.443
2025-01-05 19:39:00,97969.6,97993.7,97969.6,97993.7,26.431


## Check missing data

In [6]:
import natsort
import glob
import pandas as pd

# Scan every CSV directly inside PATH_SAVE (natural sort order) and report the
# files for which check_missing_timestamps returns a non-empty result.
PATH_SAVE = "/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future"
files_path = glob.glob(os.path.join(PATH_SAVE, "*.csv"))
files_path = natsort.natsorted(files_path)
for file in files_path:
    # Files are read without a header row; rebuild with named columns
    df = pd.read_csv(file, header=None)
    df = pd.DataFrame(df.values, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
    df['date'] = pd.to_datetime(df['date'], unit='ms')
    # NOTE(review): freq is fixed at 1 minute for every file, including those
    # whose names end in `_1h.csv`; confirm this is intended.
    missing = check_missing_timestamps(df, freq='1min')
    if missing.empty:
        continue
    print("Missing timestamps:", file, sep="\n")

Missing timestamps:
/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/MOCA_USDT:USDT_1h.csv
Missing timestamps:
/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/STEEM_USDT:USDT_1h.csv
Missing timestamps:
/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/VIRTUAL_USDT:USDT_1h.csv


In [5]:
# Load the saved MOCA 1-minute candles (mexc futures); read without a header row.
df = pd.read_csv(
    "/home/ubuntu/project/finance/cex-market-analysis/src/data/mexc/future/MOCA_USDT:USDT_1m.csv",
    header=None,
)
# Rebuild the frame from the raw values with named columns
df = pd.DataFrame(df.values, columns=['date', 'open', 'high', 'low', 'close', 'volume'])
# The first column holds epoch timestamps in milliseconds
df = df.assign(date=pd.to_datetime(df['date'], unit='ms'))
# Gap check against a 1-minute grid (the result is not inspected in this cell)
missing = check_missing_timestamps(df, freq='1min')
df = df.set_index('date')

In [6]:
# Downsample to 1-hour bars. Within each hour: open = first value,
# high = maximum, low = minimum, close = last value, volume = sum.
df_future = df.resample('1h').agg(
    {'open': 'first', 'high': 'max', 'low': 'min', 'close': 'last', 'volume': 'sum'}
)

In [None]:
# Display the hourly frame produced by the resample cell.
df_future

In [None]:
# Ask the utils.general helper for the top 100 "/USDT:USDT" symbols, give the
# result a fresh 0..n-1 index, and show up to 100 rows.
df_symbols = get_top_symbol_by_volume(
    exchange=exchange,
    pair_filter="/USDT:USDT",
    top_n=100,
).reset_index(drop=True)
df_symbols.head(100)

In [39]:
# Persist the symbol table. With to_csv's defaults the index is written as the
# first column. NOTE(review): absolute, machine-specific output path.
df_symbols.to_csv("/home/ubuntu/project/finance/cex-market-analysis/symbols/top_100_bitget.csv")