## Fetch Spot and Futures OHLCV Data

In [1]:
import os
import ccxt
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime
import time
import natsort
import glob

import sys
from pathlib import Path

# Make the directory above the working directory importable so the project-local
# ``utils`` package resolves. Path() is the current working directory, so this
# assumes the kernel was started one level below the folder that holds ``utils``.
parent_dir = Path().resolve().parent
# Guard the append: re-running this cell must not stack duplicate sys.path entries.
if str(parent_dir) not in sys.path:
    sys.path.append(str(parent_dir))

# Project-local helpers; must come after the sys.path tweak above.
# OHLCVScraper is pulled in here as well so every import lives in this first cell.
from utils.general import OHLCVScraper, check_missing_timestamps

# Load environment variables
load_dotenv()

# Print the ccxt library version
print(f"ccxt version: {ccxt.__version__}")

True

## Fetch data from SPOT

In [13]:
# --- Spot market: run parameters ---------------------------------------------
EXCHANGE_ID = "bitget"
MARKET_TYPE = "spot"
PATH_SAVE = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/{EXCHANGE_ID}/{MARKET_TYPE}"
SYMBOL = "XRP/USDT"
TIMEFRAME = '1m'
START_DATE_STR = "2024-12-28 00:00:00"
END_DATE_STR = "2025-01-01 00:00:00"

# --- Credentials: read from the environment; os.getenv yields None when unset ---
API_KEY = os.getenv('BITGET_API_KEY')
SECRET_KEY = os.getenv('BITGET_SECRET_KEY')
PASSWORD = os.getenv('BITGET_PASSWORD')

# Look the exchange class up by name on the ccxt module and instantiate it.
# 'enableRateLimit' is a top-level setting, a sibling of 'options' (not nested in it).
exchange = getattr(ccxt, EXCHANGE_ID)({
    'apiKey': API_KEY,
    'secret': SECRET_KEY,
    'password': PASSWORD,
    'options': {
        'defaultType': MARKET_TYPE,
    },
    'enableRateLimit': True,
})

In [14]:
# Imported in this cell so it can run without relying on an earlier import.
from utils.general import OHLCVScraper

# Scrape spot candles for SYMBOL between the configured start and end dates.
# NOTE(review): `limit` is presumably the number of candles per request — the
# progress log advances 100 minutes per fetch at the 1m timeframe; confirm in utils.
scraper = OHLCVScraper(
    path_save=PATH_SAVE,
    exchange=exchange,
    exchange_id=EXCHANGE_ID,
)
scraper.scrape_candles_to_csv(
    symbol=SYMBOL,
    timeframe=TIMEFRAME,
    start_date_str=START_DATE_STR,
    end_date_str=END_DATE_STR,
    limit=100,
)

Fetched data up to 2024-12-28 01:40:00.001000
Fetched data up to 2024-12-28 03:20:00.001000
Fetched data up to 2024-12-28 05:00:00.001000
Fetched data up to 2024-12-28 06:40:00.001000
Fetched data up to 2024-12-28 08:20:00.001000
Fetched data up to 2024-12-28 10:00:00.001000
Fetched data up to 2024-12-28 11:40:00.001000
Fetched data up to 2024-12-28 13:20:00.001000
Fetched data up to 2024-12-28 15:00:00.001000
Fetched data up to 2024-12-28 16:40:00.001000
Fetched data up to 2024-12-28 18:20:00.001000
Fetched data up to 2024-12-28 20:00:00.001000
Fetched data up to 2024-12-28 21:40:00.001000
Fetched data up to 2024-12-28 23:20:00.001000
Fetched data up to 2024-12-29 01:00:00.001000
Fetched data up to 2024-12-29 02:40:00.001000
Fetched data up to 2024-12-29 04:20:00.001000
Fetched data up to 2024-12-29 06:00:00.001000
Fetched data up to 2024-12-29 07:40:00.001000
Fetched data up to 2024-12-29 09:20:00.001000
Fetched data up to 2024-12-29 11:00:00.001000
Fetched data up to 2024-12-29 12:4

In [20]:
# Load the scraped 1-minute spot candles, parse the `date` column to datetimes
# and use it as the index (built as a new frame rather than mutated in place).
df_spot = pd.read_csv(
    "/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/spot/bitget/XRP_USDT_1m.csv"
)
df_spot = df_spot.assign(date=pd.to_datetime(df_spot['date'])).set_index('date')
df_spot.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-28 00:01:00,2.1475,2.1484,2.1469,2.1469,30266.5664
2024-12-28 00:02:00,2.1469,2.1488,2.1458,2.1459,38306.2556
2024-12-28 00:03:00,2.1459,2.1473,2.1452,2.1453,29085.5795
2024-12-28 00:04:00,2.1453,2.1473,2.1437,2.1469,19614.0132
2024-12-28 00:05:00,2.1469,2.1516,2.1456,2.1516,54070.696


In [21]:
# Check the 1-minute spot index for gaps at a '1min' frequency. The result is only
# stored in `missing`; nothing in this cell inspects or displays it.
# NOTE(review): the head() of this frame starts at 00:01:00 while START_DATE_STR is
# 00:00:00 — confirm whether the first candle is expected to be absent.
missing = check_missing_timestamps(df_spot, freq='1min')

In [22]:
# Collapse the 1-minute candles into 4-hour OHLCV bars.
# Rebinding `df_spot` replaces the 1-minute frame with the 4-hour one.
df_spot = df_spot.resample('4h').agg(
    dict(
        open='first',    # first 1-minute open inside the 4-hour window
        high='max',      # highest high inside the 4-hour window
        low='min',       # lowest low inside the 4-hour window
        close='last',    # last 1-minute close inside the 4-hour window
        volume='sum',    # total volume traded inside the 4-hour window
    )
)
df_spot.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-28 00:00:00,2.1475,2.1572,2.1346,2.1551,4896090.0
2024-12-28 04:00:00,2.1551,2.1714,2.146,2.1581,4575096.0
2024-12-28 08:00:00,2.1581,2.1758,2.1568,2.1705,5040357.0
2024-12-28 12:00:00,2.1705,2.1943,2.1597,2.1732,5897753.0
2024-12-28 16:00:00,2.1732,2.2036,2.1721,2.1899,5253626.0


## Fetch data from FUTURE

In [23]:
# --- Futures market: run parameters (rebinds the constants set for spot) ------
EXCHANGE_ID = "bitget"
MARKET_TYPE = "future"
PATH_SAVE = f"/home/ubuntu/project/finance/cex-market-analysis/src/data/{EXCHANGE_ID}/{MARKET_TYPE}"
SYMBOL = "XRP/USDT:USDT"
TIMEFRAME = '1m'
START_DATE_STR = "2024-12-28 00:00:00"
END_DATE_STR = "2025-01-01 00:00:00"

# --- Credentials: read from the environment; os.getenv yields None when unset ---
API_KEY = os.getenv('BITGET_API_KEY')
SECRET_KEY = os.getenv('BITGET_SECRET_KEY')
PASSWORD = os.getenv('BITGET_PASSWORD')

# Look the exchange class up by name on the ccxt module and instantiate it.
# 'enableRateLimit' is a top-level setting, a sibling of 'options' (not nested in it).
# NOTE(review): SYMBOL carries a ':USDT' settle suffix — confirm that 'future' is the
# defaultType intended for this contract.
exchange = getattr(ccxt, EXCHANGE_ID)({
    'apiKey': API_KEY,
    'secret': SECRET_KEY,
    'password': PASSWORD,
    'options': {
        'defaultType': MARKET_TYPE,
    },
    'enableRateLimit': True,
})

In [25]:
# Imported in this cell so it can run without relying on an earlier import.
from utils.general import OHLCVScraper

# Scrape futures candles for SYMBOL between the configured start and end dates.
# NOTE(review): `limit` is presumably the number of candles per request — the
# progress log advances 100 minutes per fetch at the 1m timeframe; confirm in utils.
scraper = OHLCVScraper(
    path_save=PATH_SAVE,
    exchange=exchange,
    exchange_id=EXCHANGE_ID,
)
scraper.scrape_candles_to_csv(
    symbol=SYMBOL,
    timeframe=TIMEFRAME,
    start_date_str=START_DATE_STR,
    end_date_str=END_DATE_STR,
    limit=100,
)

Fetched data up to 2024-12-28 01:40:00.001000
Fetched data up to 2024-12-28 03:20:00.001000
Fetched data up to 2024-12-28 05:00:00.001000
Fetched data up to 2024-12-28 06:40:00.001000
Fetched data up to 2024-12-28 08:20:00.001000
Fetched data up to 2024-12-28 10:00:00.001000
Fetched data up to 2024-12-28 11:40:00.001000
Fetched data up to 2024-12-28 13:20:00.001000
Fetched data up to 2024-12-28 15:00:00.001000
Fetched data up to 2024-12-28 16:40:00.001000
Fetched data up to 2024-12-28 18:20:00.001000
Fetched data up to 2024-12-28 20:00:00.001000
Fetched data up to 2024-12-28 21:40:00.001000
Fetched data up to 2024-12-28 23:20:00.001000
Fetched data up to 2024-12-29 01:00:00.001000
Fetched data up to 2024-12-29 02:40:00.001000
Fetched data up to 2024-12-29 04:20:00.001000
Fetched data up to 2024-12-29 06:00:00.001000
Fetched data up to 2024-12-29 07:40:00.001000
Fetched data up to 2024-12-29 09:20:00.001000
Fetched data up to 2024-12-29 11:00:00.001000
Fetched data up to 2024-12-29 12:4

In [26]:
# Load the scraped 1-minute futures candles, parse the `date` column to datetimes
# and use it as the index (built as a new frame rather than mutated in place).
df_future = pd.read_csv(
    "/home/ubuntu/project/finance/cex-market-analysis/src/data/bitget/future/bitget/XRP_USDT:USDT_1m.csv"
)
df_future = df_future.assign(date=pd.to_datetime(df_future['date'])).set_index('date')

# Gap check on the 1-minute index; the result is stored in `missing` but not inspected here.
missing = check_missing_timestamps(df_future, freq='1min')


In [27]:
# Collapse the 1-minute candles into 4-hour OHLCV bars.
# Rebinding `df_future` replaces the 1-minute frame with the 4-hour one.
df_future = df_future.resample('4h').agg(
    dict(
        open='first',    # first 1-minute open inside the 4-hour window
        high='max',      # highest high inside the 4-hour window
        low='min',       # lowest low inside the 4-hour window
        close='last',    # last 1-minute close inside the 4-hour window
        volume='sum',    # total volume traded inside the 4-hour window
    )
)
df_future.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-28 00:00:00,2.1465,2.1569,2.1332,2.1543,51169469.0
2024-12-28 04:00:00,2.1543,2.1711,2.1451,2.1577,58383774.0
2024-12-28 08:00:00,2.1577,2.1758,2.1567,2.1688,38290569.0
2024-12-28 12:00:00,2.1688,2.1941,2.1586,2.1725,70107270.0
2024-12-28 16:00:00,2.1725,2.2035,2.1707,2.1885,60825709.0


In [28]:
# Futures-minus-spot difference for each 4-hour bar; pandas aligns the two frames on
# index and column labels before subtracting.
# NOTE(review): the `volume` column is differenced as well — confirm that spot and
# futures volumes share a unit before reading anything into that column.
df_future - df_spot

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-12-28 00:00:00,-0.001,-0.0003,-0.0014,-0.0008,46273380.0
2024-12-28 04:00:00,-0.0008,-0.0003,-0.0009,-0.0004,53808680.0
2024-12-28 08:00:00,-0.0004,0.0,-0.0001,-0.0017,33250210.0
2024-12-28 12:00:00,-0.0017,-0.0002,-0.0011,-0.0007,64209520.0
2024-12-28 16:00:00,-0.0007,-0.0001,-0.0014,-0.0014,55572080.0
2024-12-28 20:00:00,-0.0014,0.0001,-0.0016,-0.0006,46862520.0
2024-12-29 00:00:00,-0.0006,-0.0005,-0.0008,-0.0008,48122040.0
2024-12-29 04:00:00,-0.0008,-0.0007,-0.0003,-0.001,31857370.0
2024-12-29 08:00:00,-0.001,-0.0007,-0.0016,-0.0018,38801760.0
2024-12-29 12:00:00,-0.0018,0.0,-0.0007,-0.0015,86399400.0


In [11]:
# NOTE(review): disabled scratch cell, kept for reference only.
# It loops over the CSV files in a directory and reports those with missing timestamps.
# `df` is not defined in any visible cell and is never loaded from `file` inside the
# loop, so each CSV would have to be read first if this were re-enabled.
# dir_path = "/home/ubuntu/project/finance/cex-market-analysis/src/data/test/1m"
# files_path = natsort.natsorted(glob.glob(os.path.join(dir_path, "*.csv"), recursive=False))
# for file in files_path:
#     missing = check_missing_timestamps(df, freq='1min')
#     if not missing.empty:
#         print("Missing timestamps:")
#         print(file)