In [1]:
import sys
sys.path.append('..') # for import src

import os
import cloudpickle
import lzma
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import ccxt

import src
cloudpickle.register_pickle_by_value(src) # for model portability

In [2]:
import time

class Fetcher:
    """Build 5-minute OHLCV bars for one bitFlyer product from its public executions.

    Attributes set in ``__init__``:
        symbol: product code sent to the API as ``product_code`` (e.g. 'FX_BTC_JPY').
        keys: ``{'symbol': symbol}``; not read inside this class (presumably
            consumed by whatever loads the pickled fetcher -- verify there).
        data_id: fixed identifier 'bf_ohlcv_5m'; likewise not read inside this class.
    """

    _BAR_SECONDS = 5 * 60
    _MINUTE_SECONDS = 60
    # Column order of the frame returned by fetch().
    _COLUMNS = (
        'op', 'hi', 'lo', 'cl', 'volume', 'amount', 'trades',
        'buy_volume', 'buy_amount', 'twap',
    )

    def __init__(self, symbol=None):
        self.symbol = symbol
        self.keys = {
            'symbol': symbol
        }
        self.data_id = 'bf_ohlcv_5m'

    def fetch(self, last_timestamp=None):
        """Download executions newer than ``last_timestamp`` and aggregate them.

        Args:
            last_timestamp: Unix time in seconds. Only executions at or after
                ``last_timestamp + 300`` are aggregated (presumably this is the
                start of the newest bar the caller already holds). Required,
                despite the ``None`` default kept for interface compatibility.

        Returns:
            DataFrame indexed by ``timestamp`` (bar start, Unix seconds) with the
            columns op, hi, lo, cl, volume, amount, trades, buy_volume,
            buy_amount and twap (mean of the 1-minute closing prices inside the
            bar). The newest bar is dropped because it may still be incomplete.
            The frame is empty when the API returned no executions.

        Raises:
            TypeError: if ``last_timestamp`` is None.
        """
        if last_timestamp is None:
            # Fail before touching the network instead of with an opaque
            # "NoneType + int" error after the client has been created.
            raise TypeError(
                'last_timestamp is required: pass the Unix timestamp (seconds) '
                'of the last bar that is already stored'
            )

        client = ccxt.bitflyer()
        start_time = last_timestamp + self._BAR_SECONDS

        pages = self._download_pages(client, start_time)
        if not pages:
            return pd.DataFrame(
                columns=list(self._COLUMNS),
                index=pd.Index([], dtype='int64', name='timestamp'),
            )

        trades = (
            pd.concat(pages, ignore_index=True)
            .sort_values('id')
            .drop_duplicates('id')
        )
        # Copy so later column assignments never target a view of `trades`.
        trades = trades.loc[trades['timestamp'] >= start_time].copy()

        bars = self._to_bars(trades)
        return bars.iloc[:-1]  # remove partial (possibly unfinished) newest bar

    def _download_pages(self, client, start_time):
        """Page backwards through executions until one is older than ``start_time``."""
        epoch = pd.Timestamp('1970-01-01', tz='UTC')
        one_second = pd.Timedelta(seconds=1)

        pages = []
        end_id = None
        while True:
            time.sleep(2)  # 500 calls / 5min
            result = client.publicGetGetexecutions({
                'product_code': self.symbol,
                'before': end_id,  # not included
                'count': 1000,
            })
            if not result:
                # No older executions are available; stop instead of failing on
                # the missing 'exec_date' column of an empty frame.
                break

            page = pd.DataFrame(result)
            parsed = pd.to_datetime(page['exec_date'], utc=True)
            # Whole seconds since the epoch, independent of the datetime
            # resolution pandas picked and of the platform's default int width.
            page['timestamp'] = (parsed - epoch) // one_second
            pages.append(page)

            end_id = int(page['id'].min())
            if page['timestamp'].min() < start_time:
                break
        return pages

    def _to_bars(self, trades):
        """Aggregate id-ordered executions into 5-minute bars indexed by bar start."""
        bar = self._BAR_SECONDS
        minute = self._MINUTE_SECONDS

        trades = trades.assign(
            price=trades['price'].astype('float'),
            size=trades['size'].astype('float'),
        )
        is_buy = trades['side'] == 'BUY'
        amount = trades['price'] * trades['size']
        trades = trades.assign(
            timestamp_5m=(trades['timestamp'] // bar) * bar,
            timestamp_1m=(trades['timestamp'] // minute) * minute,
            amount=amount,
            buy_volume=np.where(is_buy, trades['size'], 0),
            buy_amount=np.where(is_buy, amount, 0),
        )

        # 'first'/'last' are true aggregations keyed by the group label.
        # groupby().nth() became a row filter in pandas 2.0 and keeps the
        # original row index, which breaks alignment on the bar timestamp.
        minute_close = (
            trades.groupby('timestamp_1m')['price'].last().rename('cl').reset_index()
        )
        minute_close['timestamp_5m'] = (minute_close['timestamp_1m'] // bar) * bar
        twap = minute_close.groupby('timestamp_5m')['cl'].mean().rename('twap')

        bars = trades.groupby('timestamp_5m').agg(
            op=('price', 'first'),
            hi=('price', 'max'),
            lo=('price', 'min'),
            cl=('price', 'last'),
            volume=('size', 'sum'),
            amount=('amount', 'sum'),
            trades=('price', 'count'),
            buy_volume=('buy_volume', 'sum'),
            buy_amount=('buy_amount', 'sum'),
        )
        return bars.join(twap).rename_axis('timestamp')


In [3]:
# Smoke test against the live API. fetch() needs a concrete last_timestamp
# (calling it with None fails), so start from a 5-minute-aligned Unix time.
last_bar_start = (1669990000 // 300) * 300

fetcher = Fetcher(symbol='FX_BTC_JPY')
df = fetcher.fetch(last_timestamp=last_bar_start)
display(df)

Unnamed: 0_level_0,op,hi,lo,cl,volume,amount,trades,buy_volume,buy_amount,twap
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1669990200,2304937.0,2307700.0,2304816.0,2307319.0,25.529977,58895560.0,882,14.449428,33333610.0,2307357.0
1669990500,2307226.0,2309581.0,2306587.0,2308972.0,28.023374,64681870.0,1125,16.684457,38509950.0,2308154.2
1669990800,2309084.0,2309084.0,2304404.0,2305594.0,49.492438,114161000.0,1382,21.189935,48884080.0,2306662.4
1669991100,2305806.0,2307386.0,2304700.0,2304861.0,17.30022,39892010.0,591,6.391633,14740200.0,2306036.0
1669991400,2304861.0,2312004.0,2304725.0,2310083.0,49.093816,113333800.0,1579,28.180791,65051460.0,2309476.6
1669991700,2310251.0,2315784.0,2308614.0,2314023.0,55.244383,127741000.0,1746,33.637772,77778230.0,2310926.6
1669992000,2314023.0,2314734.0,2310589.0,2310683.0,27.018605,62486530.0,971,12.199157,28215760.0,2311866.0
1669992300,2310683.0,2311190.0,2308470.0,2309144.0,26.283867,60712550.0,995,11.435665,26415640.0,2309667.4
1669992600,2309414.0,2310523.0,2308733.0,2310523.0,18.860184,43558510.0,672,11.428861,26396340.0,2309854.2
1669992900,2310277.0,2310737.0,2308910.0,2309148.0,11.455828,26459770.0,480,5.679155,13117730.0,2309394.4


In [4]:
# Serialize one Fetcher per symbol into an xz-compressed cloudpickle file.
# NOTE: an initial manual upload of old data is required; fetch() only
# continues from an existing last_timestamp.

symbols = ['FX_BTC_JPY']
fetchers = [Fetcher(symbol=name) for name in symbols]

data = lzma.compress(cloudpickle.dumps(fetchers))
with open('/home/jovyan/data/20221202_bf_ohlcv_5m.xz', 'wb') as f:
    f.write(data)