# premium ohlc

## unit functions

In [50]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Render a date/datetime as a 'YYYY-MM-DD' string."""
    iso_day_format = '%Y-%m-%d'
    return date.strftime(iso_day_format)

def date_to_ymd_str(date):
    """Return the [yyyy, mm, dd] string parts of a date.

    Works on ``str(date)``: everything after the first space (the time part)
    is discarded and the remainder is split on '-'.
    """
    day_part = str(date).split(' ')[0]
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as '2019-08-01T00:00:00.000Z' into a datetime.

    The trailing 'Z' is matched literally, so the result carries no tzinfo.
    """
    zulu_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    return datetime.datetime.strptime(txt, zulu_format)

def filename_to_date(filename, symbol):
    """Extract the day encoded in a listing filename as a datetime.

    The symbol, the '_premium_index' / '_index_price' markers and the '.pkl'
    extension are removed; what is left must be a 'YYYY-MM-DD' date.
    """
    datestr = filename
    for fragment in (symbol, '_premium_index', '_index_price', '.pkl'):
        datestr = datestr.replace(fragment, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries published under https://public.bybit.com/<dataType>/.

    Only 'premium_index', 'spot_index' and 'trading' are accepted; any other
    value yields None. Each entry is the href target of the listing page with
    its last character removed.
    """
    if dataType not in ('premium_index', 'spot_index', 'trading'):
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    anchors = res.text.split('href=')[1:]
    return [anchor.split('"')[1][:-1] for anchor in anchors]

def get_list_filename_premiumIndex(symbol):
    """List the href targets found on the premium_index listing page of `symbol`.

    Returns None when the symbol is not part of the premium_index symbol list.
    """
    if symbol not in get_list_symbols('premium_index'):
        return None
    listing = requests.get(f'https://public.bybit.com/premium_index/{symbol}/')
    hrefs = listing.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_premium_index(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of daily premium-index files and store them as one monthly file.

    Every day of `year`-`month` is requested from public.bybit.com, parsed with
    pandas and concatenated. The 'start_at' column is renamed to 'time' and the
    'symbol' / 'period' columns are dropped before writing.

    Args:
        symbol: symbol name used in the url and in the output path.
        year: calendar year (int).
        month: calendar month (int, 1-12).
        print_mode: when True, print a progress line (and skipped days).

    Returns:
        None. The result is written to
        D://Trading/Data/Crypto/Bybit/premium_ohlc/<symbol>/1min/<yyyy>/<yyyy>-<mm>.pkl
    """
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for premium index {symbol} year {year} and month {month}')

    date = datetime.datetime(year, month, 1)
    year_str, month_str, _ = date_to_ymd_str(date)

    # Scratch folder for the daily files; created on demand so a fresh setup works.
    os.makedirs('tmp_data', exist_ok=True)

    daily_frames = []
    # loop on day
    while date.month == month:
        day_str = date_to_ymd_str(date)[2]
        url = (f'https://public.bybit.com/premium_index/{symbol}/'
               f'{symbol}{year_str}-{month_str}-{day_str}_premium_index.pkl')
        date += datetime.timedelta(days=1)

        r = requests.get(url)
        if not r.ok:
            # A missing day must not be parsed as data: the body of an error
            # response is an error page, not a price file.
            if print_mode:
                print(f'{url} not available (HTTP {r.status_code}), skipped')
            continue

        tmp_path = f'tmp_data/{day_str}.pkl'
        with open(tmp_path, "wb") as f:
            f.write(r.content)
        try:
            daily_frames.append(pd.read_csv(tmp_path))
        finally:
            # Always clean the scratch file, even when parsing fails.
            os.remove(tmp_path)

    # Single concat instead of growing the frame day by day (avoids re-copying
    # the accumulated rows at every iteration). The empty template keeps the
    # expected columns when no day could be downloaded.
    template = pd.DataFrame(columns=['start_at', 'symbol', 'period', 'open', 'high', 'low', 'close'])
    df = pd.concat([template] + daily_frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"start_at": "time"}).drop(['symbol', 'period'], axis=1)

    # NOTE: the file is written with to_csv although its extension is '.pkl';
    # the path is kept unchanged because this notebook reads it back with read_csv.
    out_dir = f'D://Trading/Data/Crypto/Bybit/premium_ohlc/{symbol}/1min/{year_str}'
    os.makedirs(out_dir, exist_ok=True)
    df.to_csv(f'{out_dir}/{year_str}-{month_str}.pkl', index=False)
        
def init_premium_ohlc(infos_path='D://Trading/Data/Crypto/Bybit/infos.json'):
    """Make sure the dataset catalogue contains a 'premium_ohlc' entry.

    The JSON file at `infos_path` is loaded when it exists (otherwise an empty
    catalogue is used), the 'premium_ohlc' description is added only if it is
    missing, and the catalogue is written back.

    Args:
        infos_path: location of the catalogue file; defaults to the Bybit
            data folder used throughout this notebook.
    """
    if os.path.isfile(infos_path):
        with open(infos_path, 'r') as f:
            dic = json.load(f)
    else:
        dic = {}

    if 'premium_ohlc' not in dic:
        # Python booleans (True/False) are required here; json.dump turns
        # them into JSON true/false.
        dic['premium_ohlc'] = {
            "timeframe_str": {
                "1min": {
                    "file_length": "1m",
                    "columns": [
                        "time",
                        "open",
                        "high",
                        "low",
                        "close",
                    ],
                    "timeframe_sec": 60,
                    "hasVolume": False,
                    "unit_time": "s",
                    "is_gz": True,
                    "is_native": True,
                    "description": "ohlc of futures premium, i.e the difference between the perpetual contracts relative to the mark price. premium = 0.000003 => futures price is 0.03% higher than mark price",
                    "dic_year_month": {},
                }
            }
        }

    with open(infos_path, 'w') as f:
        json.dump(dic, f)
        
# Register the 'premium_ohlc' description in infos.json (an existing entry is left untouched).
init_premium_ohlc()

In [93]:
# Single-month trial run: BTCUSD premium index, May 2020, with progress output.
dowload_monthly_premium_index(symbol='BTCUSD', year=2020, month=5, print_mode=True)

2021-03-22 17:48:33.797173 downloading data for premium index BTCUSD year 2020 and month 5


In [37]:
# Read the stored month back (CSV content despite the '.pkl' name) and add a
# datetime column derived from the epoch-second 'time' column.
df = pd.read_csv(
    'D://Trading/Data/Crypto/Bybit/premium_ohlc/BTCUSD/1min/2020/2020-05.pkl'
)
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,open,high,low,close,date
0,1588291200,0.000025,0.000100,0.000025,0.000100,2020-05-01 00:00:00
1,1588291260,0.000100,0.000100,-0.000169,-0.000169,2020-05-01 00:01:00
2,1588291320,-0.000169,-0.000074,-0.000169,-0.000074,2020-05-01 00:02:00
3,1588291380,-0.000074,0.000198,-0.000074,0.000198,2020-05-01 00:03:00
4,1588291440,0.000198,0.000198,-0.000289,-0.000289,2020-05-01 00:04:00
...,...,...,...,...,...,...
44635,1590969300,-0.000162,-0.000039,-0.000162,-0.000039,2020-05-31 23:55:00
44636,1590969360,-0.000039,0.000100,-0.000039,0.000100,2020-05-31 23:56:00
44637,1590969420,0.000100,0.000100,-0.000094,-0.000094,2020-05-31 23:57:00
44638,1590969480,-0.000094,0.000083,-0.000094,0.000083,2020-05-31 23:58:00


## loop

In [52]:
# Download every listed month of premium-index data for every symbol.
dataType = 'premium_index'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_premiumIndex(symbol)
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')
    if not list_filename:
        # Nothing listed for this symbol: there is no month to derive, and
        # falling through would reuse `date` from the previous symbol.
        continue

    # Collect the months present in the listing, grouped by year.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        months = dic_year_month.setdefault(date.year, [])
        if date.month not in months:
            months.append(date.month)

    # Drop the most recent month (presumably still incomplete on the server -
    # confirm). Picked by value so it does not depend on the listing order.
    last_year = max(dic_year_month)
    dic_year_month[last_year].remove(max(dic_year_month[last_year]))

    for year, months in dic_year_month.items():
        for month in months:
            dowload_monthly_premium_index(symbol=symbol, year=year, month=month, print_mode=True)


2021-03-22 19:19:15.636069 - dowloading BTCUSD data...
2021-03-22 19:19:15.641055 downloading data for premium index BTCUSD year 2019 and month 10
2021-03-22 19:19:18.872849 downloading data for premium index BTCUSD year 2019 and month 11
2021-03-22 19:19:31.898088 downloading data for premium index BTCUSD year 2019 and month 12
2021-03-22 19:19:44.357348 downloading data for premium index BTCUSD year 2020 and month 1
2021-03-22 19:19:58.676704 downloading data for premium index BTCUSD year 2020 and month 2
2021-03-22 19:20:10.216188 downloading data for premium index BTCUSD year 2020 and month 3
2021-03-22 19:20:23.045300 downloading data for premium index BTCUSD year 2020 and month 4
2021-03-22 19:20:35.039441 downloading data for premium index BTCUSD year 2020 and month 5
2021-03-22 19:20:47.500486 downloading data for premium index BTCUSD year 2020 and month 6
2021-03-22 19:21:02.608808 downloading data for premium index BTCUSD year 2020 and month 7
2021-03-22 19:21:17.273258 downl

# spot ohlc

## unit functions

In [26]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Render a date/datetime as a 'YYYY-MM-DD' string."""
    iso_day_format = '%Y-%m-%d'
    return date.strftime(iso_day_format)

def date_to_ymd_str(date):
    """Return the [yyyy, mm, dd] string parts of a date.

    Works on ``str(date)``: everything after the first space (the time part)
    is discarded and the remainder is split on '-'.
    """
    day_part = str(date).split(' ')[0]
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as '2019-08-01T00:00:00.000Z' into a datetime.

    The trailing 'Z' is matched literally, so the result carries no tzinfo.
    """
    zulu_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    return datetime.datetime.strptime(txt, zulu_format)

def filename_to_date(filename, symbol):
    """Extract the day encoded in a listing filename as a datetime.

    The symbol, the '_premium_index' / '_index_price' markers and the '.pkl'
    extension are removed; what is left must be a 'YYYY-MM-DD' date.
    """
    datestr = filename
    for fragment in (symbol, '_premium_index', '_index_price', '.pkl'):
        datestr = datestr.replace(fragment, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries published under https://public.bybit.com/<dataType>/.

    Only 'premium_index', 'spot_index' and 'trading' are accepted; any other
    value yields None. Each entry is the href target of the listing page with
    its last character removed.
    """
    if dataType not in ('premium_index', 'spot_index', 'trading'):
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    anchors = res.text.split('href=')[1:]
    return [anchor.split('"')[1][:-1] for anchor in anchors]

def get_list_filename_spotIndex(symbol):
    """List the href targets found on the spot_index listing page of `symbol`.

    Returns None when the symbol is not part of the spot_index symbol list.
    """
    if symbol not in get_list_symbols('spot_index'):
        return None
    listing = requests.get(f'https://public.bybit.com/spot_index/{symbol}/')
    hrefs = listing.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_spot_index(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of daily spot-index files and store them as one monthly file.

    Every day of `year`-`month` is requested from public.bybit.com, parsed with
    pandas and concatenated. The 'start_at' column is renamed to 'time' and the
    'symbol' / 'period' columns are dropped before writing.

    Args:
        symbol: symbol name used in the url and in the output path.
        year: calendar year (int).
        month: calendar month (int, 1-12).
        print_mode: when True, print a progress line (and skipped days).

    Returns:
        None. The result is written to
        D://Trading/Data/Crypto/Bybit/spot_ohlc/<symbol>/1min/<yyyy>/<yyyy>-<mm>.pkl
    """
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for spot index {symbol} year {year} and month {month}')

    date = datetime.datetime(year, month, 1)
    year_str, month_str, _ = date_to_ymd_str(date)

    # Scratch folder for the daily files; created on demand so a fresh setup works.
    os.makedirs('tmp_data', exist_ok=True)

    daily_frames = []
    # loop on day
    while date.month == month:
        day_str = date_to_ymd_str(date)[2]
        url = (f'https://public.bybit.com/spot_index/{symbol}/'
               f'{symbol}{year_str}-{month_str}-{day_str}_index_price.pkl')
        date += datetime.timedelta(days=1)

        r = requests.get(url)
        if not r.ok:
            # A missing day must not be parsed as data: the body of an error
            # response is an error page, not a price file.
            if print_mode:
                print(f'{url} not available (HTTP {r.status_code}), skipped')
            continue

        tmp_path = f'tmp_data/{day_str}.pkl'
        with open(tmp_path, "wb") as f:
            f.write(r.content)
        try:
            daily_frames.append(pd.read_csv(tmp_path))
        finally:
            # Always clean the scratch file, even when parsing fails.
            os.remove(tmp_path)

    # Single concat instead of growing the frame day by day. The empty template
    # keeps the expected columns when no day could be downloaded.
    template = pd.DataFrame(columns=['start_at', 'symbol', 'period', 'open', 'high', 'low', 'close'])
    df = pd.concat([template] + daily_frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"start_at": "time"}).drop(['symbol', 'period'], axis=1)

    # The target folder is created here; to_csv does not create missing parents.
    # NOTE: the file is written with to_csv although its extension is '.pkl';
    # the path is kept unchanged because this notebook reads it back with read_csv.
    out_dir = f'D://Trading/Data/Crypto/Bybit/spot_ohlc/{symbol}/1min/{year_str}'
    os.makedirs(out_dir, exist_ok=True)
    df.to_csv(f'{out_dir}/{year_str}-{month_str}.pkl', index=False)
        
def init_spot_ohlc(infos_path='D://Trading/Data/Crypto/Bybit/infos.json'):
    """Make sure the dataset catalogue contains a 'spot_ohlc' entry.

    The JSON file at `infos_path` is loaded when it exists (otherwise an empty
    catalogue is used), the 'spot_ohlc' description is added only if it is
    missing, and the catalogue is written back.

    Args:
        infos_path: location of the catalogue file; defaults to the Bybit
            data folder used throughout this notebook.
    """
    if os.path.isfile(infos_path):
        with open(infos_path, 'r') as f:
            dic = json.load(f)
    else:
        dic = {}

    if 'spot_ohlc' not in dic:
        # Python booleans (True/False) are required here; json.dump turns
        # them into JSON true/false.
        dic['spot_ohlc'] = {
            "timeframe_str": {
                "1min": {
                    "file_length": "1m",
                    "columns": [
                        "time",
                        "open",
                        "high",
                        "low",
                        "close",
                    ],
                    "timeframe_sec": 60,
                    "hasVolume": False,
                    "unit_time": "s",
                    "is_gz": True,
                    "is_native": True,
                    "description": "ohlc of futures spot, i.e candlesticks of asset price",
                    "dic_year_month": {},
                }
            }
        }

    with open(infos_path, 'w') as f:
        json.dump(dic, f)
        
# Register the 'spot_ohlc' description in infos.json (an existing entry is left untouched).
init_spot_ohlc()

In [22]:
# Single-month trial run: BTCUSD spot index, May 2020, with progress output.
dowload_monthly_spot_index(symbol='BTCUSD', year=2020, month=5, print_mode=True)

2021-03-22 18:33:56.709749 downloading data for spot index BTCUSD year 2020 and month 5


In [29]:
# Read the stored month back (CSV content despite the '.pkl' name) and add a
# datetime column derived from the epoch-second 'time' column.
df = pd.read_csv(
    'D://Trading/Data/Crypto/Bybit/spot_ohlc/BTCUSD/1min/2020/2020-05.pkl'
)
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,open,high,low,close,date
0,1588291200,8627.36,8648.55,8623.64,8648.19,2020-05-01 00:00:00
1,1588291260,8648.19,8673.70,8648.19,8662.79,2020-05-01 00:01:00
2,1588291320,8662.79,8663.38,8635.14,8635.93,2020-05-01 00:02:00
3,1588291380,8635.93,8650.39,8634.29,8646.25,2020-05-01 00:03:00
4,1588291440,8646.25,8651.45,8638.69,8638.69,2020-05-01 00:04:00
...,...,...,...,...,...,...
44635,1590969300,9466.29,9470.60,9466.29,9468.67,2020-05-31 23:55:00
44636,1590969360,9468.67,9473.03,9468.67,9471.83,2020-05-31 23:56:00
44637,1590969420,9471.83,9471.83,9463.85,9464.69,2020-05-31 23:57:00
44638,1590969480,9464.69,9465.43,9455.67,9456.63,2020-05-31 23:58:00


## loop

In [28]:
# Download every listed month of spot-index data for symbols not stored yet.
dataType = 'spot_index'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_spotIndex(symbol)
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')
    if not list_filename:
        # Nothing listed for this symbol: there is no month to derive, and
        # falling through would reuse `date` from the previous symbol.
        continue

    # Decided once per symbol, BEFORE any download: a symbol that already has
    # a folder is considered done. Checking per month would skip months 2..n
    # as soon as the first download creates the folder, and listing
    # '<symbol>/1min' for a symbol without folder raises FileNotFoundError.
    if os.path.isdir(f'D://Trading/Data/Crypto/Bybit/spot_ohlc/{symbol}'):
        continue

    # Collect the months present in the listing, grouped by year.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        months = dic_year_month.setdefault(date.year, [])
        if date.month not in months:
            months.append(date.month)

    # Drop the most recent month (presumably still incomplete on the server -
    # confirm). Picked by value so it does not depend on the listing order.
    last_year = max(dic_year_month)
    dic_year_month[last_year].remove(max(dic_year_month[last_year]))

    for year, months in dic_year_month.items():
        for month in months:
            dowload_monthly_spot_index(symbol=symbol, year=year, month=month, print_mode=True)

2021-03-22 18:43:15.168060 - dowloading BTCUSD data...
2021-03-22 18:43:15.172073 downloading data for spot index BTCUSD year 2019 and month 10
2021-03-22 18:43:32.975429 downloading data for spot index BTCUSD year 2019 and month 11
2021-03-22 18:43:49.298813 downloading data for spot index BTCUSD year 2019 and month 12
2021-03-22 18:44:07.836969 downloading data for spot index BTCUSD year 2020 and month 1
2021-03-22 18:44:25.119595 downloading data for spot index BTCUSD year 2020 and month 2
2021-03-22 18:44:42.702640 downloading data for spot index BTCUSD year 2020 and month 3
2021-03-22 18:45:00.220872 downloading data for spot index BTCUSD year 2020 and month 4
2021-03-22 18:45:17.995735 downloading data for spot index BTCUSD year 2020 and month 5
2021-03-22 18:45:20.977689 downloading data for spot index BTCUSD year 2020 and month 6
2021-03-22 18:45:36.914657 downloading data for spot index BTCUSD year 2020 and month 7
2021-03-22 18:45:55.666085 downloading data for spot index BTC

# Trade history tick

## unit functions

In [26]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Render a date/datetime as a 'YYYY-MM-DD' string."""
    iso_day_format = '%Y-%m-%d'
    return date.strftime(iso_day_format)

def date_to_ymd_str(date):
    """Return the [yyyy, mm, dd] string parts of a date.

    Works on ``str(date)``: everything after the first space (the time part)
    is discarded and the remainder is split on '-'.
    """
    day_part = str(date).split(' ')[0]
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as '2019-08-01T00:00:00.000Z' into a datetime.

    The trailing 'Z' is matched literally, so the result carries no tzinfo.
    """
    zulu_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    return datetime.datetime.strptime(txt, zulu_format)

def filename_to_date(filename, symbol):
    """Extract the day encoded in a listing filename as a datetime.

    The symbol, the '_premium_index' / '_index_price' markers and the
    '.csv.gz' extension are removed; what is left must be a 'YYYY-MM-DD' date.
    """
    datestr = filename
    for fragment in (symbol, '_premium_index', '_index_price', '.csv.gz'):
        datestr = datestr.replace(fragment, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries published under https://public.bybit.com/<dataType>/.

    Only 'premium_index', 'spot_index' and 'trading' are accepted; any other
    value yields None. Each entry is the href target of the listing page with
    its last character removed.
    """
    if dataType not in ('premium_index', 'spot_index', 'trading'):
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    anchors = res.text.split('href=')[1:]
    return [anchor.split('"')[1][:-1] for anchor in anchors]
    
def get_list_filename_trading(symbol):
    """List the href targets found on the trading listing page of `symbol`.

    Returns None when the symbol is not part of the trading symbol list.
    """
    if symbol not in get_list_symbols('trading'):
        return None
    listing = requests.get(f'https://public.bybit.com/trading/{symbol}/')
    hrefs = listing.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_trading(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of daily trade files and pickle them as one monthly frame.

    The month is processed only when the file of its first day appears in the
    server listing. Each daily file is reversed (``iloc[::-1]``) before being
    appended, 'timestamp' is renamed to 'time' and the 'symbol', 'trdMatchID',
    'grossValue' and 'homeNotional' columns are dropped.

    Args:
        symbol: symbol name used in the url and in the output path.
        year: calendar year (int).
        month: calendar month (int, 1-12).
        print_mode: when True, print a progress line.

    Returns:
        1 when the month was written to
        D://Trading/Data/Crypto/Bybit/trades_raw_tick/<symbol>/<yyyy>-<mm>.pkl,
        0 when the first day of the month is not listed.

    Raises:
        requests.HTTPError: when a daily file cannot be downloaded. The month
            is then not written at all instead of being stored incomplete.
    """
    date = datetime.datetime(year, month, 1)
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for trades index {symbol} year {year} and month {month}')

    year_str, month_str, day_str = date_to_ymd_str(date)

    # The listing helper returns None for an unknown symbol; treat that as
    # "nothing available" rather than failing on `in None`.
    first_file = f'{symbol}{year_str}-{month_str}-{day_str}.csv.gz'
    available = get_list_filename_trading(symbol) or []
    if first_file not in available:
        print(first_file, 'not in historical data')
        return 0

    # Scratch folder for the daily files; created on demand so a fresh setup works.
    os.makedirs('tmp_data', exist_ok=True)

    daily_frames = []
    # loop on day
    while date.month == month:
        day_str = date_to_ymd_str(date)[2]
        url = f'https://public.bybit.com/trading/{symbol}/{symbol}{year_str}-{month_str}-{day_str}.csv.gz'
        date += datetime.timedelta(days=1)

        r = requests.get(url)
        # Fail with the HTTP status instead of a confusing gzip parse error later.
        r.raise_for_status()

        tmp_path = f'tmp_data/{day_str}.csv.gz'
        with open(tmp_path, "wb") as f:
            f.write(r.content)
        try:
            # The '.csv.gz' suffix lets pandas pick gzip decompression.
            daily_frames.append(pd.read_csv(tmp_path).iloc[::-1])
        finally:
            # Always clean the scratch file, even when parsing fails.
            os.remove(tmp_path)

    # Single concat: tick files hold millions of rows, so growing the frame
    # day by day would re-copy the accumulated data at every iteration.
    template = pd.DataFrame(columns=['timestamp', 'symbol', 'side', 'size', 'price', 'tickDirection',
                                     'trdMatchID', 'grossValue', 'homeNotional', 'foreignNotional'])
    df = pd.concat([template] + daily_frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"timestamp": "time"}).drop(
        ['symbol', 'trdMatchID', 'grossValue', 'homeNotional'], axis=1)

    out_dir = f'D://Trading/Data/Crypto/Bybit/trades_raw_tick/{symbol}'
    # exist_ok only tolerates "already there"; real failures (permissions,
    # missing drive) are no longer silently ignored.
    os.makedirs(out_dir, exist_ok=True)
    df.to_pickle(f'{out_dir}/{year_str}-{month_str}.pkl')

    return 1

In [27]:
# Single-month trial run: BTCUSD trades, May 2020 (returns 1 when the month was written, 0 when it is not listed).
dowload_monthly_trading(symbol='BTCUSD', year=2020, month=5, print_mode=False)

1

In [28]:
# Load the pickled tick data of the month and add a datetime column derived
# from the epoch-second 'time' column.
df = pd.read_pickle(
    'D://Trading/Data/Crypto/Bybit/trades_raw_tick/BTCUSD/2020-05.pkl'
)
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,side,size,price,tickDirection,foreignNotional,date
0,1.588291e+09,Sell,1853,8628.5,MinusTick,0.214753,2020-05-01 00:00:00.122529024
1,1.588291e+09,Sell,59099,8628.5,ZeroMinusTick,6.849279,2020-05-01 00:00:00.122529024
2,1.588291e+09,Sell,37601,8628.5,ZeroMinusTick,4.357768,2020-05-01 00:00:00.127640064
3,1.588291e+09,Sell,19054,8628.5,ZeroMinusTick,2.208263,2020-05-01 00:00:00.127640064
4,1.588291e+09,Sell,93345,8628.5,ZeroMinusTick,10.818219,2020-05-01 00:00:00.127640064
...,...,...,...,...,...,...,...
6439357,1.590970e+09,Sell,7638,9443.0,MinusTick,0.808853,2020-05-31 23:59:58.405544960
6439358,1.590970e+09,Sell,125,9443.0,ZeroMinusTick,0.013237,2020-05-31 23:59:58.417413888
6439359,1.590970e+09,Buy,197,9443.5,PlusTick,0.020861,2020-05-31 23:59:59.141096960
6439360,1.590970e+09,Buy,459,9443.5,ZeroMinusTick,0.048605,2020-05-31 23:59:59.213433856


## loop

In [None]:
# Download every listed month of trade data for every symbol.
dataType = 'trading'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_trading(symbol)
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')
    if not list_filename:
        # Nothing listed for this symbol: there is no month to derive, and
        # falling through would reuse `date` from the previous symbol.
        continue

    # Collect the months present in the listing, grouped by year.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        months = dic_year_month.setdefault(date.year, [])
        if date.month not in months:
            months.append(date.month)

    # Drop the most recent month (presumably still incomplete on the server -
    # confirm). Picked by value so it does not depend on the listing order.
    last_year = max(dic_year_month)
    dic_year_month[last_year].remove(max(dic_year_month[last_year]))

    for year, months in dic_year_month.items():
        for month in months:
            try:
                dowload_monthly_trading(symbol=symbol, year=year, month=month, print_mode=True)
            except Exception as exc:
                # Best effort: keep going with the next month, but report the
                # cause. `Exception` (not a bare except) lets Ctrl-C stop the run.
                print(f'{datetime.datetime.now()} ERROR with {symbol} {year} {month}', repr(exc))

2021-04-08 09:17:25.994490 - dowloading ADAUSDT data...
2021-04-08 09:17:25.997468 downloading data for trades index ADAUSDT year 2021 and month 3
ADAUSDT2021-03-01.csv.gz not in historical data
2021-04-08 09:17:26.612559 - dowloading BCHUSDT data...
2021-04-08 09:17:26.613587 downloading data for trades index BCHUSDT year 2020 and month 12
BCHUSDT2020-12-01.csv.gz not in historical data
2021-04-08 09:17:26.729281 downloading data for trades index BCHUSDT year 2021 and month 1
2021-04-08 09:17:53.589187 downloading data for trades index BCHUSDT year 2021 and month 2
2021-04-08 09:18:17.174450 downloading data for trades index BCHUSDT year 2021 and month 3
2021-04-08 09:18:59.999650 - dowloading BTCUSD data...
2021-04-08 09:19:00.003666 downloading data for trades index BTCUSD year 2019 and month 10
2021-04-08 09:20:08.750779 downloading data for trades index BTCUSD year 2019 and month 11
2021-04-08 09:21:29.375020 downloading data for trades index BTCUSD year 2019 and month 12
2021-04-

In [5]:
import shutil

# DESTRUCTIVE: permanently deletes the '1min' folder of every symbol found
# under the 'trades' data folder. rmtree raises if a symbol has no such folder.
for symbol in os.listdir('D://Trading/Data/Crypto/Bybit/trades'):
    shutil.rmtree(f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/1min')

# custom 1min ohlc

## infos gathering

In [6]:
def update_json(assetType, broker, instrument, timeframe, symbol, columns, dic_resamp, symbol_infos):
    """Store the description of one symbol in the broker's JSON catalogue.

    The catalogue lives at D://Trading/Data/<assetType>/<broker>.json and is
    nested as instrument -> timeframe -> symbol. An existing file is loaded
    and extended; otherwise a new catalogue is started. Any previous entry
    for the same symbol is replaced.
    """
    folder = f'D://Trading/Data/{assetType}'
    json_name = f'{broker}.json'
    json_path = f'{folder}/{json_name}'

    dic = {}
    if json_name in os.listdir(folder):
        with open(json_path, 'r') as f:
            dic = json.load(f)

    # Create the intermediate levels on first use.
    per_timeframe = dic.setdefault(instrument, {}).setdefault(timeframe, {})
    per_timeframe[symbol] = {
        "columns": columns,
        "resampling_dict": dic_resamp,
        "infos": symbol_infos
    }

    with open(json_path, 'w') as f:
        json.dump(dic, f)

def update_infos(assetType, broker, instrument, timeframe, symbol, df, markets_bybit):
    """Build the column / resampling / market description of a symbol and store it.

    Each column of `df` gets the aggregation to use when resampling
    ('first', 'last', 'max', 'min', 'sum' or 'mean'; unknown columns fall
    back to 'mean'). Market details are read from `markets_bybit[symbol]`
    and everything is handed to `update_json`.
    """
    # Aggregation per known column name, grouped by rule.
    columns_by_rule = {
        'first': ['time', 'open', 'ask_open', 'bid_open'],
        'last': ['close', 'ask_close', 'bid_close'],
        'max': ['high', 'ask_high', 'bid_high'],
        'min': ['low', 'ask_low', 'bid_low'],
        'sum': ['size', 'foreignNotional', 'v_b', 'v_s', 'v_weigted_p', 'v_weigted_p_b',
                'v_weigted_p_s', 'qty', 'qty_b', 'qty_s'],
        'mean': ['v_b_over_v', 'v_s_over_v', 'p_mean', 'v_weigted_p_over_p_mean',
                 'v_weigted_p_b_over_v_weigted_p', 'v_weigted_p_s_over_v_weigted_p',
                 'p_std', 'p_std_b', 'p_std_s', 'spread'],
    }
    rule_of = {name: rule for rule, names in columns_by_rule.items() for name in names}

    columns = list(df.columns)
    dic_resamp = {col: rule_of.get(col, 'mean') for col in df.columns}

    bybit_info = markets_bybit[symbol]
    precision = bybit_info['precision']
    amount_limits = bybit_info['limits']['amount']
    symbol_infos = {
        'point': precision['price'],
        'contract_value_in_usd': precision['amount'],
        'volume_step': amount_limits['min'],
        'volume_max': amount_limits['max'],
        'min_point_stop': 0,
        'taker_fee': bybit_info['taker'],
        'maker_fee': bybit_info['maker'],
    }

    update_json(assetType, broker, instrument, timeframe, symbol, columns, dic_resamp, symbol_infos)

## unit functions

In [7]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def custom_transfo(x):
    """Summarise one group of trades into a Series of 22 statistics.

    `x` must provide the columns 'size', 'price', 'side' ('Buy' / 'Sell') and
    'time'; each is forward-filled before use. The result holds volumes,
    volume-weighted prices, price dispersion, trade counts and the standard
    deviation of the time gaps, overall and per side. A group without rows
    yields zeros for every statistic.
    """
    list_index = ['v_b', 'v_s', 'v_b_over_v', 'v_s_over_v', 'p_mean', 'v_weigted_p', 'v_weigted_p_b',
                  'v_weigted_p_s', 'v_weigted_p_over_p_mean', 'v_weigted_p_b_over_v_weigted_p',
                  'v_weigted_p_s_over_v_weigted_p', 'p_std', 'p_std_b', 'p_std_s', 'qty', 'qty_b',
                  'qty_s', 'qty_b_over_qty', 'qty_s_over_qty', "dt_std", "dt_b_std", "dt_s_std"]

    sizes = x['size'].ffill()
    prices = x['price'].ffill()
    sides = x['side'].ffill()
    times = x['time'].ffill()

    n_trades = len(sides)
    if not n_trades > 0:
        # Empty group: every statistic is reported as 0.
        return pd.Series([0] * len(list_index), index=list_index)

    buy_mask = sides == 'Buy'
    sell_mask = sides == 'Sell'
    notional = sizes * prices
    stats = {}

    # Traded volume per side and its share of the total.
    stats['v_b'] = sizes[buy_mask].sum()
    stats['v_s'] = sizes[sell_mask].sum()
    two_sided_volume = stats['v_b'] + stats['v_s']
    if two_sided_volume > 0:
        stats['v_b_over_v'] = stats['v_b'] / two_sided_volume
        stats['v_s_over_v'] = 1 - stats['v_b_over_v']
    else:
        stats['v_b_over_v'] = 0
        stats['v_s_over_v'] = 0

    # Plain and volume-weighted average prices.
    stats['p_mean'] = prices.mean()
    stats['v_weigted_p'] = notional.sum() / sizes.sum()

    # Per-side weighted price and time-gap dispersion (0 when the side did not trade).
    for suffix, mask in (('b', buy_mask), ('s', sell_mask)):
        side_volume = stats[f'v_{suffix}']
        if side_volume > 0:
            stats[f'v_weigted_p_{suffix}'] = notional[mask].sum() / side_volume
            stats[f'dt_{suffix}_std'] = times[mask].diff().dropna().std()
        else:
            stats[f'v_weigted_p_{suffix}'] = 0
            stats[f'dt_{suffix}_std'] = 0

    stats['v_weigted_p_over_p_mean'] = stats['v_weigted_p'] / stats['p_mean']
    stats['v_weigted_p_b_over_v_weigted_p'] = stats['v_weigted_p_b'] / stats['v_weigted_p']
    stats['v_weigted_p_s_over_v_weigted_p'] = stats['v_weigted_p_s'] / stats['v_weigted_p']

    # Price dispersion, overall and per side.
    stats['p_std'] = prices.std()
    stats['p_std_b'] = prices[buy_mask].std()
    stats['p_std_s'] = prices[sell_mask].std()

    # Trade counts; sells are "everything that is not a buy".
    stats['qty'] = n_trades
    stats['qty_b'] = buy_mask.sum()
    stats['qty_s'] = n_trades - stats['qty_b']
    stats['qty_b_over_qty'] = stats['qty_b'] / n_trades
    stats['qty_s_over_qty'] = stats['qty_s'] / n_trades

    stats['dt_std'] = times.diff().dropna().std()

    return pd.Series([stats[name] for name in list_index], index=list_index)



def resamp_df(symbol, year_str, month_str, markets_bybit, tick_size=0.5):
    """Resample one month of raw trades into 1-minute custom OHLC candles.

    Reads D://Trading/Data/Crypto/Bybit/trades_raw_tick/<symbol>/<yyyy>-<mm>.pkl,
    rebuilds a bid/ask pair for every trade from its tick direction, then
    aggregates per minute: OHLC of mid/ask/bid, summed size and notional, plus
    the statistics of `custom_transfo`. The result is pickled to
    .../1min_customOHLC_from_trades/<symbol>/<yyyy>-<mm>.pkl.

    Args:
        symbol: symbol folder name.
        year_str: year as string, e.g. '2020'.
        month_str: zero-padded month as string, e.g. '05'.
        markets_bybit: market descriptions; not used at the moment (only the
            disabled update_infos call needs it).
        tick_size: distance assumed between bid and ask; defaults to the 0.5
            that was previously hard-coded.

    Returns:
        None. Nothing happens when the raw month file does not exist.
    """
    assetType = 'Crypto'
    broker = 'Bybit'
    root = f'D://Trading/Data/{assetType}/{broker}'

    src = f'{root}/trades_raw_tick/{symbol}/{year_str}-{month_str}.pkl'
    if not os.path.isfile(src):
        return

    df = pd.read_pickle(src)
    df.loc[:, 'date'] = pd.to_datetime(df.time, unit='s')

    # Rebuild quotes from the tick direction: an up-tick is taken to trade at
    # the ask, a down-tick at the bid; the other side is one tick away.
    # Rows with any other tickDirection keep bid = ask = 0.
    bids = np.zeros(len(df))
    asks = np.zeros(len(df))
    tickDir = df.tickDirection.to_numpy()
    price = df.price.to_numpy()
    for label in ('PlusTick', 'ZeroPlusTick'):
        mask = tickDir == label
        bids[mask] = price[mask] - tick_size
        asks[mask] = price[mask]
    for label in ('MinusTick', 'ZeroMinusTick'):
        mask = tickDir == label
        bids[mask] = price[mask]
        asks[mask] = price[mask] + tick_size
    df.loc[:, 'ask'] = asks
    df.loc[:, 'bid'] = bids
    df.loc[:, 'midprice'] = (bids + asks) / 2

    indexed = df.set_index('date')
    resamp = indexed.resample('1min')
    groupby = indexed.groupby(pd.Grouper(freq='1min'))

    df2 = resamp.agg({'time': 'first',
                      'midprice': 'ohlc',
                      'ask': 'ohlc',
                      'bid': 'ohlc',
                      'size': 'sum',
                      'foreignNotional': 'sum'})
    df3 = groupby.apply(custom_transfo)

    # Flatten the (source column, aggregate) pairs produced by agg into plain names.
    df4 = pd.concat([df2, df3], axis=1)
    df4 = df4.rename(columns={
        ('time', 'time'): 'time',
        ('midprice', 'open'): 'open',
        ('midprice', 'high'): 'high',
        ('midprice', 'low'): 'low',
        ('midprice', 'close'): 'close',
        ('ask', 'open'): 'ask_open',
        ('ask', 'high'): 'ask_high',
        ('ask', 'low'): 'ask_low',
        ('ask', 'close'): 'ask_close',
        ('bid', 'open'): 'bid_open',
        ('bid', 'high'): 'bid_high',
        ('bid', 'low'): 'bid_low',
        ('bid', 'close'): 'bid_close',
        ('size', 'size'): 'size',
        ('foreignNotional', 'foreignNotional'): 'foreignNotional',
    }).reset_index(drop=True)

    out_dir = f'{root}/1min_customOHLC_from_trades/{symbol}'
    # exist_ok only tolerates "already there"; other failures are reported.
    os.makedirs(out_dir, exist_ok=True)
    df4.to_pickle(f'{out_dir}/{year_str}-{month_str}.pkl')

    #update_infos(assetType, broker, 'trades', '1min', symbol, df4, markets_bybit)

            #update_infos(assetType, broker, 'trades', '1min', symbol, df4, markets_bybit)
                
import ccxt
# Index the Bybit market descriptions returned by ccxt by their market id.
# NOTE(review): fetch_markets presumably queries the exchange over the network - confirm.
markets_bybit = {market['id']:market for market in ccxt.bybit().fetch_markets()}

In [8]:
# Single-month trial run: resample the XRPUSD trades of May 2020.
resamp_df('XRPUSD', '2020', '05', markets_bybit)

In [13]:
# Read back the file written by resamp_df, which stores months directly under
# the symbol folder (there is no '1min' sub-folder in its output path).
pd.read_pickle('D://Trading/Data/Crypto/Bybit/1min_customOHLC_from_trades/XRPUSD/2020-05.pkl')

Unnamed: 0,time,open,high,low,close,size,foreignNotional,v_b,v_s,v_b_over_v,...,dt_b_std,dt_s_std,ask_open,bid_open,ask_high,bid_high,ask_low,bid_low,ask_close,bid_close
0,1.588291e+09,0.2116,0.2118,0.2114,0.2118,13738,64964.465801,4689.0,9049.0,0.341316,...,11.155793,,0.21165,0.21155,0.21185,0.21175,0.21145,0.21135,0.21185,0.21175
1,1.588291e+09,0.2118,0.2125,0.2118,0.2124,59250,279114.164034,59076.0,174.0,0.997063,...,2.709791,9.882334,0.21185,0.21175,0.21255,0.21245,0.21185,0.21175,0.21245,0.21235
2,1.588291e+09,0.2123,0.2124,0.2120,0.2120,6879,32415.401708,6720.0,159.0,0.976886,...,14.745909,20.451007,0.21235,0.21225,0.21245,0.21235,0.21205,0.21195,0.21205,0.21195
3,1.588291e+09,0.2120,0.2124,0.2120,0.2123,46868,220863.179564,40181.0,6687.0,0.857323,...,8.464944,3.685812,0.21205,0.21195,0.21245,0.21235,0.21205,0.21195,0.21235,0.21225
4,1.588291e+09,0.2124,0.2124,0.2123,0.2123,40912,192619.882411,39929.0,983.0,0.975973,...,15.152223,16.632835,0.21245,0.21235,0.21245,0.21235,0.21235,0.21225,0.21235,0.21225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44634,1.590969e+09,0.2025,0.2025,0.2025,0.2025,4814,23772.839506,800.0,4014.0,0.166182,...,7.133619,0.001463,0.20255,0.20245,0.20255,0.20245,0.20255,0.20245,0.20255,0.20245
44635,1.590969e+09,0.2026,0.2029,0.2026,0.2029,64990,320311.618316,64911.0,79.0,0.998784,...,3.035913,0.711883,0.20265,0.20255,0.20295,0.20285,0.20265,0.20255,0.20295,0.20285
44636,1.590969e+09,0.2029,0.2029,0.2028,0.2029,67975,335024.540611,64975.0,3000.0,0.955866,...,8.291157,,0.20295,0.20285,0.20295,0.20285,0.20285,0.20275,0.20295,0.20285
44637,1.590969e+09,0.2028,0.2028,0.2026,0.2026,376,1854.050693,1.0,375.0,0.002660,...,,2.923624,0.20285,0.20275,0.20285,0.20275,0.20265,0.20255,0.20265,0.20255


## loop

In [14]:
# Inventory of the '<yyyy>-<mm>.pkl' files stored per symbol:
# {symbol: {year_str: [month_str, ...]}}. Year and month are both taken from
# each filename, so the cell does not depend on a `year_str` left over from
# another cell.
dic_op_to_do = {}
for symbol in os.listdir('D://Trading/Data/Crypto/Bybit/1min_customOHLC_from_trades'):
    months_by_year = dic_op_to_do.setdefault(symbol, {})
    for filename in sorted(os.listdir(f'D://Trading/Data/Crypto/Bybit/1min_customOHLC_from_trades/{symbol}')):
        if not filename.endswith('.pkl'):
            continue
        year_str, _, month_str = filename[:-len('.pkl')].partition('-')
        if not month_str:
            # Not a '<yyyy>-<mm>.pkl' file.
            continue
        months = months_by_year.setdefault(year_str, [])
        if month_str not in months:
            months.append(month_str)

print(dic_op_to_do)

{'BCHUSDT': {'2021': ['01', '02', '03']}, 'BTCUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02', '03']}, 'BTCUSDT': {'2020': ['04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02', '03']}, 'EOSUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02', '03']}, 'ETHUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02', '03']}, 'ETHUSDT': {'2020': ['11', '12'], '2021': ['01', '02', '03']}, 'LINKUSDT': {'2020': ['11', '12'], '2021': ['01', '02', '03']}, 'LTCUSDT': {'2020': ['11', '12'], '2021': ['01', '02', '03']}, 'XRPUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02', '03']}, 'XTZUSDT': {'2020': ['11', '12'], '2021': ['01', '02', '03']}}


In [16]:
# Resample every inventoried month of every symbol except BTCUSD.
for symbol, months_by_year in dic_op_to_do.items():
    if symbol == 'BTCUSD':
        continue
    for year_str, months in months_by_year.items():
        for month_str in months:
            print(f'{datetime.datetime.now()} - resampling {symbol} of month {year_str}-{month_str}')
            try:
                resamp_df(symbol, year_str, month_str, markets_bybit)
            except Exception as exc:
                # Best effort: continue with the next month but show the cause.
                # `Exception` (not a bare except) lets Ctrl-C stop the run.
                print('error', symbol, year_str, month_str, repr(exc))
            

2021-04-07 18:33:15.286634 - resampling BCHUSDT of month 2021-01
2021-04-07 18:36:15.178325 - resampling BCHUSDT of month 2021-02
2021-04-07 18:38:56.794853 - resampling BCHUSDT of month 2021-03
2021-04-07 18:41:41.169360 - resampling BTCUSDT of month 2020-04
2021-04-07 18:44:20.628720 - resampling BTCUSDT of month 2020-05
2021-04-07 18:47:11.321149 - resampling BTCUSDT of month 2020-06
2021-04-07 18:49:49.732321 - resampling BTCUSDT of month 2020-07
2021-04-07 18:52:47.635405 - resampling BTCUSDT of month 2020-08
2021-04-07 18:55:50.618616 - resampling BTCUSDT of month 2020-09
2021-04-07 18:58:31.775162 - resampling BTCUSDT of month 2020-10
2021-04-07 19:01:14.558925 - resampling BTCUSDT of month 2020-11
2021-04-07 19:04:32.561775 - resampling BTCUSDT of month 2020-12
2021-04-07 19:07:41.735412 - resampling BTCUSDT of month 2021-01
2021-04-07 19:10:54.282048 - resampling BTCUSDT of month 2021-02
2021-04-07 19:13:45.444980 - resampling BTCUSDT of month 2021-03
2021-04-07 19:16:55.50075