# premium ohlc

## unit functions

In [50]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Format *date* as a ``YYYY-MM-DD`` string."""
    day_format = '%Y-%m-%d'
    return date.strftime(day_format)

def date_to_ymd_str(date):
    """Return the ``[yyyy, mm, dd]`` parts of *date* as a list of strings.

    Works on the text of ``str(date)``: everything after the first space
    (the time of day, if any) is discarded and the rest is split on ``-``.
    """
    day_part, _, _ = str(date).partition(' ')
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as ``'2019-08-01T00:00:00.000Z'`` into a naive ``datetime``."""
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    parsed = datetime.datetime.strptime(txt, timestamp_format)
    return parsed

def filename_to_date(filename, symbol):
    """Return the day encoded in an archive file name as a ``datetime``.

    The symbol, the ``_premium_index`` / ``_index_price`` tag and the
    ``.csv.gz`` extension are removed; what remains is parsed as ``YYYY-MM-DD``.
    """
    datestr = filename
    for token in (symbol, '_premium_index', '_index_price', '.csv.gz'):
        datestr = datestr.replace(token, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries linked from ``https://public.bybit.com/<dataType>/``.

    Returns ``None`` without any request when *dataType* is not one of
    ``'premium_index'``, ``'spot_index'`` or ``'trading'``.
    """
    if dataType not in ['premium_index', 'spot_index', 'trading']:
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    symbols = []
    for chunk in res.text.split('href=')[1:]:
        target = chunk.split('"')[1]
        # The last character of each link target is dropped
        # (presumably the trailing '/' of a directory link - confirm).
        symbols.append(target[:-1])
    return symbols

def get_list_filename_premiumIndex(symbol):
    """List the link targets found on the premium-index page of *symbol*.

    Returns ``None`` when *symbol* is not among ``get_list_symbols('premium_index')``.
    """
    if symbol not in get_list_symbols('premium_index'):
        return None
    res = requests.get(f'https://public.bybit.com/premium_index/{symbol}/')
    hrefs = res.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_premium_index(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of premium-index candles and store them as a single file.

    For every day of ``year``-``month`` the daily archive
    ``https://public.bybit.com/premium_index/<symbol>/<symbol><yyyy>-<mm>-<dd>_premium_index.csv.gz``
    is fetched into ``tmp_data/``. The daily files are concatenated,
    ``start_at`` is renamed to ``time``, the ``symbol`` and ``period`` columns
    are dropped and the result is written to
    ``D://Trading/Data/Crypto/Bybit/premium_ohlc/<symbol>/1min/<yyyy>/<yyyy>-<mm>.csv.gz``.

    Args:
        symbol: Bybit symbol, e.g. ``'BTCUSD'``.
        year: calendar year of the month to download.
        month: calendar month (1-12).
        print_mode: when true, print a timestamped progress line.

    Raises:
        requests.HTTPError: a daily archive answered with an HTTP error status.
    """
    date = datetime.datetime(year, month, 1)
    year_str, month_str = date.strftime('%Y'), date.strftime('%m')
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for premium index {symbol} year {year} and month {month}')

    os.makedirs('tmp_data', exist_ok=True)
    list_path = []
    # The empty frame fixes the expected column set/order even if a daily file deviates.
    frames = [pd.DataFrame(columns=['start_at', 'symbol', 'period', 'open', 'high', 'low', 'close'])]
    try:
        # loop on day
        while date.month == month:
            day_str = date.strftime('%d')
            url = (f'https://public.bybit.com/premium_index/{symbol}/'
                   f'{symbol}{year_str}-{month_str}-{day_str}_premium_index.csv.gz')
            r = requests.get(url, timeout=60)
            # Fail here rather than saving an error page under a .csv.gz name.
            r.raise_for_status()
            tmp_file = f'tmp_data/{day_str}.csv.gz'
            with open(tmp_file, "wb") as f:
                f.write(r.content)
            list_path.append(tmp_file)
            date += datetime.timedelta(days=1)
        frames.extend(pd.read_csv(tmp_file) for tmp_file in list_path)
    finally:
        # Temporary downloads are removed whether or not the month completed.
        for tmp_file in list_path:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    # Single concat instead of growing the frame day by day.
    df = pd.concat(frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"start_at": "time"}).drop(['symbol', 'period'], axis=1)

    path = f'D://Trading/Data/Crypto/Bybit/premium_ohlc/{symbol}/1min/{year_str}/{year_str}-{month_str}.csv.gz'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # pandas infers gzip compression from the '.gz' suffix.
    df.to_csv(path, index=False)
        
def init_premium_ohlc():
    """Make sure ``infos.json`` contains the ``premium_ohlc`` dataset description.

    Loads ``D://Trading/Data/Crypto/Bybit/infos.json`` (or starts from an empty
    dict when the file does not exist), adds the ``premium_ohlc`` entry if it
    is missing and writes the dict back. An existing entry is left untouched.
    """
    infos_path = 'D://Trading/Data/Crypto/Bybit/infos.json'

    if os.path.isfile(infos_path):
        with open(infos_path, 'r') as f:
            dic = json.load(f)
    else:
        dic = {}

    if 'premium_ohlc' not in dic:
        # Python booleans (True/False); json.dump serialises them as true/false.
        dic['premium_ohlc'] = {
            "timeframe_str": {
                "1min": {
                    "file_length": "1m",
                    "columns": [
                        "time",
                        "open",
                        "high",
                        "low",
                        "close"
                    ],
                    "timeframe_sec": 60,
                    "hasVolume": False,
                    "unit_time": "s",
                    "is_gz": True,
                    "is_native": True,
                    # NOTE(review): 0.000003 is 0.0003 %, not 0.03 % - confirm the intended example.
                    "description": "ohlc of futures premium, i.e the difference between the perpetual contracts relative to the mark price. premium = 0.000003 => futures price is 0.03% higher than mark price",
                    "dic_year_month": {}
                }
            }
        }

    with open(infos_path, 'w') as f:
        json.dump(dic, f)
        
# Run once so that infos.json carries the 'premium_ohlc' entry.
init_premium_ohlc()

In [93]:
# Single-month trial run (BTCUSD, May 2020) with the progress line enabled.
dowload_monthly_premium_index(symbol='BTCUSD', year=2020, month=5, print_mode=True)

2021-03-22 17:48:33.797173 downloading data for premium index BTCUSD year 2020 and month 5


In [37]:
# Reload the stored month and add a readable datetime column next to the epoch seconds.
df = pd.read_csv('D://Trading/Data/Crypto/Bybit/premium_ohlc/BTCUSD/1min/2020/2020-05.csv.gz')
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,open,high,low,close,date
0,1588291200,0.000025,0.000100,0.000025,0.000100,2020-05-01 00:00:00
1,1588291260,0.000100,0.000100,-0.000169,-0.000169,2020-05-01 00:01:00
2,1588291320,-0.000169,-0.000074,-0.000169,-0.000074,2020-05-01 00:02:00
3,1588291380,-0.000074,0.000198,-0.000074,0.000198,2020-05-01 00:03:00
4,1588291440,0.000198,0.000198,-0.000289,-0.000289,2020-05-01 00:04:00
...,...,...,...,...,...,...
44635,1590969300,-0.000162,-0.000039,-0.000162,-0.000039,2020-05-31 23:55:00
44636,1590969360,-0.000039,0.000100,-0.000039,0.000100,2020-05-31 23:56:00
44637,1590969420,0.000100,0.000100,-0.000094,-0.000094,2020-05-31 23:57:00
44638,1590969480,-0.000094,0.000083,-0.000094,0.000083,2020-05-31 23:58:00


## loop

In [52]:
# Download every available month of premium-index data for every listed symbol.
dataType = 'premium_index'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_premiumIndex(symbol) or []
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')
    # Collect, per year, the months that have at least one daily file.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        dic_year_month.setdefault(date.year, [])
        if date.month not in dic_year_month[date.year]:
            dic_year_month[date.year].append(date.month)
    if not dic_year_month:
        # Nothing listed for this symbol: skip instead of reusing the
        # previous symbol's `date` for the removal below.
        continue
    # Drop the most recent month (presumably still incomplete - confirm),
    # chosen by value so the result does not depend on the listing order.
    year = max(dic_year_month)
    dic_year_month[year].remove(max(dic_year_month[year]))

    for year in sorted(dic_year_month):
        for month in sorted(dic_year_month[year]):
            dowload_monthly_premium_index(symbol=symbol, year=year, month=month, print_mode=True)


2021-03-22 19:19:15.636069 - dowloading BTCUSD data...
2021-03-22 19:19:15.641055 downloading data for premium index BTCUSD year 2019 and month 10
2021-03-22 19:19:18.872849 downloading data for premium index BTCUSD year 2019 and month 11
2021-03-22 19:19:31.898088 downloading data for premium index BTCUSD year 2019 and month 12
2021-03-22 19:19:44.357348 downloading data for premium index BTCUSD year 2020 and month 1
2021-03-22 19:19:58.676704 downloading data for premium index BTCUSD year 2020 and month 2
2021-03-22 19:20:10.216188 downloading data for premium index BTCUSD year 2020 and month 3
2021-03-22 19:20:23.045300 downloading data for premium index BTCUSD year 2020 and month 4
2021-03-22 19:20:35.039441 downloading data for premium index BTCUSD year 2020 and month 5
2021-03-22 19:20:47.500486 downloading data for premium index BTCUSD year 2020 and month 6
2021-03-22 19:21:02.608808 downloading data for premium index BTCUSD year 2020 and month 7
2021-03-22 19:21:17.273258 downl

# spot ohlc

## unit functions

In [26]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Format *date* as a ``YYYY-MM-DD`` string."""
    day_format = '%Y-%m-%d'
    return date.strftime(day_format)

def date_to_ymd_str(date):
    """Return the ``[yyyy, mm, dd]`` parts of *date* as a list of strings.

    Works on the text of ``str(date)``: everything after the first space
    (the time of day, if any) is discarded and the rest is split on ``-``.
    """
    day_part, _, _ = str(date).partition(' ')
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as ``'2019-08-01T00:00:00.000Z'`` into a naive ``datetime``."""
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    parsed = datetime.datetime.strptime(txt, timestamp_format)
    return parsed

def filename_to_date(filename, symbol):
    """Return the day encoded in an archive file name as a ``datetime``.

    The symbol, the ``_premium_index`` / ``_index_price`` tag and the
    ``.csv.gz`` extension are removed; what remains is parsed as ``YYYY-MM-DD``.
    """
    datestr = filename
    for token in (symbol, '_premium_index', '_index_price', '.csv.gz'):
        datestr = datestr.replace(token, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries linked from ``https://public.bybit.com/<dataType>/``.

    Returns ``None`` without any request when *dataType* is not one of
    ``'premium_index'``, ``'spot_index'`` or ``'trading'``.
    """
    if dataType not in ['premium_index', 'spot_index', 'trading']:
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    symbols = []
    for chunk in res.text.split('href=')[1:]:
        target = chunk.split('"')[1]
        # The last character of each link target is dropped
        # (presumably the trailing '/' of a directory link - confirm).
        symbols.append(target[:-1])
    return symbols

def get_list_filename_spotIndex(symbol):
    """List the link targets found on the spot-index page of *symbol*.

    Returns ``None`` when *symbol* is not among ``get_list_symbols('spot_index')``.
    """
    if symbol not in get_list_symbols('spot_index'):
        return None
    res = requests.get(f'https://public.bybit.com/spot_index/{symbol}/')
    hrefs = res.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_spot_index(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of spot-index candles and store them as a single file.

    For every day of ``year``-``month`` the daily archive
    ``https://public.bybit.com/spot_index/<symbol>/<symbol><yyyy>-<mm>-<dd>_index_price.csv.gz``
    is fetched into ``tmp_data/``. The daily files are concatenated,
    ``start_at`` is renamed to ``time``, the ``symbol`` and ``period`` columns
    are dropped and the result is written to
    ``D://Trading/Data/Crypto/Bybit/spot_ohlc/<symbol>/1min/<yyyy>/<yyyy>-<mm>.csv.gz``.
    The output directory is created when missing.

    Args:
        symbol: Bybit symbol, e.g. ``'BTCUSD'``.
        year: calendar year of the month to download.
        month: calendar month (1-12).
        print_mode: when true, print a timestamped progress line.

    Raises:
        requests.HTTPError: a daily archive answered with an HTTP error status.
    """
    date = datetime.datetime(year, month, 1)
    year_str, month_str = date.strftime('%Y'), date.strftime('%m')
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for spot index {symbol} year {year} and month {month}')

    os.makedirs('tmp_data', exist_ok=True)
    list_path = []
    # The empty frame fixes the expected column set/order even if a daily file deviates.
    frames = [pd.DataFrame(columns=['start_at', 'symbol', 'period', 'open', 'high', 'low', 'close'])]
    try:
        # loop on day
        while date.month == month:
            day_str = date.strftime('%d')
            url = (f'https://public.bybit.com/spot_index/{symbol}/'
                   f'{symbol}{year_str}-{month_str}-{day_str}_index_price.csv.gz')
            r = requests.get(url, timeout=60)
            # Fail here rather than saving an error page under a .csv.gz name.
            r.raise_for_status()
            tmp_file = f'tmp_data/{day_str}.csv.gz'
            with open(tmp_file, "wb") as f:
                f.write(r.content)
            list_path.append(tmp_file)
            date += datetime.timedelta(days=1)
        frames.extend(pd.read_csv(tmp_file) for tmp_file in list_path)
    finally:
        # Temporary downloads are removed whether or not the month completed.
        for tmp_file in list_path:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    # Single concat instead of growing the frame day by day.
    df = pd.concat(frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"start_at": "time"}).drop(['symbol', 'period'], axis=1)

    path = f'D://Trading/Data/Crypto/Bybit/spot_ohlc/{symbol}/1min/{year_str}/{year_str}-{month_str}.csv.gz'
    # Same as the premium-index downloader: make sure the target folder exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # pandas infers gzip compression from the '.gz' suffix.
    df.to_csv(path, index=False)
        
def init_spot_ohlc():
    """Make sure ``infos.json`` contains the ``spot_ohlc`` dataset description.

    Loads ``D://Trading/Data/Crypto/Bybit/infos.json`` (or starts from an empty
    dict when the file does not exist), adds the ``spot_ohlc`` entry if it is
    missing and writes the dict back. An existing entry is left untouched.
    """
    infos_path = 'D://Trading/Data/Crypto/Bybit/infos.json'

    if os.path.isfile(infos_path):
        with open(infos_path, 'r') as f:
            dic = json.load(f)
    else:
        dic = {}

    if 'spot_ohlc' not in dic:
        # Python booleans (True/False); json.dump serialises them as true/false.
        dic['spot_ohlc'] = {
            "timeframe_str": {
                "1min": {
                    "file_length": "1m",
                    "columns": [
                        "time",
                        "open",
                        "high",
                        "low",
                        "close"
                    ],
                    "timeframe_sec": 60,
                    "hasVolume": False,
                    "unit_time": "s",
                    "is_gz": True,
                    "is_native": True,
                    "description": "ohlc of futures spot, i.e candlesticks of asset price",
                    "dic_year_month": {}
                }
            }
        }

    with open(infos_path, 'w') as f:
        json.dump(dic, f)
        
# Run once so that infos.json carries the 'spot_ohlc' entry.
init_spot_ohlc()

In [22]:
# Single-month trial run (BTCUSD, May 2020) with the progress line enabled.
dowload_monthly_spot_index(symbol='BTCUSD', year=2020, month=5, print_mode=True)

2021-03-22 18:33:56.709749 downloading data for spot index BTCUSD year 2020 and month 5


In [29]:
# Reload the stored month and add a readable datetime column next to the epoch seconds.
df = pd.read_csv('D://Trading/Data/Crypto/Bybit/spot_ohlc/BTCUSD/1min/2020/2020-05.csv.gz')
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,open,high,low,close,date
0,1588291200,8627.36,8648.55,8623.64,8648.19,2020-05-01 00:00:00
1,1588291260,8648.19,8673.70,8648.19,8662.79,2020-05-01 00:01:00
2,1588291320,8662.79,8663.38,8635.14,8635.93,2020-05-01 00:02:00
3,1588291380,8635.93,8650.39,8634.29,8646.25,2020-05-01 00:03:00
4,1588291440,8646.25,8651.45,8638.69,8638.69,2020-05-01 00:04:00
...,...,...,...,...,...,...
44635,1590969300,9466.29,9470.60,9466.29,9468.67,2020-05-31 23:55:00
44636,1590969360,9468.67,9473.03,9468.67,9471.83,2020-05-31 23:56:00
44637,1590969420,9471.83,9471.83,9463.85,9464.69,2020-05-31 23:57:00
44638,1590969480,9464.69,9465.43,9455.67,9456.63,2020-05-31 23:58:00


## loop

In [28]:
# Download every available month of spot-index data for symbols that have no
# folder under spot_ohlc yet.
dataType = 'spot_index'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_spotIndex(symbol) or []
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')

    # Decide once per symbol. The former per-month test listed
    # '<symbol>/1min' right after establishing that '<symbol>' did not exist,
    # which always raised FileNotFoundError.
    if os.path.isdir(f'D://Trading/Data/Crypto/Bybit/spot_ohlc/{symbol}'):
        continue

    # Collect, per year, the months that have at least one daily file.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        dic_year_month.setdefault(date.year, [])
        if date.month not in dic_year_month[date.year]:
            dic_year_month[date.year].append(date.month)
    if not dic_year_month:
        # Nothing listed for this symbol: skip instead of reusing the
        # previous symbol's `date` for the removal below.
        continue
    # Drop the most recent month (presumably still incomplete - confirm),
    # chosen by value so the result does not depend on the listing order.
    year = max(dic_year_month)
    dic_year_month[year].remove(max(dic_year_month[year]))

    for year in sorted(dic_year_month):
        # The monthly file is written into this folder, so it has to exist.
        os.makedirs(f'D://Trading/Data/Crypto/Bybit/spot_ohlc/{symbol}/1min/{year}', exist_ok=True)
        for month in sorted(dic_year_month[year]):
            dowload_monthly_spot_index(symbol=symbol, year=year, month=month, print_mode=True)

2021-03-22 18:43:15.168060 - dowloading BTCUSD data...
2021-03-22 18:43:15.172073 downloading data for spot index BTCUSD year 2019 and month 10
2021-03-22 18:43:32.975429 downloading data for spot index BTCUSD year 2019 and month 11
2021-03-22 18:43:49.298813 downloading data for spot index BTCUSD year 2019 and month 12
2021-03-22 18:44:07.836969 downloading data for spot index BTCUSD year 2020 and month 1
2021-03-22 18:44:25.119595 downloading data for spot index BTCUSD year 2020 and month 2
2021-03-22 18:44:42.702640 downloading data for spot index BTCUSD year 2020 and month 3
2021-03-22 18:45:00.220872 downloading data for spot index BTCUSD year 2020 and month 4
2021-03-22 18:45:17.995735 downloading data for spot index BTCUSD year 2020 and month 5
2021-03-22 18:45:20.977689 downloading data for spot index BTCUSD year 2020 and month 6
2021-03-22 18:45:36.914657 downloading data for spot index BTCUSD year 2020 and month 7
2021-03-22 18:45:55.666085 downloading data for spot index BTC

# Trade history tick

## unit functions

In [59]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt

def date_to_str(date):
    """Format *date* as a ``YYYY-MM-DD`` string."""
    day_format = '%Y-%m-%d'
    return date.strftime(day_format)

def date_to_ymd_str(date):
    """Return the ``[yyyy, mm, dd]`` parts of *date* as a list of strings.

    Works on the text of ``str(date)``: everything after the first space
    (the time of day, if any) is discarded and the rest is split on ``-``.
    """
    day_part, _, _ = str(date).partition(' ')
    return day_part.split('-')

def strTZ_to_date(txt):
    """Parse a timestamp such as ``'2019-08-01T00:00:00.000Z'`` into a naive ``datetime``."""
    timestamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    parsed = datetime.datetime.strptime(txt, timestamp_format)
    return parsed

def filename_to_date(filename, symbol):
    """Return the day encoded in an archive file name as a ``datetime``.

    The symbol, the ``_premium_index`` / ``_index_price`` tag and the
    ``.csv.gz`` extension are removed; what remains is parsed as ``YYYY-MM-DD``.
    """
    datestr = filename
    for token in (symbol, '_premium_index', '_index_price', '.csv.gz'):
        datestr = datestr.replace(token, '')
    return datetime.datetime.strptime(datestr, '%Y-%m-%d')

def get_list_symbols(dataType):
    """List the entries linked from ``https://public.bybit.com/<dataType>/``.

    Returns ``None`` without any request when *dataType* is not one of
    ``'premium_index'``, ``'spot_index'`` or ``'trading'``.
    """
    if dataType not in ['premium_index', 'spot_index', 'trading']:
        return None
    res = requests.get(f'https://public.bybit.com/{dataType}/')
    symbols = []
    for chunk in res.text.split('href=')[1:]:
        target = chunk.split('"')[1]
        # The last character of each link target is dropped
        # (presumably the trailing '/' of a directory link - confirm).
        symbols.append(target[:-1])
    return symbols
    
def get_list_filename_trading(symbol):
    """List the link targets found on the trading-history page of *symbol*.

    Returns ``None`` when *symbol* is not among ``get_list_symbols('trading')``.
    """
    if symbol not in get_list_symbols('trading'):
        return None
    res = requests.get(f'https://public.bybit.com/trading/{symbol}/')
    hrefs = res.text.split('href=')[1:]
    return [href.split('"')[1] for href in hrefs]

def dowload_monthly_trading(symbol='BTCUSD', year=2020, month=5, print_mode=False):
    """Download one month of tick-by-tick trades and store them as a single file.

    The month is skipped (return ``0``) when the archive of its first day,
    ``<symbol><yyyy>-<mm>-01.csv.gz``, is not in the listing returned by
    ``get_list_filename_trading``; only that first day is checked. Otherwise
    every daily archive
    ``https://public.bybit.com/trading/<symbol>/<symbol><yyyy>-<mm>-<dd>.csv.gz``
    is fetched into ``tmp_data/``, the files are concatenated, ``timestamp`` is
    renamed to ``time``, the ``symbol``, ``trdMatchID``, ``grossValue`` and
    ``homeNotional`` columns are dropped and the result is written to
    ``D://Trading/Data/Crypto/Bybit/trades/<symbol>/1min/<yyyy>/<yyyy>-<mm>.csv.gz``.
    The output directory is created when missing.

    Args:
        symbol: Bybit symbol, e.g. ``'BTCUSD'``.
        year: calendar year of the month to download.
        month: calendar month (1-12).
        print_mode: when true, print a timestamped progress line.

    Returns:
        ``1`` when the monthly file was written, ``0`` when the month was skipped.

    Raises:
        requests.HTTPError: a daily archive answered with an HTTP error status.
    """
    date = datetime.datetime(year, month, 1)
    year_str, month_str = date.strftime('%Y'), date.strftime('%m')
    if print_mode:
        print(f'{datetime.datetime.now()} downloading data for trades index {symbol} year {year} and month {month}')

    # test: is the first day of the month available at all?
    first_file = f'{symbol}{year_str}-{month_str}-{date.strftime("%d")}.csv.gz'
    # The listing is None for an unknown symbol; treat that as "nothing available".
    if first_file not in (get_list_filename_trading(symbol) or []):
        print(first_file, 'not in historical data')
        return 0

    os.makedirs('tmp_data', exist_ok=True)
    list_path = []
    # The empty frame fixes the expected column set/order even if a daily file deviates.
    frames = [pd.DataFrame(columns=['timestamp', 'symbol', 'side', 'size', 'price', 'tickDirection', 'trdMatchID', 'grossValue', 'homeNotional', 'foreignNotional'])]
    try:
        # loop on day
        while date.month == month:
            day_str = date.strftime('%d')
            url = f'https://public.bybit.com/trading/{symbol}/{symbol}{year_str}-{month_str}-{day_str}.csv.gz'
            r = requests.get(url, timeout=60)
            # Fail here rather than saving an error page under a .csv.gz name.
            r.raise_for_status()
            tmp_file = f'tmp_data/{day_str}.csv.gz'
            with open(tmp_file, "wb") as f:
                f.write(r.content)
            list_path.append(tmp_file)
            date += datetime.timedelta(days=1)
        frames.extend(pd.read_csv(tmp_file) for tmp_file in list_path)
    finally:
        # Temporary downloads are removed whether or not the month completed.
        for tmp_file in list_path:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    # Single concat: tick files are large, re-copying the frame every day is costly.
    df = pd.concat(frames, axis=0)
    df = df.reset_index(drop=True).rename(columns={"timestamp": "time"}).drop(['symbol', 'trdMatchID', 'grossValue', 'homeNotional'], axis=1)

    path = f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/1min/{year_str}/{year_str}-{month_str}.csv.gz'
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # pandas infers gzip compression from the '.gz' suffix.
    df.to_csv(path, index=False)

    return 1
        
def init_trades():
    """Make sure ``infos.json`` contains the ``trades`` dataset description.

    Loads ``D://Trading/Data/Crypto/Bybit/infos.json`` (or starts from an empty
    dict when the file does not exist), adds the ``trades`` entry if it is
    missing and writes the dict back. An existing entry is left untouched.
    """
    infos_path = 'D://Trading/Data/Crypto/Bybit/infos.json'

    if os.path.isfile(infos_path):
        with open(infos_path, 'r') as f:
            dic = json.load(f)
    else:
        dic = {}

    if 'trades' not in dic:
        dic['trades'] = {
            "file_length": "1m",
            # Columns kept by dowload_monthly_trading after its rename/drop step
            # (the previous value was the OHLC list of the candle datasets).
            "columns": ['time', 'side', 'size', 'price', 'tickDirection', 'foreignNotional'],
            "timeframe_str": "tick",
            "timeframe_sec": 0,
            "hasVolume": True,
            "unit_time": 's',
            "is_gz": True,
            "is_native": True,
            "description": "trades granular data, contain all trades made (tick by tick)",
            "dic_year_month": {}
        }

    with open(infos_path, 'w') as f:
        json.dump(dic, f)
        
# Run once so that infos.json carries the 'trades' entry.
init_trades()

In [45]:
# Single-month trial run (BTCUSD, May 2020) without the progress line.
dowload_monthly_trading(symbol='BTCUSD', year=2020, month=5, print_mode=False)

In [46]:
# Reload the stored month and add a readable datetime column next to the epoch seconds.
df = pd.read_csv('D://Trading/Data/Crypto/Bybit/trades/BTCUSD/1min/2020/2020-05.csv.gz')
df['date'] = pd.to_datetime(df['time'], unit='s')
df

Unnamed: 0,time,side,size,price,tickDirection,foreignNotional,date
0,1.588378e+09,Buy,111,8831.5,ZeroMinusTick,0.012569,2020-05-01 23:59:58.390206976
1,1.588378e+09,Buy,1410,8831.5,PlusTick,0.159656,2020-05-01 23:59:58.305577984
2,1.588378e+09,Sell,475,8831.0,MinusTick,0.053788,2020-05-01 23:59:58.032474112
3,1.588378e+09,Buy,111,8831.5,PlusTick,0.012569,2020-05-01 23:59:57.340058112
4,1.588378e+09,Sell,2500,8831.0,ZeroMinusTick,0.283094,2020-05-01 23:59:54.425626112
...,...,...,...,...,...,...,...
6439357,1.590883e+09,Buy,1111,9702.5,ZeroMinusTick,0.114507,2020-05-31 00:00:01.964585984
6439358,1.590883e+09,Buy,2286,9702.5,ZeroMinusTick,0.235609,2020-05-31 00:00:01.722912000
6439359,1.590883e+09,Buy,2205,9702.5,ZeroMinusTick,0.227261,2020-05-31 00:00:01.717868032
6439360,1.590883e+09,Buy,50,9702.5,ZeroMinusTick,0.005153,2020-05-31 00:00:01.702946048


## loop

In [60]:
# Download every available month of trade history for every listed symbol.
dataType = 'trading'
for symbol in get_list_symbols(dataType):
    dic_year_month = {}
    list_filename = get_list_filename_trading(symbol) or []
    print(f'{datetime.datetime.now()} - dowloading {symbol} data...')
    # Collect, per year, the months that have at least one daily file.
    for filename in list_filename:
        date = filename_to_date(filename, symbol)
        dic_year_month.setdefault(date.year, [])
        if date.month not in dic_year_month[date.year]:
            dic_year_month[date.year].append(date.month)
    if not dic_year_month:
        # Nothing listed for this symbol: skip instead of reusing the
        # previous symbol's `date` for the removal below.
        continue
    # Drop the most recent month (presumably still incomplete - confirm),
    # chosen by value so the result does not depend on the listing order.
    year = max(dic_year_month)
    dic_year_month[year].remove(max(dic_year_month[year]))

    for year in sorted(dic_year_month):
        for month in sorted(dic_year_month[year]):
            dowload_monthly_trading(symbol=symbol, year=year, month=month, print_mode=True)

2021-03-22 19:44:38.682152 - dowloading ADAUSDT data...
2021-03-22 19:44:39.142469 - dowloading BCHUSDT data...
2021-03-22 19:44:39.143467 downloading data for trades index BCHUSDT year 2020 and month 12
BCHUSDT2020-12-01.csv.gz not in historical data
2021-03-22 19:44:39.298286 downloading data for trades index BCHUSDT year 2021 and month 1
2021-03-22 19:45:34.848219 downloading data for trades index BCHUSDT year 2021 and month 2
2021-03-22 19:46:27.663891 - dowloading BTCUSD data...
2021-03-22 19:46:27.668904 downloading data for trades index BTCUSD year 2019 and month 10
2021-03-22 19:48:08.532092 downloading data for trades index BTCUSD year 2019 and month 11
2021-03-22 19:49:46.252467 downloading data for trades index BTCUSD year 2019 and month 12
2021-03-22 19:51:42.712456 downloading data for trades index BTCUSD year 2020 and month 1
2021-03-22 19:54:04.954922 downloading data for trades index BTCUSD year 2020 and month 2
2021-03-22 19:56:21.552958 downloading data for trades ind

In [61]:
# The raw trades were saved under '<symbol>/1min'; move them to '<symbol>/tick'.
for symbol in os.listdir('D://Trading/Data/Crypto/Bybit/trades'):
    src = f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/1min'
    dst = f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/tick'
    if os.path.isdir(dst):
        # Never rename onto an existing 'tick' folder.
        print(symbol, 'already has renamed folder')
        continue
    try:
        os.rename(src, dst)
    except OSError as err:
        # Report the real cause (e.g. missing '1min' folder) instead of
        # claiming the folder was already renamed.
        print(symbol, 'could not be renamed:', err)

BCHUSDT already has renamed folder


# custom 1min ohlc

## unit functions

In [86]:
import json, os, gzip
import datetime
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

def custom_transfo(x):
    """Summarise one group of trades into 17 buy/sell volume and price statistics.

    *x* is a DataFrame with ``size``, ``price`` and ``side`` columns (``side``
    holding ``'Buy'`` / ``'Sell'``); missing values are forward-filled first.
    The result is a Series indexed by the statistic names listed below. An
    empty group yields zeros for every statistic. When one side has no volume
    its volume-weighted price is set to the placeholder value ``10``.
    """
    stat_names = ['v_b', 'v_s', 'v_b_over_v', 'v_s_over_v', 'p_mean', 'v_weigted_p', 'v_weigted_p_b', 'v_weigted_p_s', 'v_weigted_p_over_p_mean', 'v_weigted_p_b_over_v_weigted_p', 'v_weigted_p_s_over_v_weigted_p', 'p_std', 'p_std_b', 'p_std_s', 'qty', 'qty_b', 'qty_s']

    sizes = x['size'].ffill()
    prices = x['price'].ffill()
    sides = x['side'].ffill()

    n_trades = len(sides)  # number of trades in the group
    if n_trades == 0:
        return pd.Series([0] * len(stat_names), index=stat_names)

    is_buy = sides == 'Buy'
    is_sell = sides == 'Sell'
    notional = sizes * prices

    # traded volume per side and each side's share of the total
    buy_volume = sizes[is_buy].sum()
    sell_volume = sizes[is_sell].sum()
    if buy_volume + sell_volume > 0:
        buy_share = buy_volume / (buy_volume + sell_volume)
        sell_share = 1 - buy_share
    else:
        buy_share = 0
        sell_share = 0

    # plain and volume-weighted average prices
    mean_price = prices.mean()
    vwap = notional.sum() / sizes.sum()
    vwap_buy = notional[is_buy].sum() / buy_volume if buy_volume > 0 else 10
    vwap_sell = notional[is_sell].sum() / sell_volume if sell_volume > 0 else 10

    # price dispersion, overall and per side
    std_all = prices.std()
    std_buy = prices[is_buy].std()
    std_sell = prices[is_sell].std()

    # trade counts; everything that is not a buy is counted as a sell
    n_buys = is_buy.sum()
    n_sells = n_trades - n_buys

    values = [
        buy_volume, sell_volume, buy_share, sell_share,
        mean_price, vwap, vwap_buy, vwap_sell,
        vwap / mean_price, vwap_buy / vwap, vwap_sell / vwap,
        std_all, std_buy, std_sell,
        n_trades, n_buys, n_sells,
    ]
    return pd.Series(values, index=stat_names)



def resamp_df(symbol, year_str, month_str):
    """Resample one month of tick trades into 1-minute bars with extra statistics.

    Reads ``D://Trading/Data/Crypto/Bybit/trades/<symbol>/tick/<year>/<year>-<month>.csv.gz``,
    builds per-minute rows (first ``time``, OHLC of ``price``, summed ``size``
    and ``foreignNotional``, plus the statistics of ``custom_transfo``) and
    writes them to ``.../trades/<symbol>/1min/<year>/<year>-<month>.csv.gz``,
    creating the target folder when needed.

    Does nothing (returns ``None``) when the tick file does not exist.

    Args:
        symbol: symbol folder name, e.g. ``'XRPUSD'``.
        year_str: four-digit year as a string, e.g. ``'2020'``.
        month_str: two-digit month as a string, e.g. ``'05'``.
    """
    tick_path = f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/tick/{year_str}/{year_str}-{month_str}.csv.gz'
    if not os.path.isfile(tick_path):
        return

    df = pd.read_csv(tick_path)
    df.loc[:,'date'] = pd.to_datetime(df.time, unit='s')

    # Index by datetime once; both the resampler and the grouper work on it.
    indexed = df.set_index('date')
    resamp = indexed.resample('1min')
    groupby = indexed.groupby(pd.Grouper(freq='1min'))

    df2 = resamp.agg({'time':'first',
                      'price':'ohlc',
                      'size':'sum',
                      'foreignNotional':'sum'})
    df3 = groupby.apply(custom_transfo)

    df4 = pd.concat([df2, df3], axis=1)
    # Flatten the two-level column labels produced by the dict aggregation.
    df4 = df4.rename(columns={
        ('time', 'time'):'time',
        ('price', 'open'): 'open',
        ('price', 'high'): 'high',
        ('price', 'low'): 'low',
        ('price', 'close'): 'close',
        ('size', 'size'): 'size',
        ('foreignNotional', 'foreignNotional'): 'foreignNotional',
    }).reset_index(drop=True)

    path = f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/1min/{year_str}/{year_str}-{month_str}.csv.gz'
    os.makedirs(os.path.dirname(path), exist_ok=True)

    df4.to_csv(path, index=False)

In [None]:
# Trial run on a single month (XRPUSD, 2020-05).
resamp_df('XRPUSD', '2020', '05')

In [94]:
# Inspect a resampled 1-minute file (ETHUSD, 2020-08).
pd.read_csv('D://Trading/Data/Crypto/Bybit/trades/ETHUSD/1min/2020/2020-08.csv.gz')

Unnamed: 0,time,open,high,low,close,size,foreignNotional,v_b,v_s,v_b_over_v,...,v_weigted_p_s,v_weigted_p_over_p_mean,v_weigted_p_b_over_v_weigted_p,v_weigted_p_s_over_v_weigted_p,p_std,p_std_b,p_std_s,qty,qty_b,qty_s
0,1.596240e+09,346.90,347.75,346.80,347.40,387358,1115.849925,314407.0,72951.0,0.811670,...,347.051399,0.999610,1.000060,0.999739,0.291716,0.284207,0.293258,211.0,169.0,42.0
1,1.596240e+09,347.40,347.85,347.40,347.80,573820,1650.309104,564398.0,9422.0,0.983580,...,347.709075,0.999934,1.000000,1.000013,0.097463,0.101341,0.065430,139.0,121.0,18.0
2,1.596240e+09,347.85,348.30,347.85,348.15,287418,825.478477,285824.0,1594.0,0.994454,...,348.157371,1.000257,1.000000,0.999925,0.145503,0.147242,0.131241,144.0,127.0,17.0
3,1.596240e+09,348.20,348.20,347.80,348.00,97025,278.763898,50122.0,46903.0,0.516589,...,347.915882,1.000102,1.000373,0.999602,0.121873,0.128730,0.113548,68.0,36.0,32.0
4,1.596240e+09,348.00,348.45,347.35,347.45,260883,749.767426,80397.0,180486.0,0.308173,...,347.890227,0.999659,1.000400,0.999822,0.338455,0.262318,0.341569,171.0,113.0,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44635,1.598918e+09,434.60,435.35,434.60,435.30,149453,343.794564,129411.0,20042.0,0.865898,...,434.950856,0.999483,0.999916,1.000540,0.196575,0.234042,0.092355,50.0,32.0,18.0
44636,1.598918e+09,435.35,435.40,434.90,434.90,105756,242.928422,77725.0,28031.0,0.734946,...,435.172507,1.000354,1.000137,0.999619,0.219676,0.153375,0.210683,38.0,15.0,23.0
44637,1.598918e+09,434.95,434.95,434.30,434.30,59068,135.976643,4035.0,55033.0,0.068311,...,434.358068,0.999775,1.001260,0.999908,0.183805,0.240535,0.169951,51.0,7.0,44.0
44638,1.598918e+09,434.25,434.30,433.80,433.95,356529,821.588808,7626.0,348903.0,0.021390,...,433.944633,0.999775,1.000637,0.999986,0.131535,0.138272,0.129643,116.0,21.0,95.0


## loop

In [87]:
# Inventory of the tick files on disk: {symbol: {year: [month, ...]}}.
dic_op_to_do = {}
for symbol in os.listdir('D://Trading/Data/Crypto/Bybit/trades'):
    dic_op_to_do.setdefault(symbol, {})
    for year_str in os.listdir(f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/tick'):
        dic_op_to_do[symbol].setdefault(year_str, [])
        for filename in os.listdir(f'D://Trading/Data/Crypto/Bybit/trades/{symbol}/tick/{year_str}'):
            # Only files named like '<year>-<month>.csv...' are of interest.
            if ".csv" not in filename or year_str not in filename:
                continue
            month_str = filename.split('.csv')[0].split(f'{year_str}-')[1]
            if month_str not in dic_op_to_do[symbol][year_str]:
                dic_op_to_do[symbol][year_str].append(month_str)

print(dic_op_to_do)

{'BCHUSDT': {'2021': ['01', '02']}, 'BTCUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02']}, 'BTCUSDT': {'2020': ['04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02']}, 'EOSUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02']}, 'ETHUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02']}, 'ETHUSDT': {'2020': ['11', '12'], '2021': ['01', '02']}, 'LINKUSDT': {'2020': ['11', '12'], '2021': ['01', '02']}, 'LTCUSDT': {'2020': ['11', '12'], '2021': ['01', '02']}, 'XRPUSD': {'2019': ['10', '11', '12'], '2020': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'], '2021': ['01', '02']}, 'XTZUSDT': {'2020': ['11', '12'], '2021': ['01', '02']}}


In [88]:
# Resample every inventoried month; a failing month is reported and skipped.
for symbol in dic_op_to_do:
    for year_str in dic_op_to_do[symbol]:
        for month_str in dic_op_to_do[symbol][year_str]:
            print(f'{datetime.datetime.now()} - resampling {symbol} of month {year_str}-{month_str}')
            try:
                resamp_df(symbol, year_str, month_str)
            except Exception as err:
                # `Exception` (not a bare except) keeps Ctrl-C able to stop the
                # long run, and the cause is printed instead of being lost.
                print('error', symbol, year_str, month_str, repr(err))
            

2021-03-23 09:05:22.714144 - resampling BCHUSDT of month 2021-01
2021-03-23 09:07:22.391386 - resampling BCHUSDT of month 2021-02
2021-03-23 09:09:06.875661 - resampling BTCUSD of month 2019-10
2021-03-23 09:11:05.340337 - resampling BTCUSD of month 2019-11
2021-03-23 09:13:00.258247 - resampling BTCUSD of month 2019-12
2021-03-23 09:15:04.034133 - resampling BTCUSD of month 2020-01
2021-03-23 09:17:07.764886 - resampling BTCUSD of month 2020-02
2021-03-23 09:19:05.758587 - resampling BTCUSD of month 2020-03
2021-03-23 09:21:17.706492 - resampling BTCUSD of month 2020-04
2021-03-23 09:23:22.046799 - resampling BTCUSD of month 2020-05
2021-03-23 09:25:31.199111 - resampling BTCUSD of month 2020-06
2021-03-23 09:27:30.843362 - resampling BTCUSD of month 2020-07
2021-03-23 09:29:42.681520 - resampling BTCUSD of month 2020-08
2021-03-23 09:32:05.559937 - resampling BTCUSD of month 2020-09
2021-03-23 09:34:20.232567 - resampling BTCUSD of month 2020-10
2021-03-23 09:36:40.050078 - resamplin