In [35]:
import requests
import json
from datetime import datetime, timedelta
import os
import pandas as pd
from tqdm import tqdm

In [28]:
# Request settings for the Upbit 1-minute candle endpoint used by the loader.
headers = dict(accept="application/json")
url_base = "https://api.upbit.com/v1/candles/minutes/1"

# Directory that parquet files are written to and read from.
file_dir = "/Users/minjiwon/solid-trade-core/research/data"

In [26]:
def print_list_prettier(arr):
    """Print every leaf of an arbitrarily nested list on its own line.

    Only exact `list` instances are descended into; any other value
    (including tuples and list subclasses) is printed as-is.
    """
    if type(arr) is list:
        for element in arr:
            print_list_prettier(element)
    else:
        print(arr)


def save_parquet(df, file_name, attach_timestamp=True, postfix=""):
    """Write `df` as a parquet file inside the module-level `file_dir`.

    The file is named `<file_name>[_<YYYYmmddHHMMSS>][_<postfix>].parquet`.

    Args:
        df: Object exposing `to_parquet(path)` (a pandas DataFrame in this notebook).
        file_name: Base name; a trailing `.parquet` is accepted and stripped.
        attach_timestamp: Append the current local time to the name when true.
        postfix: Optional extra suffix appended after the timestamp.
    """
    extension = '.parquet'

    # Strip a trailing extension so it does not end up in the middle of the
    # final name (e.g. "x.parquet_prcd.parquet").
    if file_name.endswith(extension):
        file_name = file_name[:-len(extension)]

    if attach_timestamp:
        # strftime is used instead of slicing str(datetime.now()): the string
        # form omits the fractional part when microseconds happen to be 0,
        # which made the sliced timestamp come out empty.
        timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
        file_name += f"_{timestamp}"

    if postfix:
        file_name += f"_{postfix}"

    path = os.path.join(file_dir, f"{file_name}.parquet")
    df.to_parquet(path)
    print(f"Saved parquet file on: {path}")


def load_parquet(file_name):
    """Read a parquet file from the module-level `file_dir` into a DataFrame.

    Args:
        file_name: File name relative to `file_dir`; must end in `.parquet`.

    Returns:
        The result of `pd.read_parquet` on the resolved path.

    Raises:
        ValueError: `file_name` does not have the `parquet` extension.
        FileNotFoundError: nothing exists at the resolved path.
        (Both are `Exception` subclasses, so callers catching `Exception`
        keep working.)
    """
    path = os.path.join(file_dir, file_name)

    if file_name.split('.')[-1] != 'parquet':
        raise ValueError(f'Not a parquet file: {path}')

    if not os.path.exists(path):
        raise FileNotFoundError(f'No File: {path}')

    return pd.read_parquet(path)

In [43]:
def _load_data(market, num_days=10, count=200, file_name=None):
    market = 'KRW-ETC' if market.lower() == 'etc' else 'KRW-BTC'

    to = datetime.now()
    num_batches = int(num_days * 24 * 60 / count)

    data = []

    for i in range(num_batches):
        print(f'\r{i + 1}/{num_batches}', end="")
        url = f"{url_base}?market={market}&count={count}&to={str(to).split('.')[:-1][0]}"
        response = requests.get(url, headers=headers)
        data += json.loads(response.text)
        to -= timedelta(minutes=count)

    print()
    
    df = pd.DataFrame(data=data)

    if file_name:
        save_parquet(df, file_name)
    
    return df


def _preprocess(data, k=1.01, margin=0, verbose='new_line', file_name=None):
    _data = data.copy().iloc[::-1].reset_index(drop=True)

    _data.insert(5, 'mid_price', (_data['high_price'] + _data['low_price']) / 2)
    _data.insert(0, 'price_change_rate', 0)

    for r in tqdm(range(1, len(_data)), ncols=70, leave=False):
        if verbose:
            print(f"\r{r + 1}/{len(_data)}", end="")

        curr = _data.iloc[r].mid_price
        prev = _data.iloc[r - 1].mid_price
        _data.iloc[r, 0] = (curr - prev) / prev
    
    if verbose:
        print('', end='\n' if verbose == 'new_line' else '\r')

    inf = len(_data) + 1

    _data.insert(0, 'until_target_mid', inf)

    stack = []
    for r in range(len(_data) - 1, -1, -1):
        if verbose:
            print(f"\r{len(_data) - r}/{len(_data)}", end="")

        curr = _data.iloc[r].mid_price

        while stack:
            [i, p] = stack[-1]
            if curr > p:
                stack.pop()
            else:
                _data.iloc[r, 0] = i - r
                stack.append([r, curr / k])
                break
        
        if not stack:
            stack.append([r, curr / k])

    if verbose:
        print('', end='\n' if verbose == 'new_line' else '\r')

    _data.insert(0, 'until_target_low_high', inf)

    stack = []
    for r in range(len(_data) - 1, -1, -1):
        if verbose:
            print(f"\r{len(_data) - r}/{len(_data)}", end="")

        curr_low = _data.iloc[r].low_price
        curr_high = _data.iloc[r].high_price

        if curr_high * (1 - margin) >= curr_low * (1 + margin) * k:
            _data.iloc[r, 0] = 0

        while stack:
            [i, high] = stack[-1]
            if curr_low > high:
                stack.pop()
            else:
                if _data.iloc[r, 0] > 0:
                    _data.iloc[r, 0] = i - r
                stack.append([r, curr_high / k])
                break
        
        if not stack:
            stack.append([r, curr_high / k])
    
    if verbose:
        print('', end='\n' if verbose == 'new_line' else '\r')

    _data.insert(0, 'is_insider', _data['high_price'] / _data['low_price'] >= k)
    _data.insert(0, 'is_insider_margin', (_data['high_price'] * (1 - margin)) / (_data['low_price'] * (1 + margin)) >= k)

    if file_name:
        save_parquet(_data, file_name, attach_timestamp=True, postfix="prcd")

    return _data


def _search(market=None, num_days=None, df=None, min_k=1.001, max_k=1.01, dk=0.001, margin=0.01, verbose=True):
    if df is None:
        if market is None:
            return
        df = _load_data(market)

    if num_days is not None:
        num_samples = num_days * 24 * 60
        df = df.iloc[-num_samples:]

    print(f'DATA SIZE: {len(df)}', end="\n\n")

    k = min_k
    while k <= max_k:
        _data = _preprocess(df, k, margin=margin, verbose=verbose)
        
        print(f"[k={k}]")

        num_success_mid = sum(_data['until_target_mid'] < len(_data))
        required_mins_avg_mid = sum(_data.loc[_data.loc[:, 'until_target_mid'] < len(_data), 'until_target_mid']) / len(_data)
        num_success_low_high = sum(_data['until_target_low_high'] < len(_data))
        required_mins_avg_low_high = sum(_data.loc[_data.loc[:, 'until_target_low_high'] < len(_data), 'until_target_low_high']) / len(_data)
        sum_is_insider = sum(_data['is_insider'])
        sum_is_insider_margin = sum(_data['is_insider_margin'])
        
        print(f'MID: {num_success_mid / len(_data):.3f}({num_success_mid}), {required_mins_avg_mid:.2f}\'')
        print(f'LOW & HIGH: {num_success_low_high / len(_data):.3f}({num_success_low_high}), {required_mins_avg_low_high:.2f}\'')
        print(f'INSIDER RATE: {sum_is_insider / len(_data):.3f} ({sum_is_insider})')
        print(f'INSIDER MARGIN RATE: {sum_is_insider_margin / len(_data):.3f} ({sum_is_insider_margin})')
        
        if num_success_low_high == 0:
            return

        k += dk
        print()


def _simulate(df_candles, inital_balance=1e10, entry_rate=0.014, exit_rate=0.007):
    balance = initial_balance
    holding_coins = []
    
    

In [29]:
# Load a previously saved candle snapshot from `file_dir` (per its file name:
# 365 days of 1-minute KRW-ETC candles) and display it.
data_etc = load_parquet('krw-etc-1m-candle-365-days_20230213191128.parquet')
data_etc

Unnamed: 0,market,candle_date_time_utc,candle_date_time_kst,opening_price,high_price,low_price,trade_price,timestamp,candle_acc_trade_price,candle_acc_trade_volume,unit
0,KRW-ETC,2023-02-13T18:48:00,2023-02-14T03:48:00,26540.0,26540.0,26540.0,26540.0,1676314087485,2.013046e+05,7.584952,1
1,KRW-ETC,2023-02-13T18:47:00,2023-02-14T03:47:00,26540.0,26540.0,26540.0,26540.0,1676314032164,1.108414e+07,417.639170,1
2,KRW-ETC,2023-02-13T18:46:00,2023-02-14T03:46:00,26520.0,26540.0,26520.0,26540.0,1676313998126,6.438213e+06,242.670676,1
3,KRW-ETC,2023-02-13T18:43:00,2023-02-14T03:43:00,26530.0,26540.0,26530.0,26540.0,1676313821417,2.781002e+07,1048.208494,1
4,KRW-ETC,2023-02-13T18:42:00,2023-02-14T03:42:00,26520.0,26530.0,26520.0,26530.0,1676313765265,8.487663e+06,319.966800,1
...,...,...,...,...,...,...,...,...,...,...,...
525595,KRW-ETC,2022-02-13T16:45:00,2022-02-14T01:45:00,40020.0,40150.0,40010.0,40150.0,1644770757247,1.195251e+08,2983.833995,1
525596,KRW-ETC,2022-02-13T16:44:00,2022-02-14T01:44:00,40000.0,40080.0,40000.0,40020.0,1644770683887,2.618156e+07,654.428407,1
525597,KRW-ETC,2022-02-13T16:43:00,2022-02-14T01:43:00,40050.0,40080.0,40000.0,40000.0,1644770638667,1.893217e+07,473.117016,1
525598,KRW-ETC,2022-02-13T16:42:00,2022-02-14T01:42:00,40020.0,40090.0,40020.0,40090.0,1644770576618,3.399587e+07,849.022995,1


In [None]:
# Sweep k over _search's default range on the loaded candles; verbose=False
# turns off the progress printing inside _preprocess.
_search(df=data_etc, verbose=False)

DATA SIZE: 525600



                                                                      

[k=1.001]
MID: 0.984(516975), 2513.73'
LOW & HIGH: 0.997(524065), 533.67'
INSIDER RATE: 0.536 (281722)
INSIDER MARGIN RATE: 0.000 (118)



                                                                      

[k=1.0019999999999998]
MID: 0.961(505341), 7491.62'
LOW & HIGH: 0.988(519263), 1719.22'
INSIDER RATE: 0.237 (124478)
INSIDER MARGIN RATE: 0.000 (98)



                                                                      

[k=1.0029999999999997]
MID: 0.945(496691), 11440.40'
LOW & HIGH: 0.978(514057), 3623.84'
INSIDER RATE: 0.116 (60835)
INSIDER MARGIN RATE: 0.000 (83)



                                                                      

[k=1.0039999999999996]
MID: 0.921(484005), 16928.82'
LOW & HIGH: 0.965(507451), 5825.81'
INSIDER RATE: 0.061 (32239)
INSIDER MARGIN RATE: 0.000 (72)



                                                                      

[k=1.0049999999999994]
MID: 0.894(469961), 20147.39'
LOW & HIGH: 0.956(502514), 8441.84'
INSIDER RATE: 0.035 (18280)
INSIDER MARGIN RATE: 0.000 (64)



                                                                      

[k=1.0059999999999993]
MID: 0.872(458402), 20557.47'
LOW & HIGH: 0.948(498233), 9834.78'
INSIDER RATE: 0.021 (10901)
INSIDER MARGIN RATE: 0.000 (58)



                                                                      

[k=1.0069999999999992]
MID: 0.830(436138), 21951.96'
LOW & HIGH: 0.945(496770), 11305.58'
INSIDER RATE: 0.013 (6688)
INSIDER MARGIN RATE: 0.000 (51)



                                                                      

[k=1.0079999999999991]
MID: 0.773(406219), 23088.90'
LOW & HIGH: 0.928(487742), 13467.66'
INSIDER RATE: 0.008 (4381)
INSIDER MARGIN RATE: 0.000 (45)



                                                                      

[k=1.008999999999999]
MID: 0.689(362076), 20652.69'
LOW & HIGH: 0.919(482928), 16535.14'
INSIDER RATE: 0.006 (2904)
INSIDER MARGIN RATE: 0.000 (40)



                                                                      

[k=1.009999999999999]
MID: 0.644(338385), 18874.73'
LOW & HIGH: 0.915(480954), 19701.11'
INSIDER RATE: 0.004 (1993)
INSIDER MARGIN RATE: 0.000 (32)

