In [1]:
import pyupbit
from datetime import datetime, timedelta
import pandas as pd
pd.options.display.float_format = '{: .8f}'.format
pd.set_option('display.max_rows', 500)
import time
from collections import deque
import warnings
warnings.filterwarnings(action='ignore')
from functools import reduce
import matplotlib.pyplot as plt
import os
import progressbar

In [2]:
# 애초에 변동성을 봐야한다.
# 1차적으로 range가 커야함 NEO는 괜찮은데  good: 9 15 bad 23 5
# open, high, low, close, range 등 다양한 지표들의 상관관계를 구하고 싶은데 구할수가 없다.

In [3]:
# 일봉, 시간봉을 기준으로 각각 최근 200개 데이터에 대하여, 
# 거래대금 평균, 저가 평균, 15 50 평균선 조건으로 
# 백테스팅 할 ticker들을 우선 구한다.

def get_base_target_tickers():
    tickers = pyupbit.get_tickers(fiat='KRW')
    targets = []
    
    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
    
    for ticker in bar(tickers):
        df = pyupbit.get_ohlcv(ticker)
        
        if len(df) < 200:
            continue
        
        target1 = False
        target2 = False
    
        # validate target1
        value = df['value'].mean() / 1000000
        low = df['low'].mean()  

        if value >= 300000 or low >= 100000:
            target1 = True
        elif low >= 10000 and get_moving_avg(df):
            target1 = True
            
        time.sleep(0.2)
        
        df = pyupbit.get_ohlcv(ticker, interval='minute60')
            
        # validate target2
        value = df['value'].mean() / 1000000
        low = df['low'].mean()

        if value >= 100000 or low >= 100000:
            target2 = True
        elif low >= 10000 and get_moving_avg(df):
            target2 = True
            
        if target1 or target2:
            targets.append(ticker)

        time.sleep(0.2)
    
    return targets

In [4]:
# pyupbit에서 제공하는 API로는 필요한 DataFrame의 형태를 구할 수 없어서,
# 우선 필터링한 ticker들의 데이터를 커스텀한 후, CSV로 저장한다.

def export_to_csv(tickers):
    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
    
    for ticker in bar(tickers):
        try:
            dfs = []
            df = pyupbit.get_ohlcv(ticker=ticker, interval="minute60")
            dfs.append(df)
            time.sleep(0.2)

            for i in range(5):
                df = pyupbit.get_ohlcv(ticker=ticker, to=df.index[0], interval="minute60")
                dfs.append(df)
                time.sleep(0.2)

            df = pd.concat(dfs)
            df = df.sort_index()
            df.to_csv('data/' + ticker + '.csv')
        except:
            print('error', ticker)

In [5]:
def get_file_list():
    files = os.listdir('data')
    tickers = []

    for file in files:
        tickers.append(file.split('.')[0])

    return tickers

In [6]:
def get_df_from_csv(ticker):
    fn = 'data' + '/' + ticker + '.csv'
    df = pd.read_csv(fn)
    df['Unnamed: 0'] = pd.to_datetime(df['Unnamed: 0'])
    df['day'] = df['Unnamed: 0'].dt.day
    df['hour'] = df['Unnamed: 0'].dt.hour
    df = df.set_index('Unnamed: 0')
    df.index.name = None
    
    return df

In [7]:
# 필요한 시작시간 및 경과시간을 입력하여 groupby 하여 반환한다.

def get_ohlcv_range_base(df, start, elapse):
    if start + elapse < 24:
        cond = (df['hour'] >= start) & (df['hour'] < start+elapse)
    else:
        cond1 = (df['hour'] >= start)
        cond2 = (df['hour'] < start+elapse - 24)
        cond = cond1 | cond2
        
    df = df[cond]
    df = df.resample('24H', base=start).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last', 
        'volume': 'sum',
        'value': 'sum'
    })
    
    return df

In [8]:
def get_moving_avg(df):
    ma15 = deque(maxlen=15)
    ma50 = deque(maxlen=50)
    ma15.extend(df['close'])
    ma50.extend(df['close'])
    
    curr_ma15 = sum(ma15) / len(ma15)
    curr_ma50 = sum(ma50) / len(ma50)

    return curr_ma15 > curr_ma50

In [9]:
# 기존 VB 알고리즘 적용하여 백테스팅한다.

def backtesting(df, k=0.5):
    df = df[['open', 'high', 'low', 'close']]
    df['range'] = df['high'] - df['low']
    df['target'] = df['range'].shift(1) * k + df['open']

    cond = df['high'] > df['target']
    df['ror'] = df.loc[cond, 'close'] / df.loc[cond, 'target'] * 0.9995 * 0.9995
    df['ror'].fillna(1, inplace=True)
    
    df['buy'] = cond

    ror = 1 
    for i in range(len(df)):
        ror = ror * df['ror'][i]

    return ror, df

In [10]:
# 기존 VB 알고리즘 + noise 적용하여 백테스팅한다.

def backtesting_noise(df, k=0.5):
    df = df[['open', 'high', 'low', 'close']]
    df['range'] = df['high'] - df['low']
    df['noise'] = 1 - abs(df['open'] - df['close']) / (df['high'] - df['low'])
    df['target'] = df['range'].shift(1) * df['noise'] + df['open']

    cond = df['high'] > df['target']
    df['ror'] = df.loc[cond, 'close'] / df.loc[cond, 'target'] * 0.9995 * 0.9995
    df['ror'].fillna(1, inplace=True)
    
    df['buy'] = cond

    ror = 1 
    for i in range(len(df)):
        ror = ror * df['ror'][i]

    return ror, df

In [11]:
# 기본
def get_tickers_ror(tickers, out_file):
    df_result = pd.DataFrame(columns=[
        'ticker', 
        'start', 
        'elapse', 
        'end', 
        'ror',
        'buy'
    ])
    
    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
    
    for ticker in bar(tickers):
        df = get_df_from_csv(ticker)
        start = 10
        elapse = 24

        df_base = get_ohlcv_range_base(df, start, elapse)

        ror, test_df = backtesting_noise(df_base)

        end = start+elapse-24 if start+elapse > 24 else start+elapse

        df_result = df_result.append([{
            'ticker': ticker, 
            'start': start, 
            'elapse': elapse, 
            'end': end, 
            'ror': ror,
            'buy': test_df[test_df['buy'] == True]['buy'].count()
        }])
                
    df_result.to_csv(out_file)
    return df_result

In [12]:
# 시간, 경과시간들을 변화시켜 가며 ROR을 구한다.
def get_tickers_ror_by_time(tickers, out_file):
    df_result = pd.DataFrame(columns=[
        'ticker', 
        'start', 
        'elapse', 
        'end', 
        'ror',
        'buy'
    ])
    
    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])
    
    for ticker in bar(tickers):
        df = get_df_from_csv(ticker)

        for start in range(0, 24):
            for elapse in range(1, 25):
                df_base = get_ohlcv_range_base(df, start, elapse)

                ror, test_df = backtesting_noise(df_base)

                end = start+elapse-24 if start+elapse > 24 else start+elapse

                df_result = df_result.append([{
                    'ticker': ticker, 
                    'start': start, 
                    'elapse': elapse, 
                    'end': end, 
                    'ror': ror,
                    'buy': test_df[test_df['buy'] == True]['buy'].count()
                }])
                
    df_result.to_csv(out_file)
    return df_result

In [29]:
print('get_base_target_tickers')
tickers = get_base_target_tickers()
time.sleep(0.2)
tickers.append('KRW-WAVES')

print('export_to_csv')
export_to_csv(tickers)

 [Elapsed Time: 0:00:00] |                                                          | (ETA:  --:--:--) 

get_base_target_tickers


 [Elapsed Time: 0:01:04] |###########################################################| (Time: 0:01:04) 
 [Elapsed Time: 0:00:00] |                                                          | (ETA:  --:--:--) 

export_to_csv


 [Elapsed Time: 0:00:22] |###########################################################| (Time: 0:00:22) 


In [30]:
tickers = get_file_list()
file_name = 'result.csv'

get_tickers_ror_by_time(tickers, file_name)
# get_tickers_ror(tickers, file_name)
df = pd.read_csv(file_name)
df = df[['ticker', 'start', 'elapse', 'end', 'ror', 'buy']]

 [Elapsed Time: 0:00:34] |###########################################################| (Time: 0:00:34) 


Unnamed: 0,ticker,start,elapse,end,ror,buy
0,KRW-AXS,0,1,1,0.98108315,15
1,KRW-AXS,0,2,2,0.97737883,16
2,KRW-AXS,0,3,3,0.85529329,18
3,KRW-AXS,0,4,4,0.96125047,17
4,KRW-AXS,0,5,5,0.90647822,16
...,...,...,...,...,...,...
6331,KRW-WAVES,23,20,19,1.07293583,23
6332,KRW-WAVES,23,21,20,1.40709404,22
6333,KRW-WAVES,23,22,21,1.74600257,23
6334,KRW-WAVES,23,23,22,2.02649653,22


In [31]:
df_test = df[df['ror'] > 1.4]
print(len(df_test))
df_test.sort_values(by=['buy', 'elapse', 'ror'], ascending=[False, True, False])

267


Unnamed: 0,ticker,start,elapse,end,ror,buy
4708,KRW-STRK,4,5,9,1.42429727,28
4662,KRW-STRK,2,7,9,1.49271953,26
4685,KRW-STRK,3,6,9,1.49547533,25
5889,KRW-WAVES,5,10,15,1.48576581,25
5891,KRW-WAVES,5,12,17,1.54305072,25
5781,KRW-WAVES,0,22,22,1.44596234,25
5890,KRW-WAVES,5,11,16,1.40734835,24
5868,KRW-WAVES,4,13,17,1.64404069,24
6277,KRW-WAVES,21,14,11,1.47445429,24
5822,KRW-WAVES,2,15,17,1.6162537,24
