In [23]:
import pyupbit
from datetime import datetime, timedelta
import pandas as pd
pd.options.display.float_format = '{: .8f}'.format
pd.set_option('display.max_rows', 500)
import time
from collections import deque
import warnings
warnings.filterwarnings(action='ignore')
from functools import reduce
import matplotlib.pyplot as plt
import os
import progressbar

In [24]:
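# Volatility has to be considered in the first place.
# First of all, the range must be large. NEO looks fine -- good: 9 15, bad: 23 5
# I would like to compute the correlations between indicators such as open, high, low, close and range, but have not been able to.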

In [25]:
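# Using the most recent 200 daily candles and 200 hourly candles respectively,
# apply the average traded value, average low price and 15/50 moving-average conditions
# to first pick the tickers that will be backtested.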

def _passes_target_filter(df, min_value):
    """Apply the screening rule to one OHLCV frame.

    A frame qualifies when either
      * the mean of its 'value' column divided by 1,000,000 is at least
        ``min_value``, or the mean of its 'low' column is at least 100000; or
      * the mean 'low' is at least 10000 and ``get_moving_avg(df)`` is truthy.

    Args:
        df: OHLCV DataFrame with at least 'value', 'low' and 'close' columns.
        min_value: Threshold for the scaled mean traded value.

    Returns:
        bool: True when the frame passes the filter.
    """
    value = df['value'].mean() / 1000000
    low = df['low'].mean()

    if value >= min_value or low >= 100000:
        return True
    return bool(low >= 10000 and get_moving_avg(df))


def get_base_target_tickers():
    """Select the KRW-market tickers worth backtesting.

    For every KRW ticker the default ``pyupbit.get_ohlcv`` candles are fetched
    (daily candles according to the original author's notes) and screened with
    a traded-value threshold of 300000. Tickers that do not qualify there are
    screened again on 60-minute candles with a threshold of 100000. A ticker
    is kept when either screen passes.

    Tickers with fewer than 200 rows in the first frame are skipped.

    Returns:
        list: Ticker strings that passed at least one of the two screens,
        in the order returned by ``pyupbit.get_tickers``.
    """
    tickers = pyupbit.get_tickers(fiat='KRW')
    targets = []

    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])

    for ticker in bar(tickers):
        daily = pyupbit.get_ohlcv(ticker)
        # Pause after every request (including skipped tickers) to stay
        # under the exchange's request-rate limit.
        time.sleep(0.2)

        # NOTE(review): pyupbit.get_ohlcv appears to return None when the
        # request fails -- guard so one bad ticker does not abort the scan.
        if daily is None or len(daily) < 200:
            continue

        if _passes_target_filter(daily, 300000):
            # Either screen is sufficient, so the hourly request is not
            # needed once the daily one has passed.
            targets.append(ticker)
            continue

        hourly = pyupbit.get_ohlcv(ticker, interval='minute60')
        time.sleep(0.2)

        if hourly is None or len(hourly) == 0:
            continue

        if _passes_target_filter(hourly, 100000):
            targets.append(ticker)

    return targets

In [35]:
# The API provided by pyupbit cannot return a DataFrame in the shape we need,
# so the data of the pre-filtered tickers is customized first and then saved as CSV.

def export_to_csv(tickers):
    """Download 60-minute candles for each ticker and save them as CSV.

    For every ticker the latest page of candles is requested, followed by up
    to five older pages (each one ending at the oldest timestamp fetched so
    far). The pages are concatenated, sorted by timestamp and written to
    ``data/<ticker>.csv``.

    Failures are reported per ticker and do not stop the remaining downloads.

    Args:
        tickers: Iterable of ticker strings (e.g. 'KRW-AXS').
    """
    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])

    # DataFrame.to_csv does not create missing directories.
    os.makedirs('data', exist_ok=True)

    for ticker in bar(tickers):
        try:
            df = pyupbit.get_ohlcv(ticker=ticker, interval="minute60")
            time.sleep(0.2)

            # NOTE(review): pyupbit.get_ohlcv appears to return None when the
            # request fails -- confirm against the installed pyupbit version.
            if df is None or df.empty:
                print('error', ticker, 'no data returned')
                continue

            dfs = [df]
            for _ in range(5):
                df = pyupbit.get_ohlcv(ticker=ticker, to=df.index[0], interval="minute60")
                time.sleep(0.2)

                # No older candles available: keep what we already have
                # instead of discarding the whole ticker.
                if df is None or df.empty:
                    break
                dfs.append(df)

            df = pd.concat(dfs).sort_index()
            # Guard against a candle appearing on two adjacent pages.
            df = df[~df.index.duplicated(keep='first')]
            df.to_csv('data/' + ticker + '.csv')
        except Exception as exc:
            # Best-effort export: report the cause and move on, but let
            # KeyboardInterrupt / SystemExit propagate.
            print('error', ticker, exc)

In [27]:
def get_file_list():
    """Return the tickers that have a CSV file in the ``data`` directory.

    Only entries ending in ``.csv`` are considered, so hidden files and
    directories such as ``.ipynb_checkpoints`` or ``.DS_Store`` no longer
    produce an empty ticker name. The extension is stripped to obtain the
    ticker.

    Returns:
        list: Ticker names sorted alphabetically.

    Raises:
        FileNotFoundError: If the ``data`` directory does not exist.
    """
    tickers = []

    for file in os.listdir('data'):
        stem, ext = os.path.splitext(file)
        if ext.lower() == '.csv' and stem:
            tickers.append(stem)

    # os.listdir order is arbitrary; sort for reproducible runs.
    return sorted(tickers)

In [28]:
def get_df_from_csv(ticker):
    """Load ``data/<ticker>.csv`` as a timestamp-indexed DataFrame.

    The first (unnamed) CSV column is parsed as datetimes and becomes the
    unnamed index. Two helper columns are appended: 'day' (day of month) and
    'hour' (hour of day), both derived from that timestamp.

    Args:
        ticker: Ticker string used to build the file name.

    Returns:
        pandas.DataFrame: The CSV contents plus 'day' and 'hour' columns.
    """
    stamp_col = 'Unnamed: 0'
    frame = pd.read_csv('data/' + ticker + '.csv')
    stamps = pd.to_datetime(frame[stamp_col])

    frame[stamp_col] = stamps
    frame['day'] = stamps.dt.day
    frame['hour'] = stamps.dt.hour

    # Promote the timestamps to the index and drop the placeholder name.
    return frame.set_index(stamp_col).rename_axis(None)

In [29]:
# Given the desired start hour and elapsed hours, filter the rows, group them into 24-hour bins and return the result.

def get_ohlcv_range_base(df, start, elapse):
    """Aggregate hourly candles into one candle per day for a time window.

    Rows whose 'hour' lies in the window that begins at ``start`` and lasts
    ``elapse`` hours (wrapping past midnight when needed) are kept, then
    grouped into 24-hour bins that begin at ``start`` o'clock.

    Args:
        df: DataFrame with a DatetimeIndex and the columns 'open', 'high',
            'low', 'close', 'volume', 'value' and 'hour'.
        start: First hour of the window (0-23).
        elapse: Length of the window in hours (1-24).

    Returns:
        pandas.DataFrame: One row per 24-hour bin with open=first, high=max,
        low=min, close=last, volume=sum and value=sum.
    """
    # Hours since `start`, modulo 24, handles both the plain window and the
    # one that wraps around midnight with a single comparison.
    in_window = ((df['hour'] - start) % 24) < elapse

    # `resample(..., base=...)` was removed in pandas 2.0; `offset` is the
    # documented replacement and shifts the bin edges to `start` o'clock.
    return df[in_window].resample(
        pd.Timedelta(hours=24), offset=pd.Timedelta(hours=start)
    ).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
        'value': 'sum'
    })

In [30]:
def get_moving_avg(df, short_window=15, long_window=50):
    """Tell whether the short moving average of 'close' exceeds the long one.

    Each average is taken over the last ``window`` values of ``df['close']``;
    when fewer rows are available, all of them are used.

    Args:
        df: Object whose ``df['close']`` yields the closing prices in
            chronological order.
        short_window: Number of trailing rows for the short average.
        long_window: Number of trailing rows for the long average.

    Returns:
        bool: True when the short average is strictly greater than the long
        one. False when there are no closing prices at all.

    Raises:
        ValueError: If either window is smaller than 1.
    """
    if short_window < 1 or long_window < 1:
        raise ValueError('moving-average windows must be >= 1')

    closes = list(df['close'])
    if not closes:
        # Nothing to average; previously this raised ZeroDivisionError.
        return False

    short_tail = closes[-short_window:]
    long_tail = closes[-long_window:]

    curr_short = sum(short_tail) / len(short_tail)
    curr_long = sum(long_tail) / len(long_tail)

    return bool(curr_short > curr_long)

In [31]:
# Backtest by applying the existing VB algorithm.

def backtesting(df, k=0.5):
    """Backtest the breakout rule on a candle frame.

    For every row the entry price is ``target = previous range * k + open``,
    where ``range = high - low``. When the row's high exceeds the target, the
    row's return is ``close / target * 0.9995 * 0.9995``; otherwise it is 1.
    The cumulative return is the product of the per-row returns.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns. It is
            not modified.
        k: Fraction of the previous row's range added to the open.

    Returns:
        tuple: ``(ror, frame)`` where ``ror`` is the cumulative return as a
        float and ``frame`` is a copy of the four price columns extended with
        'range', 'target' and 'ror'.
    """
    # Work on an explicit copy so the column assignments below never write
    # into (or warn about) a view of the caller's frame.
    df = df[['open', 'high', 'low', 'close']].copy()
    df['range'] = df['high'] - df['low']
    df['target'] = df['range'].shift(1) * k + df['open']

    cond = df['high'] > df['target']
    trade_ror = df['close'] / df['target'] * 0.9995 * 0.9995
    # Rows without a trade (and rows with missing data) contribute 1.
    # Assigning the result avoids the chained in-place fillna, which does
    # nothing under pandas copy-on-write.
    df['ror'] = trade_ror.where(cond).fillna(1)

    # Vectorised product; the old df['ror'][i] loop relied on positional
    # access through [] on a label index, which newer pandas rejects.
    ror = float(df['ror'].prod())

    return ror, df

In [32]:
# Compute the ROR while varying the start hour and the elapsed hours.

def get_tickers_ror_by_time(tickers, out_file):
    """Backtest every (start hour, window length) combination per ticker.

    For each ticker the saved CSV is loaded once, then for every start hour
    0-23 and every window length 1-24 the candles are aggregated with
    ``get_ohlcv_range_base`` and backtested with ``backtesting``. The results
    are written to ``out_file`` as CSV and also returned.

    Args:
        tickers: Iterable of ticker strings that have a CSV under ``data/``.
        out_file: Path of the CSV file to write.

    Returns:
        pandas.DataFrame: One row per combination with the columns 'ticker',
        'start', 'elapse', 'end' and 'ror'. 'end' is the hour at which the
        window closes; a window closing exactly at midnight is reported as 24.
    """
    columns = ['ticker', 'start', 'elapse', 'end', 'ror']
    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    records = []

    bar = progressbar.ProgressBar(widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(), ' (', progressbar.ETA(), ') ',])

    for ticker in bar(tickers):
        df = get_df_from_csv(ticker)

        for start in range(0, 24):
            for elapse in range(1, 25):
                df_base = get_ohlcv_range_base(df, start, elapse)

                # Only the cumulative return is needed here.
                ror, _ = backtesting(df_base)

                end = start+elapse-24 if start+elapse > 24 else start+elapse

                records.append({
                    'ticker': ticker,
                    'start': start,
                    'elapse': elapse,
                    'end': end,
                    'ror': ror,
                })

    df_result = pd.DataFrame(records, columns=columns)
    df_result.to_csv(out_file)
    return df_result

In [47]:
# Driver cell. Steps 1 and 2 below are kept commented out; only the backtest
# over the CSV files already present in data/ is executed.

# Step 1 (disabled): pick the candidate tickers.
# print('get_base_target_tickers')
# tickers = get_base_target_tickers()
# time.sleep(0.2)
# tickers = get_file_list()
# tickers.append('KRW-WAVES')

# Step 2 (disabled): download the candles of those tickers to data/.
# print('export_to_csv')
# export_to_csv(tickers)

# Step 3: backtest every ticker that has a CSV in data/.
tickers = get_file_list()
file_name = 'result.csv'

# The result is written to file_name and read back; the column selection
# below drops the saved index column.
get_tickers_ror_by_time(tickers, file_name)
df = pd.read_csv(file_name)
df = df[['ticker', 'start', 'elapse', 'end', 'ror']]

 [Elapsed Time: 0:00:29] |####################################| (Time: 0:00:29) 


In [50]:
# Keep the combinations whose cumulative return is at least 1.2,
# leaving out the KRW-WAVES ticker, and display them.
df = df[(df['ror'] >= 1.2) & (df['ticker'] != 'KRW-WAVES')]
df

Unnamed: 0,ticker,start,elapse,end,ror
20,KRW-AXS,0,21,21,1.2733283
22,KRW-AXS,0,23,23,1.38729008
23,KRW-AXS,0,24,24,1.33572946
91,KRW-AXS,3,20,23,1.3194747
92,KRW-AXS,3,21,24,1.28326718
93,KRW-AXS,3,22,1,1.20084693
138,KRW-AXS,5,19,24,1.201111
229,KRW-AXS,9,14,23,1.20407694
230,KRW-AXS,9,15,24,1.22195766
296,KRW-AXS,12,9,21,1.21260167
