### 업비트 API를 활용한 코인가격변동 예측 모형 개발
* 작성일 : 2022-04-08
* 작성자 : 윤성준
* 버전 : 0.1
* 변경이력 

#### 목차
1. 데이터 수집
2. 데이터 정제
3. 항목 생성
4. 요건 정의 (dev/val, target 등)
5. ML 모델 학습
6. 평가

---

1. 데이터 수집

In [5]:
import pyupbit
import os
import pandas as pd
import pickle

In [6]:
# Ask Upbit for its market codes with fiat="KRW" and keep them in the
# module-level `tickers`, which get_ohlcv_dump() iterates over when downloading.
tickers = pyupbit.get_tickers(fiat="KRW")
print(tickers)

['KRW-BTC', 'KRW-ETH', 'KRW-NEO', 'KRW-MTL', 'KRW-LTC', 'KRW-XRP', 'KRW-ETC', 'KRW-OMG', 'KRW-SNT', 'KRW-WAVES', 'KRW-XEM', 'KRW-QTUM', 'KRW-LSK', 'KRW-STEEM', 'KRW-XLM', 'KRW-ARDR', 'KRW-ARK', 'KRW-STORJ', 'KRW-GRS', 'KRW-REP', 'KRW-ADA', 'KRW-SBD', 'KRW-POWR', 'KRW-BTG', 'KRW-ICX', 'KRW-EOS', 'KRW-TRX', 'KRW-SC', 'KRW-ONT', 'KRW-ZIL', 'KRW-POLY', 'KRW-ZRX', 'KRW-LOOM', 'KRW-BCH', 'KRW-BAT', 'KRW-IOST', 'KRW-RFR', 'KRW-CVC', 'KRW-IQ', 'KRW-IOTA', 'KRW-MFT', 'KRW-ONG', 'KRW-GAS', 'KRW-UPP', 'KRW-ELF', 'KRW-KNC', 'KRW-BSV', 'KRW-THETA', 'KRW-QKC', 'KRW-BTT', 'KRW-MOC', 'KRW-ENJ', 'KRW-TFUEL', 'KRW-MANA', 'KRW-ANKR', 'KRW-AERGO', 'KRW-ATOM', 'KRW-TT', 'KRW-CRE', 'KRW-MBL', 'KRW-WAXP', 'KRW-HBAR', 'KRW-MED', 'KRW-MLK', 'KRW-STPT', 'KRW-ORBS', 'KRW-VET', 'KRW-CHZ', 'KRW-STMX', 'KRW-DKA', 'KRW-HIVE', 'KRW-KAVA', 'KRW-AHT', 'KRW-LINK', 'KRW-XTZ', 'KRW-BORA', 'KRW-JST', 'KRW-CRO', 'KRW-TON', 'KRW-SXP', 'KRW-HUNT', 'KRW-PLA', 'KRW-DOT', 'KRW-SRM', 'KRW-MVL', 'KRW-STRAX', 'KRW-AQT', 'KRW-GLM', 

In [3]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Base date of the analysis, as an ISO-8601 date string.
WORKDATE = '2022-04-01'
# How many months of history before WORKDATE to collect.
REQMONTHS = 7

# Parsed base date and the start of the collection window (REQMONTHS earlier).
workdate = datetime.fromisoformat(WORKDATE)
startdate = workdate - relativedelta(months=REQMONTHS)

# ohlcvs = {}
# for ticker in tickers:
    # ohlcvs[ticker] = pyupbit.get_ohlcv_from(ticker, interval="minute1", to="20220401", fromDatetime=startdate)

In [4]:
# Fetch n months of data - implemented as a reusable function
def get_ohlcv_dump(filename = 'data.pickle', workdate = None, reqmonths = 7, force_download = False):
    """Return per-ticker 1-minute OHLCV data, using a pickle file as a cache.

    If ``filename`` exists and ``force_download`` is false, the pickled
    dictionary is loaded and returned as-is. In that case ``workdate`` and
    ``reqmonths`` are NOT checked against the cached content, so pass
    ``force_download=True`` (or a different ``filename``) after changing them.

    Otherwise the data is downloaded with ``pyupbit.get_ohlcv_from`` for every
    entry of the module-level ``tickers`` list and written to ``filename``.

    Parameters
        filename (str): path of the pickle cache file.
        workdate (datetime | str | None): base date of the work. The download
            is requested up to ``workdate`` formatted as ``%Y%m%d``, i.e. the
            time-of-day part is dropped. A string is parsed with
            ``datetime.fromisoformat``. ``None`` means "now", evaluated at
            call time.
        reqmonths (int): number of months before ``workdate`` to request.
        force_download (bool): download even if ``filename`` already exists.

    Returns
        dict: ticker -> whatever ``pyupbit.get_ohlcv_from`` returned for it
        (the notebook treats the values as pandas DataFrames).
    """
    if not force_download and os.path.exists(filename):
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # cache files that were produced by this function.
        with open(filename, "rb") as fr:
            return pickle.load(fr)

    # Resolve the base date at call time. A `datetime.now()` default in the
    # signature would be frozen at the moment the function was defined.
    if workdate is None:
        workdate = datetime.now()
    elif isinstance(workdate, str):
        workdate = datetime.fromisoformat(workdate)

    startdate = workdate - relativedelta(months=reqmonths)
    to_date = workdate.strftime("%Y%m%d")

    ohlcvs = {}
    for ticker in tickers:
        ohlcvs[ticker] = pyupbit.get_ohlcv_from(ticker, interval="minute1", to=to_date, fromDatetime=startdate)
        # just for fast test, get 3 rows
        # ohlcvs[ticker] = pyupbit.get_ohlcv(ticker, interval="minute1", to=to_date, count = 3)

    # Write the cache only after every ticker has been fetched, so a failed
    # run does not leave a partial file that later calls would trust.
    with open(filename, "wb") as fw:
        pickle.dump(ohlcvs, fw)

    return ohlcvs
    

In [7]:
# Load the OHLCV dictionary. If 'data.pickle' already exists it is reused as-is,
# whatever workdate / reqmonths are passed; delete the file or pass
# force_download=True to refresh it.
ohlcvs = get_ohlcv_dump(filename = 'data.pickle', workdate = workdate, reqmonths = REQMONTHS)

2. 데이터 정제

In [8]:
# Print one line per ticker to compare how much data each market has.
# NOTE: for a DataFrame, `.size` is the number of elements (rows * columns),
# not the number of rows.
for key, val in ohlcvs.items():
    print(key, val.size)

KRW-BTC 1826814
KRW-ETH 1826502
KRW-NEO 1716108
KRW-MTL 1536216
KRW-LTC 1672956
KRW-XRP 1826754
KRW-ETC 1813032
KRW-OMG 1737996
KRW-SNT 1602306
KRW-WAVES 1704420
KRW-XEM 1476048
KRW-QTUM 1761078
KRW-LSK 1462116
KRW-STEEM 1488834
KRW-XLM 1735842
KRW-ARDR 1445334
KRW-ARK 1464846
KRW-STORJ 1581816
KRW-GRS 1376070
KRW-REP 1449246
KRW-ADA 1811994
KRW-SBD 1353252
KRW-POWR 1685922
KRW-BTG 1629966
KRW-ICX 1668246
KRW-EOS 1788900
KRW-TRX 1810698
KRW-SC 1563060
KRW-ONT 1550724
KRW-ZIL 1551738
KRW-POLY 1599582
KRW-ZRX 1424304
KRW-LOOM 1467552
KRW-BCH 1654794
KRW-BAT 1651668
KRW-IOST 1458768
KRW-RFR 1388598
KRW-CVC 1581660
KRW-IQ 1310058
KRW-IOTA 1476150
KRW-MFT 1449816
KRW-ONG 1575312
KRW-GAS 1552836
KRW-UPP 1354794
KRW-ELF 1613814
KRW-KNC 1610424
KRW-BSV 1532196
KRW-THETA 1591764
KRW-QKC 1389702
KRW-BTT 1755510
KRW-MOC 1544514
KRW-ENJ 1656336
KRW-TFUEL 1603488
KRW-MANA 1802304
KRW-ANKR 1557480
KRW-AERGO 1543176
KRW-ATOM 1799424
KRW-TT 1438518
KRW-CRE 1528842
KRW-MBL 1547706
KRW-WAXP 1692864
KRW-

In [9]:
# Sanity check before the time-based join further down: the frame must be
# indexed by timestamps. Only the KRW-BTC frame is checked here.
assert isinstance(ohlcvs['KRW-BTC'].index, pd.DatetimeIndex), 'not datetime index'

In [10]:
# Display the KRW-BTC frame to inspect its columns and time range.
ohlcvs['KRW-BTC']

Unnamed: 0,open,high,low,close,volume,value
2021-09-01 00:00:00,55326000.0,55422000.0,55326000.0,55327000.0,16.458649,9.113422e+08
2021-09-01 00:01:00,55327000.0,55399000.0,55327000.0,55393000.0,9.624262,5.327570e+08
2021-09-01 00:02:00,55400000.0,55477000.0,55391000.0,55443000.0,12.328954,6.834197e+08
2021-09-01 00:03:00,55439000.0,55463000.0,55343000.0,55350000.0,18.474244,1.023428e+09
2021-09-01 00:04:00,55378000.0,55385000.0,55331000.0,55340000.0,12.750241,7.056209e+08
...,...,...,...,...,...,...
2022-03-31 23:55:00,56461000.0,56485000.0,56461000.0,56481000.0,7.931584,4.479158e+08
2022-03-31 23:56:00,56480000.0,56490000.0,56480000.0,56490000.0,4.296142,2.426775e+08
2022-03-31 23:57:00,56490000.0,56499000.0,56485000.0,56488000.0,5.051226,2.853419e+08
2022-03-31 23:58:00,56489000.0,56505000.0,56486000.0,56499000.0,3.184290,1.799036e+08


In [23]:
# Complete 1-minute grid covering [startdate, workdate).
# The right edge is dropped by hand instead of via `closed='left'`: that
# keyword was removed from pd.date_range in pandas 2.0 (replaced by
# `inclusive=`), so slicing works on both older and newer versions.
# 'min' is the non-deprecated spelling of the 'T' frequency alias.
tot_idx = pd.date_range(start = startdate, end = workdate, freq = 'min')
if len(tot_idx) > 0 and tot_idx[-1] == workdate:
    tot_idx = tot_idx[:-1]

# One gap-filled, ticker-prefixed frame per market; concatenated once at the
# end instead of joining onto a growing frame inside the loop.
frames = []

for key, val in ohlcvs.items():

    # Align the ticker's candles to the full grid; minutes without a candle
    # become NaN rows.
    joined_val = pd.DataFrame(index = tot_idx).join(val, how = 'left')

    # Assign the filled columns back explicitly. `df.col.fillna(..., inplace=True)`
    # is chained assignment and does not update the frame under copy-on-write.
    # No trade in a minute: the price stays at the last known close.
    # Minutes before the ticker's first candle remain NaN.
    joined_val["close"] = joined_val["close"].ffill()
    for col in ("open", "high", "low"):
        joined_val[col] = joined_val[col].fillna(joined_val["close"])

    # No trade in a minute: nothing was traded, so both the traded quantity
    # (volume) and the traded amount (value) are 0. Previously `value` was
    # filled with the close price, which is not a traded amount.
    joined_val["volume"] = joined_val["volume"].fillna(0)
    joined_val["value"] = joined_val["value"].fillna(0)

    joined_val = joined_val.rename(columns = {"open":key+"-open"
                , "high":key+"-high"
                , "low":key+"-low"
                , "close":key+"-close"
                , "volume":key+"-volume"
                , "value":key+"-value"})

    frames.append(joined_val)

# Wide table: one row per minute, six columns per ticker.
if frames:
    tot_set = pd.concat(frames, axis = 1)
else:
    tot_set = pd.DataFrame(index = tot_idx)
    

In [24]:
# Display the column labels of the merged table (named "<ticker>-<field>").
tot_set.columns

Index(['KRW-BTC-open', 'KRW-BTC-high', 'KRW-BTC-low', 'KRW-BTC-close',
       'KRW-BTC-volume', 'KRW-BTC-value', 'KRW-ETH-open', 'KRW-ETH-high',
       'KRW-ETH-low', 'KRW-ETH-close',
       ...
       'KRW-T-low', 'KRW-T-close', 'KRW-T-volume', 'KRW-T-value',
       'KRW-CELO-open', 'KRW-CELO-high', 'KRW-CELO-low', 'KRW-CELO-close',
       'KRW-CELO-volume', 'KRW-CELO-value'],
      dtype='object', length=678)

TypeError: unsupported operand type(s) for +: 'int' and 'str'