# Tick Data 다루기

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyupbit

## 데이터

### Bitcoin 가격 불러오기

    pyupbit.get_ohlcv(ticker, interval, count)

In [None]:
# Request 1,000 one-minute ('minute1') candles for the KRW-BTC ticker.
ticker = 'KRW-BTC'
df = pyupbit.get_ohlcv(ticker=ticker, interval='minute1', count=1000)
df.head()

In [None]:
# Draw the closing-price series and put the ticker in the title.
closing_prices = df['close']
closing_prices.plot()
plt.title(ticker, loc='right', size=20)
plt.show()


### Tick data

> $(p_t, v_t)$

In [None]:
# Keep only the close and volume columns, exposing the close as 'price'.
tick_seq = df[['close', 'volume']].rename(columns={'close': 'price'})
tick_seq.head()

## Remark

    resample & ohlc

In [None]:
# Resample: 1-minute bars ---> 5-minute windows
resampled = tick_seq.resample(rule='5min')
resampled.sum()

In [None]:
# Open/high/low/close of the price inside each resampled window.
price_resampled = resampled['price']
price_resampled.ohlc()

## Time Bar

Tick 데이터 $(p_t, v_t)$를 받아서 rule에 따라 resampling해서

ohlcv (open, high, low, close, volume)을 얻음

Ex) timebar(tick데이터, rule = '1min')

    초마다 (가격, 거래량) 정보를 얻음

    이를 1분마다 resampling함

    그럼 분당 (시작가, 최고가, 최저가, 종가, 거래량)을 return

In [None]:
def timebar(tick_data : pd.DataFrame, rule : str = '1M') -> pd.DataFrame:
    """
    Resample tick data into fixed-interval time bars.

    Input
    - tick_data : DataFrame with a datetime-like index and exactly two
      columns, price then volume. The columns are relabelled by position,
      so their original names do not matter. The caller's frame is left
      untouched.
    - rule : pandas resampling rule (offset alias) giving the bar length.
      NOTE(review): the default '1M' is kept for backward compatibility,
      but pandas reads 'M' as month-end, not minute. Pass e.g. '1min' for
      one-minute bars.
    Output
    - DataFrame indexed by the resampled time bins with columns
      (open, high, low, close, volume).
    """

    # Relabel a copy; assigning to tick_data.columns directly would rename
    # the columns of the DataFrame owned by the caller.
    ticks = tick_data.copy()
    ticks.columns = ['price', 'volume']

    # resampling
    resampled = ticks.resample(rule)

    # ohlcv
    result = resampled['price'].ohlc()
    result['volume'] = resampled['volume'].sum()

    # Bins without ticks have NaN prices: carry the last close forward and
    # use that close as the open/high/low of the empty bin.
    result['close'] = result['close'].ffill()
    for column in ('open', 'high', 'low'):
        result[column] = result[column].fillna(result['close'])

    return result

In [None]:
# Build 150-second time bars. The lowercase 's' seconds alias is used
# because the uppercase 'S' spelling is deprecated in recent pandas.
df_timebar = timebar(tick_seq, '150s')
df_timebar.head()

In [None]:
# Overlay the original 1-minute closes and the 150-second time bars.
plt.plot(df['close'], label='1m')
plt.plot(df_timebar['close'], label='150S')
plt.title('Original(1m) vs Time bar(150S)')
plt.legend(loc='best')
# rotation is given as a number: Matplotlib documents it as a float or
# 'vertical'/'horizontal', not a numeric string.
plt.xticks(rotation=45, size=10)
plt.show()

## Tick bar

$t$ 가 일정범위를 넘어갈 때 마다 tick data $(p_t, v_t)$를 sampling

Ex) T = 3

틱 데이터 $\{(p_t, v_t)\}_{t = 0, 3, 5, 8, 10, 13, 14, \dots}$ 수집 시

$(p_0, v_0), (p_8, v_8), (p_{14}, v_{14}), ....$ 로 resampling 함

In [None]:
def tickbar(tick_data : pd.DataFrame, window_size :int = 10) -> pd.DataFrame:
    """
    Aggregate every `window_size` consecutive ticks into one bar.

    Input
    - tick_data : DataFrame with 'price' and 'volume' columns
    - window_size : number of consecutive ticks per bar (must be >= 1)
    Output
    - DataFrame with columns (open, high, low, close, volume), indexed by
      the first index value of each window. The last window may hold
      fewer than `window_size` ticks.
    Raises
    - ValueError : if window_size is smaller than 1
    """
    if window_size < 1:
        raise ValueError('window_size must be a positive integer')

    ticks = tick_data.reset_index()
    # reset_index() inserts the former index as the first column, labelled
    # with the index name ('index' when the index is unnamed). Looking the
    # label up here keeps named indexes from raising KeyError below.
    label_col = ticks.columns[0]

    # Number each tick with the window it falls into.
    ticks['window_num'] = np.arange(len(ticks)) // window_size

    # ohlcv
    grouped = ticks.groupby('window_num')
    result = grouped['price'].ohlc()
    result['volume'] = grouped['volume'].sum()

    # Label each bar with the first original index value of its window.
    result = result.set_index(grouped[label_col].first())
    return result

In [None]:
# Group the tick sequence into bars of 30 ticks each.
df_tickbar = tickbar(tick_seq, window_size=30)
df_tickbar.head()

In [None]:
# Overlay the original closes, the 150-second time bars and the 30-tick bars.
plt.plot(df['close'], label='1m')
plt.plot(df_timebar['close'], label='150S')
plt.plot(df_tickbar['close'], label='30 tick')
# The title names the 30-tick bars that are actually plotted (it said 120).
plt.title('Original(1m) vs Time bar(150S) vs Tick bar(30 tick)', size=10)
plt.legend(loc='best')
# rotation is given as a number: Matplotlib documents it as a float or
# 'vertical'/'horizontal', not a numeric string.
plt.xticks(rotation=45, size=10)
plt.show()

## Volume bar

(이전 sampling 이후로) $v_t$ 누적량이 일정범위를 넘어갈 때 마다 sampling

In [None]:
def volumebar(tick_data : pd.DataFrame, unit_volume : int) -> pd.DataFrame:
    """
    Aggregate ticks into bars delimited by cumulative traded volume.

    Each tick is assigned to bar number
    floor(running total of volume / unit_volume), so a new bar starts at
    the tick whose running total first reaches the next multiple of
    `unit_volume`. The volume of an individual bar can therefore be
    smaller or larger than `unit_volume`.

    Input
    - tick_data : DataFrame with 'price' and 'volume' columns
    - unit_volume : cumulative-volume step between bars (must be > 0)
    Output
    - DataFrame with columns (open, high, low, close, volume), indexed by
      the first index value of the ticks in each bar.
    Raises
    - ValueError : if unit_volume is not positive
    """
    if unit_volume <= 0:
        raise ValueError('unit_volume must be positive')

    ticks = tick_data.reset_index()
    # reset_index() inserts the former index as the first column, labelled
    # with the index name ('index' when the index is unnamed). Looking the
    # label up here keeps named indexes from raising KeyError below.
    label_col = ticks.columns[0]

    # Group ticks by how many whole units of volume have accumulated.
    ticks['window_num'] = ticks['volume'].cumsum() // unit_volume
    grouped = ticks.groupby('window_num')

    # ohlcv
    result = grouped['price'].ohlc()
    result['volume'] = grouped['volume'].sum()

    # Label each bar with the first original index value of its ticks.
    result = result.set_index(grouped[label_col].first())
    return result

In [None]:
# Group the tick sequence into bars per 80 units of cumulative volume.
df_volumebar = volumebar(tick_seq, unit_volume=80)
df_volumebar.head()

In [None]:
# Overlay the original closes with the time, tick and volume bars.
plt.plot(df['close'], label='1m')
plt.plot(df_timebar['close'], label='150S')
plt.plot(df_tickbar['close'], label='30 tick')
plt.plot(df_volumebar['close'], label='80 volume')
plt.legend(loc='best')
# rotation is given as a number: Matplotlib documents it as a float or
# 'vertical'/'horizontal', not a numeric string.
plt.xticks(rotation=45, size=10)
plt.show()

## Dollar bar

(이전 sampling 이후로) $p_t \times v_t$ 누적량이 일정범위를 넘어갈 때 마다 sampling

In [None]:
def dollarbar(tick_data : pd.DataFrame, unit_dollar : int) -> pd.DataFrame:
    """
    Aggregate ticks into bars delimited by cumulative traded value.

    The value of a tick is price * volume. Each tick is assigned to bar
    number floor(running total of value / unit_dollar), so a new bar
    starts at the tick whose running total first reaches the next
    multiple of `unit_dollar`.

    Input
    - tick_data : DataFrame with 'price' and 'volume' columns
    - unit_dollar : cumulative-value step between bars (must be > 0)
    Output
    - DataFrame with columns (open, high, low, close, volume), indexed by
      the first index value of the ticks in each bar.
    Raises
    - ValueError : if unit_dollar is not positive
    """
    if unit_dollar <= 0:
        raise ValueError('unit_dollar must be positive')

    ticks = tick_data.reset_index()
    # reset_index() inserts the former index as the first column, labelled
    # with the index name ('index' when the index is unnamed). Looking the
    # label up here keeps named indexes from raising KeyError below.
    label_col = ticks.columns[0]

    # value = price x volume
    ticks['value'] = ticks['price'] * ticks['volume']

    # Group ticks by how many whole units of value have accumulated.
    ticks['window_num'] = ticks['value'].cumsum() // unit_dollar
    grouped = ticks.groupby('window_num')

    # ohlcv
    result = grouped['price'].ohlc()
    result['volume'] = grouped['volume'].sum()

    # Label each bar with the first original index value of its ticks.
    result = result.set_index(grouped[label_col].first())
    return result

In [None]:
# Group the tick sequence into bars per 10,000 units of cumulative value.
df_dollarbar = dollarbar(tick_seq, unit_dollar=10000)
df_dollarbar.head()

In [None]:
# Overlay the original closes with all four bar types.
plt.plot(df['close'], label='1m')
plt.plot(df_timebar['close'], label='150S')
plt.plot(df_tickbar['close'], label='30 tick')
plt.plot(df_volumebar['close'], label='80 volume')
plt.plot(df_dollarbar['close'], label='10000 dollar bar')
plt.legend(loc='best')
plt.title('Tick Data Tutorial')
# rotation is given as a number: Matplotlib documents it as a float or
# 'vertical'/'horizontal', not a numeric string.
plt.xticks(rotation=45, size=10)
plt.show()