In [1]:
import pandas as pd
import numpy as np
from typing import List

In [2]:
# Silence pandas' chained-assignment (SettingWithCopy) check for this session.
pd.options.mode.chained_assignment = None
# Render floats with three decimal places in displayed frames.
pd.set_option('display.float_format', '{:.3f}'.format)

소수점 표시를 위한 출력 설정 변경

# 데이터 불러오기

In [3]:
# Input CSV files; `data` keeps one frame per file, in this order.
names = ['data/bitcoin.csv', 'data/repl.csv', 'data/etherium.csv']
data = [pd.read_csv(path) for path in names]

# 결과 데이터를 저장할 데이터 프레임 생성

In [4]:
# Result frames, one per loaded frame, seeded with the raw close / volume columns.
# .copy() detaches each frame from its source so indicator columns can be
# added later without tripping pandas' SettingWithCopyWarning.
res = [ticker_data[['close', 'volume']].copy() for ticker_data in data]

res는 순서대로 bitcoin, repl, etherium의 기술지표를 저장할 리스트

In [5]:
# Timestamps taken from the first frame; kept separately and not used as a model feature.
time = data[0].loc[:, ['time']]

In [6]:
# Preview the first result frame (close / volume only at this point).
res[0]

Unnamed: 0,close,volume
0,2996000.000,11139.871
1,3073000.000,8279.465
2,3119000.000,10731.677
3,3096000.000,9899.323
4,3083000.000,9472.610
...,...,...
4016,22320000.000,1666.680
4017,22403000.000,1587.818
4018,23004000.000,1675.061
4019,22790000.000,1505.955


In [7]:
class Indicator(object):
    '''
        Append technical-indicator columns to a list of result frames.

        data: source frames; the indicators read 'close' and 'volume',
              set_mfi additionally reads 'high' and 'low'
        res:  result frames, one per source frame and row-aligned with it;
              every set_* method replaces self.res[i] with a frame that
              carries the new columns (the list passed in is updated)

        The source frames in data are never modified.
    '''
    # window lengths shared by set_ma, set_std and set_ema
    WINDOWS = (5, 10, 20, 60)

    def __init__(self,
                 data: List[pd.DataFrame],
                 res: List[pd.DataFrame]):
        self.data = data
        self.res = res

    def _apply(self, func) -> None:
        '''
            replace every result frame with func(result frame, source frame)
            works for any number of frames, not only three
        '''
        for i, (frame, coin) in enumerate(zip(self.res, self.data)):
            self.res[i] = func(frame, coin)

    @staticmethod
    def _volume_index(coin: pd.DataFrame, start: float,
                      on_rising_volume: bool) -> np.ndarray:
        '''
            shared recurrence of nvi / pvi
            the index moves by the relative close change on rows whose volume
            rose (pvi) or fell (nvi) against the previous row, and is carried
            over unchanged on every other row
        '''
        close = coin['close'].to_numpy(dtype=float)
        volume = coin['volume'].to_numpy(dtype=float)
        index = np.empty(len(coin), dtype=float)
        if len(index) == 0:
            return index

        index[0] = start
        for i in range(1, len(index)):
            if on_rising_volume:
                active = volume[i] > volume[i-1]
            else:
                active = volume[i] < volume[i-1]

            if active:
                index[i] = index[i-1] + (close[i] - close[i-1]) * \
                           index[i-1] / close[i-1]
            else:
                index[i] = index[i-1]
        return index

    def set_log_scale(self) -> None:
        '''
            transform close price to log scale close price
        '''
        def log_scale(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            return res.assign(close_log=np.log(coin['close']))

        self._apply(log_scale)

    def set_nvi(self) -> None:
        '''
            add nvi(negative volume index) to dataframe
            starts at 1 and only moves on rows where volume decreased
        '''
        def nvi(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            return res.assign(nvi=self._volume_index(coin, 1.0, False))

        self._apply(nvi)

    def set_pvi(self) -> None:
        '''
            add pvi(positive volume index) to dataframe
            starts at 100 and only moves on rows where volume increased
        '''
        def pvi(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            # same recurrence as nvi; the previous implementation had a
            # misplaced parenthesis that reduced every update to close[i]
            return res.assign(pvi=self._volume_index(coin, 100.0, True))

        self._apply(pvi)

    def set_ma(self) -> None:
        '''
            add ma(moving average) to dataframe
            It returns ma for 5, 10, 20, 60 intervals
            data's interval is 12hours
        '''
        def ma(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            close = coin['close']
            columns = {'ma_' + str(day): close.rolling(day).mean()
                       for day in self.WINDOWS}
            return res.assign(**columns)

        self._apply(ma)

    def set_rsi(self, period: int=14) -> None:
        '''
            add rsi(relative strength index) to dataframe
            gains and losses are averaged with a simple rolling mean over
            `period` rows; rsi = 100 * AU / (AU + AD)
        '''
        def rsi(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            delta = coin['close'].diff(1)
            # the first row has no previous close and counts as "no move"
            up = delta.where(delta > 0, 0.0)
            down = (-delta).where(delta < 0, 0.0)

            au = up.rolling(window=period, min_periods=period).mean()
            ad = down.rolling(window=period, min_periods=period).mean()

            return res.assign(rsi=au.div(ad + au) * 100)

        self._apply(rsi)

    def set_vpt(self, last_vpt: float=2402.1359) -> None:
        '''
            add vpt(volume price trend) to dataframe
            last_vpt is the vpt of the most recent row; earlier rows are
            reconstructed backwards from it
            the same anchor is used for every frame
        '''
        def vpt(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            close = coin['close'].to_numpy(dtype=float)
            volume = coin['volume'].to_numpy(dtype=float)
            vpt_list = np.empty(len(coin), dtype=float)
            if len(vpt_list) > 0:
                vpt_list[-1] = last_vpt

            # invert vpt[i] = vpt[i-1] + volume[i] * relative close change
            for i in range(len(vpt_list) - 1, 0, -1):
                vpt_list[i-1] = vpt_list[i] - volume[i] * \
                                (close[i] - close[i-1]) / \
                                close[i-1]
            return res.assign(vpt=vpt_list)

        self._apply(vpt)

    def set_obv(self, initial_obv: float=68734.4525) -> None:
        '''
            add obv(on-balance volume) to dataframe
            initial_obv is the obv of the first row
            the same starting value is used for every frame
        '''
        def obv(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            # +1 when close rose, -1 when it fell, 0 when unchanged / first row
            direction = np.sign(coin['close'].diff()).fillna(0.0)
            signed_volume = direction * coin['volume']
            return res.assign(obv=initial_obv + signed_volume.cumsum())

        self._apply(obv)

    def set_std(self) -> None:
        '''
            add standard deviation to dataframe
            It returns std for 5, 10, 20, 60 intervals
            data's interval is 12hours
        '''
        def std(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            close = coin['close']
            # pandas' rolling std is the sample std (ddof=1), same as set_bb
            columns = {'std_' + str(day): close.rolling(day).std()
                       for day in self.WINDOWS}
            return res.assign(**columns)

        self._apply(std)

    def set_mfi(self, period: int=14) -> None:
        '''
            set mfi(money flow index) to dataframe
            can set periods
            the first `period` rows are NaN
        '''
        def mfi(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            typical_price = (coin['close'] + coin['high'] + coin['low']) / 3
            money_flow = typical_price * coin['volume']
            delta = typical_price.diff()

            # a row's own money flow is positive / negative depending on
            # how its typical price moved against the previous row
            positive_flow = money_flow.where(delta > 0, 0.0)
            negative_flow = money_flow.where(delta < 0, 0.0)
            # the first row has no previous price, so it belongs to no window
            positive_flow.iloc[:1] = np.nan
            negative_flow.iloc[:1] = np.nan

            positive_mf = positive_flow.rolling(period).sum()
            negative_mf = negative_flow.rolling(period).sum()

            return res.assign(mfi=100 * positive_mf / (positive_mf + negative_mf))

        self._apply(mfi)

    def set_ema(self) -> None:
        '''
            set ema(exponential moving average) to dataframe
            It returns ema for 5, 10, 20, 60 intervals
            data's interval is 12hours
        '''
        def ema(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            close = coin['close']
            columns = {'ema_' + str(day): close.ewm(span=day, adjust=False).mean()
                       for day in self.WINDOWS}
            return res.assign(**columns)

        self._apply(ema)

    def set_fi(self, period: int=14) -> None:
        '''
            set fi(force index) to dataframe
            fi = (close - close `period` rows earlier) * volume
        '''
        def fi(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            return res.assign(fi=coin['close'].diff(period) * coin['volume'])

        self._apply(fi)

    def set_bb(self, period: int=20, k: int=2) -> None:
        '''
            set bb(bollinger band) to dataframe
            adds ubb / mbb / lbb: rolling mean of close and the mean
            plus / minus k rolling standard deviations
        '''
        def bb(res: pd.DataFrame, coin: pd.DataFrame) -> pd.DataFrame:
            close = coin['close']
            mbb = close.rolling(period).mean()
            width = k * close.rolling(period).std()
            return res.assign(ubb=mbb + width, mbb=mbb, lbb=mbb - width)

        self._apply(bb)

    def set_indicators(self) -> None:
        '''
            run every set_* method with its default arguments
            the call order fixes the column order of the result frames
        '''
        self.set_log_scale()
        self.set_nvi()
        self.set_pvi()
        self.set_ma()
        self.set_rsi()
        self.set_vpt()
        self.set_obv()
        self.set_std()
        self.set_mfi()
        self.set_ema()
        self.set_fi()
        self.set_bb()

    def get_res(self) -> List[pd.DataFrame]:
        '''
            return Indicator(object)'s result
            rows containing NaN are dropped and the index is renumbered
        '''
        for i, frame in enumerate(self.res):
            self.res[i] = frame.dropna().reset_index(drop=True)
        return self.res

In [8]:
# Indicator stores references to both lists, so `res` itself is updated by the set_* calls.
indicator = Indicator(data, res)

In [9]:
# Compute every indicator column on each result frame.
indicator.set_indicators()

In [10]:
# get_res() drops rows that contain NaN and resets the index before returning the frames.
res = indicator.get_res()

## 기술지표 요약

In [11]:
# Column dtypes and non-null counts of the first (bitcoin) indicator frame.
res[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3962 entries, 0 to 3961
Data columns (total 25 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   close      3962 non-null   float64
 1   volume     3962 non-null   float64
 2   close_log  3962 non-null   float64
 3   nvi        3962 non-null   float64
 4   pvi        3962 non-null   float64
 5   ma_5       3962 non-null   float64
 6   ma_10      3962 non-null   float64
 7   ma_20      3962 non-null   float64
 8   ma_60      3962 non-null   float64
 9   rsi        3962 non-null   float64
 10  vpt        3962 non-null   float64
 11  obv        3962 non-null   float64
 12  std_5      3962 non-null   float64
 13  std_10     3962 non-null   float64
 14  std_20     3962 non-null   float64
 15  std_60     3962 non-null   float64
 16  mfi        3962 non-null   float64
 17  ema_5      3962 non-null   float64
 18  ema_10     3962 non-null   float64
 19  ema_20     3962 non-null   float64
 20  ema_60  

In [12]:
# Column dtypes and non-null counts of the second (repl) indicator frame.
res[1].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3962 entries, 0 to 3961
Data columns (total 25 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   close      3962 non-null   float64
 1   volume     3962 non-null   float64
 2   close_log  3962 non-null   float64
 3   nvi        3962 non-null   float64
 4   pvi        3962 non-null   float64
 5   ma_5       3962 non-null   float64
 6   ma_10      3962 non-null   float64
 7   ma_20      3962 non-null   float64
 8   ma_60      3962 non-null   float64
 9   rsi        3962 non-null   float64
 10  vpt        3962 non-null   float64
 11  obv        3962 non-null   float64
 12  std_5      3962 non-null   float64
 13  std_10     3962 non-null   float64
 14  std_20     3962 non-null   float64
 15  std_60     3962 non-null   float64
 16  mfi        3962 non-null   float64
 17  ema_5      3962 non-null   float64
 18  ema_10     3962 non-null   float64
 19  ema_20     3962 non-null   float64
 20  ema_60  

In [13]:
# Column dtypes and non-null counts of the third (etherium) indicator frame.
res[2].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3962 entries, 0 to 3961
Data columns (total 25 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   close      3962 non-null   float64
 1   volume     3962 non-null   float64
 2   close_log  3962 non-null   float64
 3   nvi        3962 non-null   float64
 4   pvi        3962 non-null   float64
 5   ma_5       3962 non-null   float64
 6   ma_10      3962 non-null   float64
 7   ma_20      3962 non-null   float64
 8   ma_60      3962 non-null   float64
 9   rsi        3962 non-null   float64
 10  vpt        3962 non-null   float64
 11  obv        3962 non-null   float64
 12  std_5      3962 non-null   float64
 13  std_10     3962 non-null   float64
 14  std_20     3962 non-null   float64
 15  std_60     3962 non-null   float64
 16  mfi        3962 non-null   float64
 17  ema_5      3962 non-null   float64
 18  ema_10     3962 non-null   float64
 19  ema_20     3962 non-null   float64
 20  ema_60  

## 열 순서 변경

In [14]:
# Move 'volume' to the last position and keep every other column in place.
# Selecting by name (instead of rotating positions) gives the same order on
# the first run and leaves it unchanged if the cell is executed again.
col = [name for name in res[0].columns if name != 'volume'] + ['volume']

for i, frame in enumerate(res):
    res[i] = frame[col]

## 시간 데이터 저장

In [15]:
# Row count and dtype of the timestamp frame before trimming.
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4021 entries, 0 to 4020
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   time    4021 non-null   object
dtypes: object(1)
memory usage: 31.5+ KB


In [16]:
# Number of rows get_res() removed from the indicator frames.
# NOTE(review): this assumes all removed rows are the leading warm-up rows
# (NaN from the longest rolling window) — confirm no rows are dropped mid-series.
remove_row = len(time) - len(res[0])
# Trim the same number of leading rows so timestamps stay aligned with the
# indicator frames; re-running the cell removes nothing further.
time = time.iloc[remove_row:].reset_index(drop=True)

In [17]:
# Preview the trimmed timestamp frame.
time

Unnamed: 0,time
0,2017-06-30 00:00:00
1,2017-06-30 12:00:00
2,2017-07-01 00:00:00
3,2017-07-01 12:00:00
4,2017-07-02 00:00:00
...,...
3957,2022-11-29 00:00:00
3958,2022-11-29 12:00:00
3959,2022-11-30 00:00:00
3960,2022-11-30 12:00:00


In [20]:
# Write the trimmed timestamps without the index column.
time.to_csv('indicator/time.csv', index=False)

## 기술지표 및 시간 데이터 저장

In [21]:
# Write each indicator frame to its CSV file (same order as the input files), without the index column.
indicator_paths = [
    'indicator/bitcoin_indicator.csv',
    'indicator/repl_indicator.csv',
    'indicator/etherium_indicator.csv',
]
for position, path in enumerate(indicator_paths):
    res[position].to_csv(path, index=False)

# 기술지표 참조

## Log scale
$$
log\; close\; price = log(close)
$$

## NVI(Negative Volume Index)
$$
NVI =
\begin{cases}
NVI_{prev} + \frac{close - close_{prev}} {close_{prev}} \times NVI_{prev}\quad ,
\;volume_{today} < volume_{yesterday}\\
NVI_{prev}\quad \text{, otherwise}\\
\end{cases}
$$

## PVI(Positive Volume Index)
$$
PVI =
\begin{cases}
PVI_{prev} + \frac{close - close_{prev}}{close_{prev}} \times PVI_{prev}\quad ,\; volume_{today} > volume_{yesterday}\\
PVI_{prev}\quad ,\; \text{otherwise}
\end{cases}
$$

## MA(Moving Average)
$$
MA_k = \frac{1}{k} \sum_{i=n-k+1}^{n}p_i
$$

## RSI(Relative Strength Index)
$$
\begin{align}
&case1.\; \text{the close being higher than the previous close}\\
&\quad U = close_{now} - close_{prev}\\
&\quad D = 0\\
&\\
&case2.\; \text{the close being lower than the previous period's close}\\
&\quad U = 0\\
&\quad D = close_{prev} - close_{now}\\
&\\
&RS = \frac{SMMA(U, n)}{SMMA(D, n)}\\
&RSI = 100 - \frac{100}{1+RS}
\end{align}
$$

## VPT(Volume Price Trend)
$$
VPT = VPT_{prev} + volume \times \frac{close_{today} - close_{prev}} {close_{prev}}
$$

## OBV(On-balance Volume)
$$
OBV = OBV_{prev} +
\begin{cases}
    volume \quad \text{if close > }close_{prev}\\
    0 \quad \text{if close = }close_{prev}\\
    -volume \quad \text{if close < }close_{prev}
\end{cases}
$$

## STD(Standard Deviation)
$$
\sigma = \sqrt{\frac{1}{N}\sum_{i=1}^{N}(x_i - \mu) ^ 2}
$$

## MFI(Money Flow Index)
$$
\begin{align}
    &typical\; price = \frac{high + low + close} {3}\\
    &money\; flow = typical\; price \times volume\\
    &\begin{cases}
    &    \text{positive money flow + money flow, if typical price > } typical\; price_{prev}\\
    &    \text{negative money flow + money flow, if typical price < } typical\; price_{prev}
    &\end{cases}\\
    &money\; ratio = \frac{positive\; money\; flow} {negative\; money\; flow}\\
    &MFI = 100 - \frac{100} {1 + money\; ratio}
\end{align}
$$

## EMA(Exponential Moving Average)
$$
    EMA = price \times \frac{smoothing}{1 + days} + EMA_{prev} \times (1 - \frac{smoothing}{1 + days})
$$

## FI(Force Index)
$$
    FI = (close - close_{prev}) \times volume
$$

## BB(Bollinger Band)
$$
\begin{align}
    &\%b = \frac {(last - lowerBB)} {upperBB - lowerBB}\\
    &Bandwidth = \frac {upperBB - lowerBB} {middleBB}
\end{align}
$$