In [1]:
import talib
import pandas as pd
from pandas import DataFrame, to_datetime
import json

In [2]:
# talib.get_functions()
# Reference: https://github.com/freqtrade/freqtrade/blob/c57d8078458fe313f66abf0146b450c5be97e060/freqtrade/data/converter.py

In [3]:
def getAtrRatio(df, period=14):
    """
    Average volatility ratio: ATR(period) / MA(period).

    :param df: frame providing 'high', 'low' and 'close' columns.
    :param period: look-back forwarded as ``timeperiod`` to both talib.ATR
        and talib.MA.
    :return: Series named 'volatility', placed on df's index, with NaN
        entries removed.
    """
    true_range_avg = talib.ATR(df['high'], df['low'], df['close'], timeperiod=period)
    close_avg = talib.MA(df['close'], timeperiod=period)

    # Divide first, then label the result and discard the NaN entries.
    ratio = pd.Series(true_range_avg / close_avg, index=df.index, name='volatility')
    return ratio.dropna()

In [4]:
def getAtr(df, period=14):
    """
    Average True Range over ``period`` bars, as computed by talib.ATR.

    Unlike getAtrRatio, the value is not divided by a moving average and
    NaN entries are not dropped.

    :param df: frame providing 'high', 'low' and 'close' columns.
    :param period: look-back forwarded as ``timeperiod`` to talib.ATR.
    :return: whatever talib.ATR returns for the three price columns.
    """
    return talib.ATR(df['high'], df['low'], df['close'], timeperiod=period)

ATR DEFINITION
1. The average true range (ATR) is a market volatility indicator used in technical analysis.
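2. It is typically derived from the 14-period (classically 14-day) simple moving average of a series of true range values, where the true range of a bar is the largest of: high − low, |high − previous close|, and |low − previous close|.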
3. The ATR was originally developed for use in commodities markets but has since been applied to all types of securities.

In [5]:
# Load the raw candle list for the chosen symbol from the local JSON file.
# Presumably 5-minute candles, judging by the "-5m" in the file name — TODO confirm.
symbol = 'BNB'
with open(f'./data/ftx/{symbol}_USD-5m.json', 'r') as f:
    ohlcv_list = json.load(f)

In [6]:
ohlcv_list[-1]

[1657616400000, 223.098, 223.298, 223.023, 223.091, 4818.42811]

In [7]:
# Build the candle frame; each raw row supplies the six columns named below, in order.
df = pd.DataFrame(ohlcv_list, columns=['date','open','high','low','close','volume'])
# 'date' is parsed as epoch milliseconds, so unit='ms' fully determines the conversion.
# infer_datetime_format was removed: it only concerns string inputs, has no effect
# alongside `unit`, and is deprecated (emits a warning) in pandas 2.x.
df['date'] = pd.to_datetime(df['date'], unit='ms', utc=True)

In [8]:
len(df)

160514

In [23]:
df.tail(2)

Unnamed: 0,date,open,high,low,close,volume
160275,2022-07-11 13:10:00+00:00,232.521,232.521,232.233,232.32,244092.26971
160276,2022-07-11 13:15:00+00:00,232.32,232.636,232.262,232.569,274492.82811


In [24]:
def timeframe_to_minutes(timeframe: str) -> int:
    """
    Convert a timeframe such as '15m', '1h', '4H' or '1D' to a number of minutes.

    The unit suffix is case-insensitive and surrounding whitespace is ignored.
    Note that because of the lower-casing, 'M' means minutes here, not months.

    :param timeframe: positive integer count followed by one unit letter:
        'm' (minutes), 'h' (hours), 'd' (days) or 'w' (weeks).
    :return: the timeframe length in minutes, always as an ``int``.
    :raises ValueError: if the unit is unsupported, the count is missing or
        not an integer, or the count is not positive.
    """
    minutes_per_unit = {'m': 1, 'h': 60, 'd': 1440, 'w': 10080}

    normalized = timeframe.lower().strip()
    # Need at least one digit plus the unit letter.
    if len(normalized) < 2 or normalized[-1] not in minutes_per_unit:
        raise ValueError(f'Unsupported timeframe: {timeframe!r}')

    try:
        count = int(normalized[:-1])
    except ValueError:
        raise ValueError(f'Unsupported timeframe: {timeframe!r}') from None
    if count <= 0:
        raise ValueError(f'Timeframe must be positive: {timeframe!r}')

    # Always an int, so every unit yields the same return type.
    return count * minutes_per_unit[normalized[-1]]
    
def resample_dataframe(df: DataFrame, timeframe: str, drop_incomplete: bool = True) -> DataFrame:
    """
    Aggregate the candles in ``df`` into wider OHLCV candles.

    :param df: frame with a 'date' column plus 'open', 'high', 'low',
        'close' and 'volume' columns.
    :param timeframe: target candle size in the format accepted by
        timeframe_to_minutes, e.g. '15m', '1h', '4H', '1D'.
    :param drop_incomplete: when True, the last resampled candle is removed
        (it may cover only part of its window) and a notice is printed.
    :return: resampled frame indexed by 'date' with columns
        open, high, low, close, volume.
    """
    ohlcv_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    rule = f'{timeframe_to_minutes(timeframe)}min'

    # Buckets are anchored at the first timestamp in the data (origin='start').
    resampler = df.resample(rule, on='date', origin='start')
    candles = resampler.agg(ohlcv_rules).dropna()
    candles = candles[['open', 'high', 'low', 'close', 'volume']]

    if drop_incomplete:
        # The final bucket is discarded unconditionally, complete or not.
        candles = candles.drop(candles.tail(1).index)
        print('Dropping last candle')
    return candles

In [25]:
df1 = resample_dataframe(df, '15m')
df1.head()

Dropping last candle


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01 00:00:00+00:00,37.422,37.465,37.069,37.093,76243.89899
2021-01-01 00:15:00+00:00,37.093,37.157,37.048,37.134,25370.23378
2021-01-01 00:30:00+00:00,37.134,37.207,37.103,37.189,8143.24342
2021-01-01 00:45:00+00:00,37.189,37.473,37.177,37.468,782.95897
2021-01-01 01:00:00+00:00,37.468,37.614,37.423,37.612,391.40355


In [26]:
print(len(resample_dataframe(df, '15m')))
print(len(resample_dataframe(df, '1h')))
print(len(resample_dataframe(df, '4H')))
print(len(resample_dataframe(df, '1D')))

Dropping last candle
53428
Dropping last candle
13357
Dropping last candle
3339
Dropping last candle
556


In [29]:
getAtr(resample_dataframe(df, '15m'))

Dropping last candle


date
2021-01-01 00:00:00+00:00         NaN
2021-01-01 00:15:00+00:00         NaN
2021-01-01 00:30:00+00:00         NaN
2021-01-01 00:45:00+00:00         NaN
2021-01-01 01:00:00+00:00         NaN
                               ...   
2022-07-11 12:00:00+00:00    0.824616
2022-07-11 12:15:00+00:00    0.842000
2022-07-11 12:30:00+00:00    0.828929
2022-07-11 12:45:00+00:00    0.831005
2022-07-11 13:00:00+00:00    0.834076
Length: 53428, dtype: float64

In [27]:
getAtrRatio(resample_dataframe(df, '15m'))

Dropping last candle


date
2021-01-01 03:30:00+00:00    0.004913
2021-01-01 03:45:00+00:00    0.004912
2021-01-01 04:00:00+00:00    0.004829
2021-01-01 04:15:00+00:00    0.004820
2021-01-01 04:30:00+00:00    0.004819
                               ...   
2022-07-11 12:00:00+00:00    0.003558
2022-07-11 12:15:00+00:00    0.003632
2022-07-11 12:30:00+00:00    0.003574
2022-07-11 12:45:00+00:00    0.003581
2022-07-11 13:00:00+00:00    0.003594
Name: volatility, Length: 53414, dtype: float64

In [31]:
atr = getAtr(resample_dataframe(df, '1d'))

Dropping last candle


In [36]:
#review atr for a time period
atr.loc['2022-05-15 00:00:00':'2022-05-31 00:00:00']

date
2022-05-15 00:00:00+00:00    30.648021
2022-05-16 00:00:00+00:00    30.036662
2022-05-17 00:00:00+00:00    28.940972
2022-05-18 00:00:00+00:00    28.428260
2022-05-19 00:00:00+00:00    27.993384
2022-05-20 00:00:00+00:00    27.570857
2022-05-21 00:00:00+00:00    26.670867
2022-05-22 00:00:00+00:00    25.639519
2022-05-23 00:00:00+00:00    25.369197
2022-05-24 00:00:00+00:00    24.872968
2022-05-25 00:00:00+00:00    24.093256
2022-05-26 00:00:00+00:00    24.908309
2022-05-27 00:00:00+00:00    24.492144
2022-05-28 00:00:00+00:00    23.712634
2022-05-29 00:00:00+00:00    22.709732
2022-05-30 00:00:00+00:00    22.526965
2022-05-31 00:00:00+00:00    21.731968
Freq: 1440T, dtype: float64

In [53]:
getAtrRatio(resample_dataframe(df, '1d'))

Dropping last candle


date
2021-01-15 00:00:00+00:00    0.087752
2021-01-16 00:00:00+00:00    0.086697
2021-01-17 00:00:00+00:00    0.089719
2021-01-18 00:00:00+00:00    0.087420
2021-01-19 00:00:00+00:00    0.090362
                               ...   
2022-07-06 00:00:00+00:00    0.068697
2022-07-07 00:00:00+00:00    0.066223
2022-07-08 00:00:00+00:00    0.064827
2022-07-09 00:00:00+00:00    0.061527
2022-07-10 00:00:00+00:00    0.060755
Freq: 1440T, Name: volatility, Length: 542, dtype: float64

----
----

In [15]:
# Ad-hoc check: resample by passing the timeframe string straight to
# DataFrame.resample, bypassing resample_dataframe / timeframe_to_minutes.
# The last candle is NOT dropped in this cell.
timeframe= '1D' # '2H'
df_resampled = df.resample(timeframe, on='date', origin='start').agg(
    {'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'}
).dropna()[['open', 'high', 'low', 'close', 'volume']]

In [16]:
len(df_resampled)

557

In [17]:
df_resampled.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01 00:00:00+00:00,37.422,39.025,37.048,37.841,4187075.0
2021-01-02 00:00:00+00:00,37.841,38.923,36.904,38.279,4329427.0
2021-01-03 00:00:00+00:00,38.279,41.745,37.788,41.281,10395750.0
2021-01-04 00:00:00+00:00,41.281,43.411,38.089,41.113,8678887.0
2021-01-05 00:00:00+00:00,41.113,41.889,38.85,41.889,5409385.0


In [13]:
timeframe_to_minutes('15h')

900

In [14]:
'1h'.lower().strip()

'1h'

In [15]:
'1h'[0:-1]

'1'

In [16]:
print(len(resample_dataframe(df, '15m')))
print(len(resample_dataframe(df, '1h')))
print(len(resample_dataframe(df, '4H')))
print(len(resample_dataframe(df, '1D')))

Dropping last candle
53428
Dropping last candle
13357
Dropping last candle
3339
Dropping last candle
556


----
----

In [17]:
# NOTE(review): this rebinds `df` with 'date' moved from a column into the index,
# so the cell is not idempotent. resample_dataframe resamples with on='date';
# presumably it must be called before this cell runs — TODO confirm, or bind
# the indexed frame to a new name instead.
df = df.set_index('date')
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-01 00:00:00+00:00,28965.0,29070.5,28960.0,29022.0,1439056.0
2021-01-01 00:05:00+00:00,29022.0,29024.0,28888.5,28906.5,2164178.0
2021-01-01 00:10:00+00:00,28906.5,28926.0,28737.5,28812.5,1269254.0
2021-01-01 00:15:00+00:00,28812.5,28913.0,28778.5,28891.5,1776118.0
2021-01-01 00:20:00+00:00,28891.5,28902.0,28800.5,28901.5,2040090.0


In [18]:
getAtrRatio(df)

date
2021-01-01 01:10:00+00:00    0.003477
2021-01-01 01:15:00+00:00    0.003604
2021-01-01 01:20:00+00:00    0.003609
2021-01-01 01:25:00+00:00    0.003676
2021-01-01 01:30:00+00:00    0.003730
                               ...   
2022-07-11 12:55:00+00:00    0.001893
2022-07-11 13:00:00+00:00    0.001872
2022-07-11 13:05:00+00:00    0.001814
2022-07-11 13:10:00+00:00    0.001806
2022-07-11 13:15:00+00:00    0.001767
Name: volatility, Length: 160263, dtype: float64