In [17]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
!pwd

# Functions and Pipeline

In [2]:
def get_csv_data(path="../data/BTCUSD_2011-12-31_to_2021-08-23_4hours_Clean.csv"):
    """
    Load the raw Bitcoin price dataset and index it by date.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the cleaned 4-hour BTCUSD file
        covering 2011-12-31 to 2021-08-23, so calls without arguments behave
        as before.

    Returns
    -------
    pandas.DataFrame
        The file contents without the "Unnamed: 0" column (presumably a row
        index written by an earlier export -- verify), indexed by the "date"
        column. The dates are kept exactly as read; they are not parsed.
    """
    data = pd.read_csv(path)
    data = data.drop(columns="Unnamed: 0").set_index("date")
    return data

In [3]:
def add_ema(data, tspan=(12, 26, 20, 50, 34, 55)):
    """
    Add Exponential Moving Average (EMA) columns of `log_close` to the dataframe.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a `log_close` column. It is modified in place.
    tspan : iterable of int, optional
        EMA spans; one column named `ema<span>` is written per entry, in the
        given order. Defaults to 12, 26, 20, 50, 34 and 55. The default is a
        tuple so that it cannot be mutated between calls.

    Returns
    -------
    pandas.DataFrame
        The same `data` object that was passed in.
    """
    log_close = data["log_close"]
    for t in tspan:
        data[f'ema{t}'] = log_close.ewm(span=t).mean()
    return data

In [4]:
def computeRSI (data, window=14):
    """
    Relative Strength Index of `data` using exponentially weighted averages.

    `window` sets the smoothing length (default 14). The result is indexed
    like `data` minus the rows dropped after differencing, and stays NaN
    until `window` changes have been observed.
    """
    # change between consecutive observations; the leading NaN is discarded
    delta = data.diff(1).dropna()

    # all-zero template with the same shape and index as the changes
    zeros = delta * 0

    # gains keep only the positive changes, losses only the negative ones
    gains = zeros.mask(delta > 0, delta)
    losses = zeros.mask(delta < 0, delta)

    # com=window-1 corresponds to a decay of alpha=1/window, see
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html
    smoothing = dict(com=window - 1, min_periods=window)
    avg_gain = gains.ewm(**smoothing).mean()
    avg_loss = losses.ewm(**smoothing).mean()

    # the averaged losses are non-positive, so use the magnitude of the ratio
    strength = (avg_gain / avg_loss).abs()
    return 100 - 100 / (1 + strength)

def stoch_rsi(rsi, d_window=3, k_window=3, window=14):
    """
    Stochastic RSI oscillator lines.

    The RSI is rescaled to a percentage of its rolling min-max range over
    `window` observations. K is the `k_window` rolling mean of that value and
    D is the `d_window` rolling mean of K.
    Defaults are d_window=3, k_window=3, window=14. Returns the tuple (K, D).
    """
    lookback = rsi.rolling(window=window, center=False)
    lowest, highest = lookback.min(), lookback.max()
    position = 100 * ((rsi - lowest) / (highest - lowest))
    k_line = position.rolling(window=k_window, center=False).mean()
    d_line = k_line.rolling(window=d_window, center=False).mean()
    return k_line, d_line

In [5]:
def add_stoch_rsi(data, d_window=3, k_window=3, window=14):
    """
    Add RSI and stochastic-RSI columns derived from `log_close`.

    Writes the columns `rsi`, `K` and `D` into `data` (in place) and returns
    the same frame. `window` is used both for the RSI and for the stochastic
    look-back; `d_window` and `k_window` are passed through to `stoch_rsi`.
    """
    data['rsi'] = computeRSI(data['log_close'], window)
    k_line, d_line = stoch_rsi(data['rsi'], d_window, k_window, window)
    data['K'] = k_line
    data['D'] = d_line
    return data

In [6]:
def get_bollinger_bands(prices, rate=20):
    """
    Simple moving average of `prices` with bands two rolling standard
    deviations above and below it.

    `rate` is the rolling window length in observations (default 20).
    Returns the tuple (sma, bollinger_up, bollinger_down).
    """
    rolling = prices.rolling(rate)
    sma = rolling.mean()
    band_width = rolling.std() * 2  # distance of each band from the average
    return sma, sma + band_width, sma - band_width

In [7]:
def add_bollinger(data, prices, rate=20):
    """
    Add the simple moving average and Bollinger bands of `prices` to `data`.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that receives the columns `sma`, `bollinger_up` and
        `bollinger_down`. It is modified in place.
    prices : pandas.Series
        Series the bands are computed from.
    rate : int, optional
        Rolling window length passed to `get_bollinger_bands` (default 20).

    Returns
    -------
    pandas.DataFrame
        The same `data` object that was passed in.
    """
    # forward `rate` so a non-default window is actually used
    data['sma'], data['bollinger_up'], data['bollinger_down'] = get_bollinger_bands(prices, rate)
    return data

In [8]:
def add_vol_roc(data):
    """
    Add the volume rate of change to the dataframe.

    Writes a `vol_roc` column holding the fractional change of `volume` from
    the previous row, modifying `data` in place, and returns the same frame.
    """
    volume = data['volume']
    data['vol_roc'] = volume.pct_change()
    return data

In [10]:
# NOTE(review): in file order `data` is first assigned by the "#Pipeline" cell
# further down, so on a fresh kernel (Restart & Run All) this display would
# raise NameError -- confirm and move it after the pipeline cell.
data

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,...,ema50,ema34,ema55,rsi,K,D,sma,bollinger_up,bollinger_down,vol_roc
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329,1.479329,...,1.479329,1.479329,1.479329,,,,,,,
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847,1.494235,...,1.493363,1.493493,1.493338,,,,,,,68.407580
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699,1.504954,...,1.503189,1.503453,1.503138,,,,,,,-0.952500
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915,1.527669,...,1.522741,1.523470,1.522600,,,,,,,5.657790
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438,1.549885,...,1.541494,1.542744,1.541253,,,,,,,0.010000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853,10.792529,...,10.756861,10.768406,10.753323,62.080327,36.210154,50.235845,10.777789,10.840219,10.715358,2.306553
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202,10.792325,...,10.758207,10.769709,10.754676,59.525060,19.004941,36.146357,10.782060,10.834660,10.729461,-0.727108
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660,10.792838,...,10.759676,10.771192,10.756140,61.603611,5.196078,20.137058,10.786118,10.828258,10.743977,0.307177
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408,10.796311,...,10.761862,10.773719,10.758256,69.159095,26.741234,16.980751,10.789987,10.827486,10.752488,2.205170


In [15]:
#Pipeline
# Load the raw frame. The indicator steps below are currently disabled, so
# `data` holds only the columns read from the CSV. Each helper writes its
# columns into `data` in place and returns it; uncomment to re-enable.
data = get_csv_data()
# add_ema(data)
# add_stoch_rsi(data)
# add_bollinger(data,data.log_close)
# add_vol_roc(data)


In [16]:
data

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438
...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408


In [None]:
data.shape

# Manual Code and Plots

In [None]:
# Manual load: read the CSV, discard the "Unnamed: 0" column, index by date
data = (
    pd.read_csv("../data/BTCUSD_2011-12-31_to_2021-08-23_4hours_Clean.csv")
    .drop(columns="Unnamed: 0")
    .set_index("date")
)

In [None]:
data

In [None]:
# Close (top) and log close (bottom) over the full history.
# The index holds date strings; converting them to datetimes gives a real
# time axis instead of treating every row label as a separate category.
timestamps = pd.to_datetime(data.index)
fig, axs = plt.subplots(2, 1, figsize=(15, 7))
axs[0].plot(timestamps, data.close)
axs[0].set_title('Close')
axs[0].set_ylabel('close')
axs[1].plot(timestamps, data.log_close)
axs[1].set_title('Log close')
axs[1].set_ylabel('log_close')
axs[1].set_xlabel('Date')
fig.tight_layout()
plt.show()

In [None]:
data

# add ema

In [None]:
# One EMA column of log_close per span: ema12, ema26, ema20, ema50, ema34, ema55
for span in (12, 26, 20, 50, 34, 55):
    data[f'ema{span}'] = data.log_close.ewm(span=span).mean()

In [None]:
# Last 60 rows of log close together with every EMA column
ema_view = data[["log_close", "ema12","ema26","ema20","ema50","ema34","ema55"]].tail(60)
plt.figure(figsize=(14, 8))
plt.plot(ema_view)
plt.xticks(rotation=90)  # keep the long index labels readable
plt.show()

# add stoch rsi

In [None]:
def computeRSI (data, time_window):
    """
    Relative Strength Index of `data`, smoothed over `time_window` observations.

    The result is NaN until `time_window` changes have been observed.

    NOTE(review): a function with this name is also defined earlier in the
    notebook (with a default window); running this cell rebinds the name.
    """
    changes = data.diff(1).dropna()    # step-to-step change, leading NaN removed

    # start from zeros shaped like the changes, then copy in one sign each
    rises = changes * 0
    falls = changes * 0
    is_rise = changes > 0
    is_fall = changes < 0
    rises[is_rise] = changes[is_rise]
    falls[is_fall] = changes[is_fall]

    # exponentially weighted means; com=time_window-1 gives alpha=1/time_window
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html
    avg_rise = rises.ewm(com=time_window - 1, min_periods=time_window).mean()
    avg_fall = falls.ewm(com=time_window - 1, min_periods=time_window).mean()

    relative_strength = abs(avg_rise / avg_fall)
    return 100 - 100 / (1 + relative_strength)

def stoch_rsi(rsi, d_window, k_window, window):
    """
    Stochastic RSI lines (K, D).

    `rsi` is expressed as a percentage of its rolling min-max range over
    `window` observations; K averages that over `k_window` observations and
    D averages K over `d_window` observations.
    """
    span = rsi.rolling(window=window, center=False)
    floor = span.min()
    ceiling = span.max()
    pct_of_range = 100 * ((rsi - floor) / (ceiling - floor))
    fast = pct_of_range.rolling(window=k_window, center=False).mean()
    slow = fast.rolling(window=d_window, center=False).mean()
    return fast, slow

In [None]:
# RSI of log close with a 14-observation window
data['rsi'] = computeRSI(data['log_close'], 14)
# stochastic RSI lines: d_window=3, k_window=3, window=14
data['K'], data['D'] = stoch_rsi(data['rsi'], 3, 3, 14)

In [None]:
fig, axs = plt.subplots(2,1, figsize=(15,7))
axs[0].plot(data.rsi[-60:])
axs[1].plot(data[["K","D"]][-60:])
plt.show()

# add bollinger bands

In [None]:
def get_sma(prices, rate):
    """Simple moving average of `prices` over a rolling window of `rate` observations."""
    window = prices.rolling(rate)
    return window.mean()


def get_bollinger_bands(prices, rate=20):
    """
    Upper and lower Bollinger bands of `prices`.

    The bands sit two rolling standard deviations (window of `rate`
    observations, default 20) above and below the simple moving average.
    Returns the tuple (bollinger_up, bollinger_down).

    NOTE(review): an earlier cell defines a function of the same name that
    returns three values (the average first); this definition replaces it
    once the cell runs -- confirm callers expect the two-value form.
    """
    centre = get_sma(prices, rate)
    spread = prices.rolling(rate).std() * 2  # band distance from the average
    return centre + spread, centre - spread

In [None]:
data['sma'] = get_sma(data['log_close'], 20) # SMA over 20 rows (4-hour bars per the file name, not days)
# upper/lower bands of log close using the default 20-row window
data['bollinger_up'], data['bollinger_down'] = get_bollinger_bands(data['log_close'])

In [None]:
# Last 60 rows of log close with its Bollinger bands.
# The plotted series is log_close and the x axis shows the timestamp index,
# so the labels name those rather than "Closing Prices" / "Days".
plt.figure(figsize=(15,8))
plt.title('Bollinger Bands')
plt.xlabel('Date')
plt.ylabel('Log closing price')
plt.plot(data['log_close'][-60:], label='Log close')
plt.plot(data['bollinger_up'][-60:], label='Bollinger Up', c='g')
plt.plot(data['bollinger_down'][-60:], label='Bollinger Down', c='r')
plt.xticks(rotation=90)  # index labels are long timestamps
plt.legend()
plt.show()

# volume rate of change

In [None]:
# fractional change of volume from the previous row (the first row has no predecessor)
data['vol_roc'] = data.volume.pct_change()

# final dataframe

In [None]:
# Drop the raw price columns from the final frame.
# errors="ignore" keeps the cell re-runnable: a second execution finds the
# columns already gone and would otherwise raise KeyError.
data = data.drop(columns=['open','high','low','close'], errors="ignore")

In [None]:
data