In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
import numpy as np

In [2]:
!pwd

/home/timjab92/code/timjab92/cryptotrading-indicator/notebooks


# Functions and Pipeline

## Data_Train

In [22]:
def get_train_data(path="../data/BTCUSD_4hours.csv"):
    """
    Returns the raw training dataset for the price of bitcoin since 31.12.2011. The index is set to the date.

    Parameters
    ----------
    path : str or file-like, optional
        Location of the CSV file; it is forwarded unchanged to ``pd.read_csv``.
        Defaults to the 4-hour BTC/USD file in the project's data folder.

    Returns
    -------
    pandas.DataFrame
        The CSV content without the "Unnamed: 0" column, indexed by the "date" column.
        The dates are kept exactly as read from the file (no datetime parsing happens here).
    """
    data = pd.read_csv(path)
    # "Unnamed: 0" is dropped so that only the actual data columns remain
    data_train = data.drop(columns="Unnamed: 0").set_index("date")
    return data_train

## Data_API

In [23]:
def get_coingecko():
    """
    Downloads the last 30 days of bitcoin/USD data from the CoinGecko API and returns it as one dataframe.

    Two endpoints are queried: ``ohlc`` for the open/high/low/close values and ``market_chart`` for the
    total volumes. The volume is averaged into 4-hour bins and joined to the OHLC rows on their timestamps.

    Returns
    -------
    pandas.DataFrame
        Columns open, high, low, close, volume plus log_open, log_high, log_low and log_close
        (natural logarithm of the four price columns), indexed by timestamp.

    Raises
    ------
    requests.HTTPError
        If one of the endpoints answers with an HTTP error status.
    requests.Timeout
        If one of the endpoints does not answer within 30 seconds.
    """
    #ohlc
    url = "https://api.coingecko.com/api/v3/coins/bitcoin/ohlc?vs_currency=usd&days=30"
    # the timeout keeps the call from blocking forever; raise_for_status makes an HTTP error
    # fail right here instead of as an obscure DataFrame error further down
    ohlc_reply = requests.get(url, timeout=30)
    ohlc_reply.raise_for_status()
    response = ohlc_reply.json()
    #cleaning
    data_api = pd.DataFrame(response, columns = ['unix_time','open', 'high', 'low', 'close'])
    data_api["Date"] = pd.to_datetime(data_api["unix_time"], unit='ms')
    data_api = data_api.drop(columns='unix_time').set_index('Date')

    #volume
    url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=30"
    volume_reply = requests.get(url, timeout=30)
    volume_reply.raise_for_status()
    response = volume_reply.json()

    volume = pd.DataFrame(response['total_volumes'], columns=["unix_time","volume"])
    # truncate every timestamp to the full hour (the format string hard-codes minutes and seconds to 00)
    volume['date'] = pd.to_datetime(pd.to_datetime(volume['unix_time'],unit='ms').dt.strftime("%Y/%m/%d, %H:00:00"))
    volume = volume.drop(columns='unix_time').set_index('date')

    #resample hourly into 4h
    volume = volume.resample("4H").mean()

    #concatenate
    # keep the last 180 volume bins and the 180 OHLC rows preceding the newest one;
    # concat aligns both frames on their timestamps
    # NOTE(review): presumably the newest OHLC row is skipped because that candle is still open - confirm
    volume = volume[-180:]
    data_api = data_api[-181:-1]
    full = pd.concat([data_api, volume], axis=1)
    full.columns=['open', 'high', 'low', 'close', 'volume']

    # natural log of each price column, computed on the whole column at once
    for price_col in ['open', 'high', 'low', 'close']:
        full[f'log_{price_col}'] = np.log(full[price_col])

    data_api = full.copy()

    return data_api

## Feature Engineering

In [24]:
def add_ema(data, tspan=(12, 26, 20, 50, 34, 55)):
    """
    Adds Exponential Moving Averages (EMA) to the dataframe. The default timeframes are 12,26,20,50,34 and 55.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "log_close" column. The frame is modified in place.
    tspan : iterable of int, optional
        EMA spans; one column named "ema<span>" is added per entry, in the given order.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in.
    """
    # the default is an immutable tuple so that it can never be altered between calls
    log_close = data['log_close']
    for t in tspan:
        data[f'ema{t}'] = log_close.ewm(span=t).mean()
    return data

In [25]:
def computeRSI(data, window=14):
    """
    Calculates the Relative Strength Index (RSI) of a price series.

    Parameters
    ----------
    data : pandas.Series
        Price values; consecutive differences are taken along the index.
    window : int, optional
        Smoothing window, 14 by default. It is also the minimum number of
        observations required before a value is produced.

    Returns
    -------
    pandas.Series
        RSI values. The first row of ``data`` is missing from the index (it has no
        difference) and the leading ``window - 1`` entries are NaN.
    """
    # change from one row to the next; the first row has no predecessor and is dropped
    change = data.diff(1).dropna()

    # gains keep the positive changes, losses keep the negative ones (still negative);
    # every other position is zero so both keep the shape of `change`
    gains = change.where(change > 0, 0.0)
    losses = change.where(change < 0, 0.0)

    # exponential smoothing, see
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html
    # com=window-1 corresponds to a decay of alpha=1/window
    smoothing = dict(com=window-1, min_periods=window)
    avg_gain = gains.ewm(**smoothing).mean()
    avg_loss = losses.ewm(**smoothing).mean()

    # avg_loss is negative (or zero), hence the absolute value of the ratio
    relative_strength = (avg_gain/avg_loss).abs()
    return 100 - 100/(1+relative_strength)

def stoch_rsi(rsi, d_window=3, k_window=3, window=14):
    """
    Derives the stochastic RSI lines from an RSI series.

    The RSI is rescaled to 0-100 relative to its rolling minimum and maximum over
    ``window`` rows. K is the rolling mean of that value over ``k_window`` rows and
    D is the rolling mean of K over ``d_window`` rows.
    Defaults: d_window=3, k_window=3, window=14.

    Returns
    -------
    tuple of pandas.Series
        ``(K, D)``
    """
    lookback = rsi.rolling(window=window, center=False)
    lowest = lookback.min()
    highest = lookback.max()

    # position of the current RSI inside its recent range, as a percentage
    position_in_range = ((rsi - lowest) / (highest - lowest)) * 100

    k_line = position_in_range.rolling(window=k_window, center=False).mean()
    d_line = k_line.rolling(window=d_window, center=False).mean()
    return k_line, d_line

In [26]:
def add_stoch_rsi(data, d_window=3, k_window=3, window=14):
    """
    Adds the columns "rsi", "K" and "D" to the dataframe.

    The RSI is computed from the "log_close" column with ``computeRSI``; K and D are
    obtained by passing the stored "rsi" column to ``stoch_rsi``. The dataframe is
    modified in place and returned.
    """
    log_close = data['log_close']
    data['rsi'] = computeRSI(log_close, window)

    # the stored column is used (not the raw result) so that it carries the index of `data`
    k_line, d_line = stoch_rsi(data['rsi'], d_window, k_window, window)
    data['K'] = k_line
    data['D'] = d_line
    return data

In [27]:
def get_bollinger_bands(prices, rate=20):
    """
    Builds Bollinger Bands around the rolling mean of ``prices``.

    Parameters
    ----------
    prices : pandas.Series
        Values to compute the bands for.
    rate : int, optional
        Number of rows in the rolling window (20 by default).

    Returns
    -------
    tuple of pandas.Series
        ``(sma, bollinger_up, bollinger_down)``: the rolling mean and the bands two
        rolling standard deviations above and below it.
    """
    rolling_window = prices.rolling(rate)
    sma = rolling_window.mean()
    # distance of each band from the mean: twice the rolling standard deviation
    band_width = rolling_window.std() * 2
    return sma, sma + band_width, sma - band_width

In [28]:
def add_bollinger(data, prices, rate=20):
    """
    Adds the columns "sma", "bollinger_up" and "bollinger_down" to the dataframe.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that receives the new columns; it is modified in place.
    prices : pandas.Series
        Series the bands are computed from.
    rate : int, optional
        Rolling window length passed on to ``get_bollinger_bands`` (20 by default).

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in.
    """
    # `rate` has to be forwarded, otherwise the helper silently falls back to its own default
    # expects get_bollinger_bands to return three series in the order (sma, upper band, lower band)
    data['sma'], data['bollinger_up'], data['bollinger_down'] = get_bollinger_bands(prices, rate)
    return data

In [29]:
def add_vol_roc(data):
    """
    Adds the column "vol_roc" (volume rate of change) to the dataframe.

    The value is the relative change of the "volume" column versus the previous row.
    The dataframe is modified in place and returned.
    """
    volume_change = data['volume'].pct_change()
    data['vol_roc'] = volume_change
    return data

In [30]:
# Fetch the current CoinGecko data; as the last expression of the cell the dataframe is displayed.
get_coingecko()

Unnamed: 0,open,high,low,close,volume,log_open,log_high,log_low,log_close
2021-07-26 16:00:00,38700.54,38700.54,38342.04,38342.04,4.410311e+10,10.563609,10.563609,10.554302,10.554302
2021-07-26 20:00:00,38641.07,39841.37,38641.07,39841.37,5.059842e+10,10.562071,10.592661,10.562071,10.592661
2021-07-27 00:00:00,39580.48,39580.48,37230.72,37413.74,4.869665e+10,10.586091,10.586091,10.524890,10.529793
2021-07-27 04:00:00,37226.65,37385.81,36547.98,36547.98,4.406404e+10,10.524780,10.529046,10.506381,10.506381
2021-07-27 08:00:00,36890.36,37179.90,36890.36,36982.81,4.226844e+10,10.515706,10.523524,10.515706,10.518208
...,...,...,...,...,...,...,...,...,...
2021-08-24 20:00:00,48365.32,48365.32,48103.04,48124.79,3.982502e+10,10.786538,10.786538,10.781101,10.781553
2021-08-25 00:00:00,48525.15,48525.15,48194.11,48306.28,3.814674e+10,10.789837,10.789837,10.782992,10.785317
2021-08-25 04:00:00,47804.62,48288.08,47804.62,48288.08,3.438084e+10,10.774878,10.784940,10.774878,10.784940
2021-08-25 08:00:00,48140.33,48569.68,48118.13,48407.89,3.417438e+10,10.781876,10.790755,10.781414,10.787418


In [31]:
# Load the training dataset from disk; as the last expression of the cell the dataframe is displayed.
get_train_data()

Unnamed: 0_level_0,open,high,low,close,volume,log_open,log_high,log_low,log_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2011-12-31 08:00:00,4.390000,4.390000,4.390000,4.390000,0.455581,1.479329,1.479329,1.479329,1.479329
2011-12-31 16:00:00,4.490000,4.513333,4.490000,4.513333,31.620766,1.501702,1.506847,1.501702,1.506847
2012-01-01 04:00:00,4.580000,4.580000,4.580000,4.580000,1.502000,1.521699,1.521699,1.521699,1.521699
2012-01-01 16:00:00,4.840000,4.840000,4.840000,4.840000,10.000000,1.576915,1.576915,1.576915,1.576915
2012-01-01 20:00:00,5.000000,5.000000,5.000000,5.000000,10.100000,1.609438,1.609438,1.609438,1.609438
...,...,...,...,...,...,...,...,...,...
2021-08-22 12:00:00,48769.308417,48787.711375,48749.417500,48769.737750,1.697971,10.794844,10.795222,10.794435,10.794853
2021-08-22 16:00:00,48591.665708,48605.941333,48574.903375,48591.580833,0.463362,10.791204,10.791498,10.790859,10.791202
2021-08-22 20:00:00,48807.713542,48825.874417,48791.558000,48810.168750,0.605696,10.795610,10.795982,10.795279,10.795660
2021-08-23 00:00:00,49779.332208,49803.461792,49759.784208,49783.003708,1.941360,10.815334,10.815818,10.814942,10.815408


In [33]:
# Feature engineering pipeline.
# Every add_* helper writes its new columns into `data` in place and also returns it,
# so the return values of the intermediate calls are not needed.
data = get_coingecko()
add_ema(data)
add_stoch_rsi(data)
add_bollinger(data,data.log_close)
# last expression of the cell: the fully engineered dataframe is displayed
add_vol_roc(data)


Unnamed: 0,open,high,low,close,volume,log_open,log_high,log_low,log_close,ema12,...,ema50,ema34,ema55,rsi,K,D,sma,bollinger_up,bollinger_down,vol_roc
2021-07-26 16:00:00,38700.54,38700.54,38342.04,38342.04,4.410311e+10,10.563609,10.563609,10.554302,10.554302,10.554302,...,10.554302,10.554302,10.554302,,,,,,,
2021-07-26 20:00:00,38641.07,39841.37,38641.07,39841.37,5.059842e+10,10.562071,10.592661,10.562071,10.592661,10.575080,...,10.573865,10.574046,10.573830,,,,,,,0.147276
2021-07-27 00:00:00,39580.48,39580.48,37230.72,37413.74,4.869665e+10,10.586091,10.586091,10.524890,10.529793,10.557405,...,10.558583,10.558419,10.558614,,,,,,,-0.037586
2021-07-27 04:00:00,37226.65,37385.81,36547.98,36547.98,4.406404e+10,10.524780,10.529046,10.506381,10.506381,10.541298,...,10.544739,10.544240,10.544835,,,,,,,-0.095132
2021-07-27 08:00:00,36890.36,37179.90,36890.36,36982.81,4.226844e+10,10.515706,10.523524,10.515706,10.518208,10.535025,...,10.539000,10.538404,10.539116,,,,,,,-0.040750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-24 20:00:00,48365.32,48365.32,48103.04,48124.79,3.982502e+10,10.786538,10.786538,10.781101,10.781553,10.801412,...,10.777470,10.788524,10.774190,44.362840,0.000000,8.214348,10.804760,10.826861,10.782659,0.024600
2021-08-25 00:00:00,48525.15,48525.15,48194.11,48306.28,3.814674e+10,10.789837,10.789837,10.782992,10.785317,10.798936,...,10.777778,10.788341,10.774588,46.262926,2.789676,5.037066,10.804043,10.827716,10.780370,-0.042141
2021-08-25 04:00:00,47804.62,48288.08,47804.62,48288.08,3.438084e+10,10.774878,10.784940,10.774878,10.784940,10.796782,...,10.778059,10.788147,10.774958,46.093214,5.330184,2.706620,10.802760,10.827686,10.777835,-0.098721
2021-08-25 08:00:00,48140.33,48569.68,48118.13,48407.89,3.417438e+10,10.781876,10.790755,10.781414,10.787418,10.795342,...,10.778426,10.788105,10.775404,47.458222,9.874773,5.998211,10.801782,10.827531,10.776032,-0.006005


# Manual Code and Plots

In [None]:
# Load the cleaned 4-hour dataset, discard the "Unnamed: 0" column and index the rows by date.
data = (
    pd.read_csv("../data/BTCUSD_2011-12-31_to_2021-08-23_4hours_Clean.csv")
    .drop(columns="Unnamed: 0")
    .set_index("date")
)

In [None]:
# Two stacked panels: the "close" column on top, the "log_close" column below.
fig, axs = plt.subplots(2,1, figsize=(15,7))
axs[0].plot(data.close)
axs[1].plot(data.log_close)
plt.show()

# add ema

In [None]:
# One EMA column ("ema<span>") per span, each computed on the log close.
for span in (12, 26, 20, 50, 34, 55):
    data[f'ema{span}'] = data.log_close.ewm(span=span).mean()

In [None]:
# Log close together with all six EMAs, restricted to the last 60 rows.
plt.figure(figsize=(14,8))
plt.plot(data[["log_close", "ema12","ema26","ema20","ema50","ema34","ema55"]][-60:])
# vertical tick labels on the x axis
plt.xticks(rotation=90)
plt.show()

# add stoch rsi

In [None]:
def computeRSI (data, time_window=14):
    """
    Computes the Relative Strength Index (RSI) of a price series.

    This cell redefines the ``computeRSI`` from the "Feature Engineering" section; the
    window parameter is called ``time_window`` here and defaults to 14, the same default
    the earlier definition uses for its window.

    Parameters
    ----------
    data : pandas.Series
        Price values; consecutive differences are taken along the index.
    time_window : int, optional
        Smoothing window; also the minimum number of observations required before a
        value is produced.

    Returns
    -------
    pandas.Series
        RSI values. The first row of ``data`` is missing from the index and the leading
        ``time_window - 1`` entries are NaN.
    """
    diff = data.diff(1).dropna()        # change from one row to the next

    # this preserves the dimensions of the diff values
    up_chg = 0 * diff
    down_chg = 0 * diff

    # up change is equal to the positive difference, otherwise equal to zero
    up_chg[diff > 0] = diff[ diff>0 ]

    # down change is equal to the negative difference, otherwise equal to zero
    down_chg[diff < 0] = diff[ diff < 0 ]

    # check pandas documentation for ewm
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html
    # values are related to exponential decay
    # we set com=time_window-1 so we get decay alpha=1/time_window
    up_chg_avg   = up_chg.ewm(com=time_window-1 , min_periods=time_window).mean()
    down_chg_avg = down_chg.ewm(com=time_window-1 , min_periods=time_window).mean()

    # down_chg_avg is negative (or zero), hence the absolute value of the ratio
    rs = abs(up_chg_avg/down_chg_avg)
    rsi = 100 - 100/(1+rs)
    return rsi

def stoch_rsi(rsi, d_window=3, k_window=3, window=14):
    """
    Computes the stochastic RSI lines K and D from an RSI series.

    This cell redefines the ``stoch_rsi`` from the "Feature Engineering" section and
    uses the same defaults (d_window=3, k_window=3, window=14).

    The RSI is rescaled to 0-100 relative to its rolling minimum and maximum over
    ``window`` rows. K is the rolling mean of that value over ``k_window`` rows and
    D is the rolling mean of K over ``d_window`` rows.

    Returns
    -------
    tuple of pandas.Series
        ``(K, D)``
    """
    minrsi = rsi.rolling(window=window, center=False).min()
    maxrsi = rsi.rolling(window=window, center=False).max()
    # position of the current RSI inside its recent range, as a percentage
    stoch = ((rsi - minrsi) / (maxrsi - minrsi)) * 100
    K = stoch.rolling(window=k_window, center=False).mean()
    D = K.rolling(window=d_window, center=False).mean()
    return K, D

In [None]:
# RSI of the log close with a window of 14, then the stochastic RSI lines
# (the positional arguments are d_window=3, k_window=3, window=14).
data['rsi'] = computeRSI(data['log_close'], 14)
data['K'], data['D'] = stoch_rsi(data['rsi'], 3, 3, 14)

In [None]:
# Two stacked panels for the last 60 rows: the RSI on top, the K and D lines below.
fig, axs = plt.subplots(2,1, figsize=(15,7))
axs[0].plot(data.rsi[-60:])
axs[1].plot(data[["K","D"]][-60:])
plt.show()

# add bollinger bands

In [None]:
def get_sma(prices, rate):
    """
    Simple moving average: the rolling mean of ``prices`` over ``rate`` rows.
    """
    window = prices.rolling(rate)
    return window.mean()


def get_bollinger_bands(prices, rate=20):
    """
    Builds the upper and lower Bollinger Band for ``prices``.

    The bands lie two rolling standard deviations above and below the simple moving
    average returned by ``get_sma``; both use a window of ``rate`` rows (20 by default).

    NOTE(review): this redefinition returns two series, whereas the function of the same
    name in the "Feature Engineering" section returns three (sma first) and is unpacked
    that way by ``add_bollinger``. Calling ``add_bollinger`` after this cell has run
    would therefore fail to unpack.

    Returns
    -------
    tuple of pandas.Series
        ``(bollinger_up, bollinger_down)``
    """
    middle = get_sma(prices, rate)
    # distance of each band from the moving average: twice the rolling standard deviation
    band_width = prices.rolling(rate).std() * 2
    upper_band = middle + band_width
    lower_band = middle - band_width
    return upper_band, lower_band

In [None]:
# 20-row simple moving average of the log close (the rows come from the 4-hour data file, so this is not a 20-day average)
data['sma'] = get_sma(data['log_close'], 20)
# get_bollinger_bands is called with its default rate of 20
data['bollinger_up'], data['bollinger_down'] = get_bollinger_bands(data['log_close'])

In [None]:
# Log close with its upper and lower Bollinger Band for the last 60 rows.
# NOTE(review): the plotted series is log_close, although the y label reads "Closing Prices".
plt.figure(figsize=(15,8))
plt.title(' Bollinger Bands')
plt.xlabel('Days')
plt.ylabel('Closing Prices')
plt.plot(data['log_close'][-60:], label='Closing Prices')
plt.plot(data['bollinger_up'][-60:], label='Bollinger Up', c='g')
plt.plot(data['bollinger_down'][-60:], label='Bollinger Down', c='r')
plt.legend()
plt.show()

# volume rate of change

In [None]:
# Volume rate of change: relative change of the volume versus the previous row.
data['vol_roc'] = data.volume.pct_change()

# final dataframe

In [None]:
# Remove the raw price columns from the final frame.
# Not re-runnable as-is: a second execution raises a KeyError because the columns are already gone.
data = data.drop(columns=['open','high','low','close'])

# Coingecko

In [None]:
# OHLC values for the last 30 days.
url = "https://api.coingecko.com/api/v3/coins/bitcoin/ohlc?vs_currency=usd&days=30"

# the timeout keeps the cell from hanging indefinitely; raise_for_status makes an
# HTTP error fail here instead of while building the dataframe
ohlc_reply = requests.get(url, timeout=30)
ohlc_reply.raise_for_status()
response = ohlc_reply.json()

# convert the millisecond timestamps into a datetime index named "Date"
data_api = pd.DataFrame(response, columns = ['unix_time','open', 'high', 'low', 'close'])
data_api["Date"] = pd.to_datetime(data_api["unix_time"], unit='ms')
data_api = data_api.drop(columns='unix_time').set_index('Date')

In [None]:
# Market chart data for the last 30 days; the "total_volumes" entry is used in the next cell.
url = "https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=30"
# the timeout keeps the cell from hanging indefinitely; raise_for_status makes an HTTP error fail here
volume_reply = requests.get(url, timeout=30)
volume_reply.raise_for_status()
response = volume_reply.json()

In [None]:
# Build the volume frame from the "total_volumes" entries of the market_chart response.
volume = pd.DataFrame(response['total_volumes'], columns=["unix_time","volume"])
# Convert the millisecond timestamps to datetimes and truncate them to the full hour
# (the format string hard-codes minutes and seconds to 00).
volume['date'] = pd.to_datetime(pd.to_datetime(volume['unix_time'],unit='ms').dt.strftime("%Y/%m/%d, %H:00:00"))
volume = volume.drop(columns='unix_time').set_index('date')

# resample hourly into 4h: average the volume within each 4-hour bin
volume = volume.resample("4H").mean()

In [None]:
# Keep the last 180 volume bins and the 180 OHLC rows preceding the newest one,
# then join them side by side; concat aligns the rows on their timestamps.
volume = volume[-180:]
data_api = data_api[-181:-1]
full = pd.concat([data_api, volume], axis=1)
full.columns=['open', 'high', 'low', 'close', 'volume']

# Natural log of every price column, computed on the whole column at once.
for price_col in ['open', 'high', 'low', 'close']:
    full[f'log_{price_col}'] = np.log(full[price_col])