In [5]:
import csv
import json
import os
import pickle as pk
import time

import ccxt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from alpha_vantage.cryptocurrencies import CryptoCurrencies
from alpha_vantage.techindicators import TechIndicators
from dotenv import load_dotenv
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [6]:
# Load variables from a local .env file into the process environment so the
# os.getenv() lookups for the API keys can find them.
load_dotenv()

True

In [7]:
# Credentials come from the environment (populated from .env), never from literals.
kraken_pub_key = os.environ.get("KRAKEN_PUBLIC_KEY")
kraken_sec_key = os.environ.get("KRAKEN_SECRET_KEY")
av_key = os.environ.get("ALPHAVANTAGE_API_KEY")

# Kraken client configured with the key pair read above.
kraken_credentials = {"apiKey": kraken_pub_key, "secret": kraken_sec_key}
kraken = ccxt.kraken(kraken_credentials)

In [8]:
# Fetch daily BTC/USD candles from Kraken and shape them into a frame that is
# labelled, has millisecond epoch timestamps parsed to datetimes, and is sorted by date.
btc_historical = kraken.fetch_ohlcv("BTC/USD", "1d")
ohlcv_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
btc_historical_df = (
    pd.DataFrame(btc_historical, columns=ohlcv_columns)
    .assign(date=lambda frame: pd.to_datetime(frame['date'], unit='ms'))
    .sort_values(by='date')
)

In [None]:
# Preview only the first rows instead of rendering the whole frame.
btc_historical_df.head()

In [9]:
# Create the notebook's output folders. exist_ok=True makes this safe to re-run
# and avoids the check-then-create race of a separate os.path.exists() test.
for folder in ('./Data', './Models', './Results'):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Daily BTCUSD series from the Alpha Vantage query endpoint.
API_URL = "https://www.alphavantage.co/query"
symbol = 'BTCUSD'

# Query-string parameters, kept under their own name so that `data` only ever
# refers to the resulting DataFrame.
params = {
    "function": "TIME_SERIES_DAILY",
    "symbol": symbol,
    "outputsize": "full",
    "datatype": "json",
    "apikey": av_key,
}

# The timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(API_URL, params=params, timeout=30)
response.raise_for_status()
response_json = response.json()

# The payload may lack the series (e.g. if the API answers with an error or
# rate-limit note instead), so fail with a message that shows what came back.
series_key = 'Time Series (Daily)'
if series_key not in response_json:
    raise KeyError(
        f"{series_key!r} missing from Alpha Vantage response; "
        f"received keys: {list(response_json)}"
    )

data = (
    pd.DataFrame.from_dict(response_json[series_key], orient='index')
    .sort_index(axis=1)  # orders the columns, not the rows
    .rename(columns={
        '1. open': 'Open',
        '2. high': 'High',
        '3. low': 'Low',
        '4. close': 'Close',
        '5. adjusted close': 'AdjClose',
        '6. volume': 'Volume',
    })
    .loc[:, ['Open', 'High', 'Low', 'Close']]
    # Prices presumably arrive as JSON strings; cast so they are usable as numbers.
    .astype(float)
)
data.tail()  # quick sanity check of the parsed frame

In [None]:
# Technical indicator variables
# NOTE(review): no API key is passed to TechIndicators here — presumably the
# client reads it from the environment; confirm.
ti = TechIndicators(output_format='pandas')
# Daily BTCUSD indicators: 20-period EMA, 20-period Bollinger Bands on the
# close, and 60-period RSI on the close.
# NOTE(review): these getters may return a (data, meta_data) pair rather than a
# bare DataFrame — confirm before using the results directly.
ema = ti.get_ema('BTCUSD', interval='daily', time_period=20)
bbands = ti.get_bbands('BTCUSD', interval='daily', time_period=20, series_type='close', matype=1)
rsi = ti.get_rsi('BTCUSD', interval='daily', time_period=60, series_type='close')

In [10]:
# Defining functions
def compute_sma(df, window, colname):
    """Add a simple moving average of ``close`` to ``df`` under ``colname``.

    The average spans the trailing ``window`` rows (not centred), so the first
    ``window - 1`` rows are NaN. ``df`` is modified in place and also returned.
    """
    closes = df['close']
    trailing_mean = closes.rolling(window=window, center=False).mean()
    df[colname] = trailing_mean
    return df

def compute_rsi(df, window, colname):
    '''Add an RSI of ``close`` to ``df`` under ``colname``.

    Adapted from http://stackoverflow.com/a/32346692/3389859.

    Gains and losses are taken from the row-to-row differences of ``close``.
    Each series is seeded with the plain mean of its first ``window`` values and
    then smoothed with an exponentially weighted mean (``com=window - 1``,
    ``adjust=False``). Rows that precede the first full window have no RSI and
    are filled with the mean of the computed values.

    Parameters:
        df: DataFrame with a ``close`` column; modified in place.
        window: number of periods used for the seed and the smoothing.
        colname: name of the column that receives the RSI.

    Returns:
        The same ``df`` object, with ``colname`` added.

    Raises:
        IndexError: if ``close`` yields fewer than ``window`` differences.
    '''
    series = df['close']
    delta = series.diff().dropna()

    # u holds the gains, d the losses as positive magnitudes; zero elsewhere.
    u = delta.where(delta > 0, 0.0)
    d = (-delta).where(delta < 0, 0.0)

    # Seed each series with the average of its first `window` values, stored at
    # the last position of that window, then discard the rows before it.
    u.iloc[window - 1] = u.iloc[:window].mean()
    u = u.iloc[window - 1:]
    d.iloc[window - 1] = d.iloc[:window].mean()
    d = d.iloc[window - 1:]

    smoothing = dict(com=window - 1, ignore_na=False, min_periods=0, adjust=False)
    rs = u.ewm(**smoothing).mean() / d.ewm(**smoothing).mean()

    # Assignment aligns on the index, leaving NaN for rows without an RSI.
    df[colname] = 100 - 100 / (1 + rs)
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # df[colname]: that chained in-place call does not update df when pandas
    # Copy-on-Write is active.
    df[colname] = df[colname].fillna(df[colname].mean())
    return df


In [11]:
def compute_variables1(df, windows=(20, 50, 100)):
    """Build the predictive feature table from an OHLCV frame.

    Works on a copy of ``df``, so the caller's frame is left untouched and the
    function gives the same result every time it is called on the same input.

    Parameters:
        df: DataFrame with ``date``, ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns.
        windows: look-back lengths (in rows) for which the windowed features
            are computed. Defaults to 20, 50 and 100.

    Returns:
        A new DataFrame with the original columns plus:
        - ``bodysize`` (close - open) and ``shadowsize`` (high - low);
        - for each window ``w``: ``sma_w``, ``rsi_w``, ``Min_w``, ``Max_w``,
          ``volume_w``, ``percentChange_w``, ``RelativeSize_sma_w``, ``Diff_w``;
        - ``WeekDay`` and ``Day`` taken from ``date``.
        Rows containing any NaN (the warm-up rows of the windows) are dropped.
    """
    print("Let's compute predictive variables : ")
    # Copy first: the helpers below add columns in place on whatever they receive.
    df = df.copy()
    df["date"] = pd.to_datetime(df["date"])
    df['bodysize'] = df['close'] - df['open']
    df['shadowsize'] = df['high'] - df['low']
    for window in windows:
        print(window)
        df = compute_sma(df, window, colname='sma_{}'.format(window))
        df = compute_rsi(df, window, colname='rsi_{}'.format(window))
        df["Min_{}".format(window)] = df["low"].rolling(window).min()
        df["Max_{}".format(window)] = df["high"].rolling(window).max()
        df["volume_{}".format(window)] = df["volume"].rolling(window).mean()
        df['percentChange_{}'.format(window)] = df['close'].pct_change(periods=window)
        df['RelativeSize_sma_{}'.format(window)] = df['close'] / df['sma_{}'.format(window)]
        df['Diff_{}'.format(window)] = df['close'].diff(window)

    # Calendar features: weekday and day of the month.
    df["WeekDay"] = df["date"].dt.weekday
    df["Day"] = df["date"].dt.day
    return df.dropna()

In [12]:
# Build the feature table from the Kraken OHLCV frame; the function prints each
# window size as it processes it.
df = compute_variables1(btc_historical_df)

Let's compute predictive variables : 
20
50
100


In [13]:
# Preview the first rows of the feature table instead of rendering all of it.
df.head(10)

Unnamed: 0,date,open,high,low,close,volume,bodysize,shadowsize,sma_20,rsi_20,...,sma_100,rsi_100,Min_100,Max_100,volume_100,percentChange_100,RelativeSize_sma_100,Diff_100,WeekDay,Day
100,2019-08-07,11458.1,12173.8,11387.0,11962.7,9518.032434,504.6,786.8,10385.055,60.603878,...,9216.872,59.583398,5132.0,13875.7,7448.022351,1.323305,1.297913,6813.7,2,7
101,2019-08-08,11973.3,12065.0,11472.6,11982.6,5503.289670,9.3,592.4,10457.425,60.714448,...,9283.968,59.606238,5270.0,13875.7,7474.964069,1.272445,1.290677,6709.6,3,8
102,2019-08-09,11992.9,12042.0,11660.0,11857.6,4972.627351,-135.3,382.0,10511.560,59.608274,...,9349.303,59.393278,5311.3,13875.7,7500.255874,1.227156,1.268287,6533.5,4,9
103,2019-08-10,11859.6,11975.0,11230.2,11284.7,5075.904543,-574.9,744.8,10546.030,54.792174,...,9408.245,58.426888,5368.7,13875.7,7529.912226,1.093442,1.199448,5894.2,5,10
104,2019-08-11,11280.0,11581.0,11101.9,11526.7,3242.585424,246.7,479.1,10606.170,56.359961,...,9466.912,58.713520,5525.0,13875.7,7508.998613,1.036519,1.217578,5866.7,6,11
105,2019-08-12,11539.7,11550.2,11235.0,11384.2,2521.409943,-155.5,315.2,10683.350,55.173964,...,9523.119,58.473726,5563.4,13875.7,7486.865612,0.975223,1.195428,5620.7,0,12
106,2019-08-13,11386.0,11442.6,10751.0,10857.0,5530.491689,-529.0,691.6,10737.710,50.994924,...,9574.496,57.594690,5563.4,13875.7,7525.789601,0.898309,1.133950,5137.7,1,13
107,2019-08-14,10855.1,10862.0,9900.0,10018.0,9345.629624,-837.1,962.0,10744.710,45.253055,...,9617.802,56.235721,5660.0,13875.7,7594.194112,0.761438,1.041610,4330.6,2,14
108,2019-08-15,10018.0,10450.0,9490.0,10304.5,11725.544213,286.5,960.0,10767.335,47.382637,...,9663.395,56.589030,5660.0,13875.7,7669.545626,0.793584,1.066344,4559.3,3,15
109,2019-08-16,10300.9,10535.0,9728.0,10362.1,8443.901142,61.2,807.0,10811.850,47.812250,...,9707.595,56.660083,5933.2,13875.7,7729.239918,0.743845,1.067422,4420.0,4,16


In [21]:
# Persist the full feature table; index=False leaves the row index out of the CSV.
df.to_csv('./Data/Datasetwithvariables_btc.csv', index=False)

In [15]:
# Ordered (unshuffled) split: the first 65% of rows go to training and the
# remainder is halved into validation and test, in that order.
n_rows = len(df)
split1 = int(.65 * n_rows)
trainset, val_test = df.iloc[:split1], df.iloc[split1:]

split2 = int(.5 * len(val_test))
validation_set, testset = val_test.iloc[:split2], val_test.iloc[split2:]

In [22]:
# Write each split to its own CSV, without the row index.
for frame, csv_path in (
    (trainset, './Data/Trainset_btc.csv'),
    (validation_set, './Data/Validationset_btc.csv'),
    (testset, './Data/Testset_btc.csv'),
):
    frame.to_csv(csv_path, index=False)