In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import ta

In [2]:
# Analysis window: from the first day Coinbase news is available up to the day
# the notebook is executed. Both bounds are "YYYY-MM-DD" strings.
start_date = "2021-04-14"  # day we start to have Coinbase news
today = pd.Timestamp.today().date().isoformat()

### Yield of 10-year Treasury Notes (long-term risk-free rate)

In [3]:
# 10-year Treasury note yield (FRED series DGS10), used as the long-term
# risk-free rate. Indexed by date, one column named after the series.
ten_year_treasury_rate = pdr.data.DataReader(
    name="DGS10", data_source="fred", start=start_date, end=today
)
ten_year_treasury_rate.head()

Unnamed: 0_level_0,DGS10
DATE,Unnamed: 1_level_1
2021-04-14,1.64
2021-04-15,1.56
2021-04-16,1.59
2021-04-19,1.61
2021-04-20,1.58


### Yield on 3-month U.S. Treasury bills (short-term risk-free rate)

In [4]:
# 3-month Treasury bill yield (FRED series DTB3), used as the short-term
# risk-free rate. Indexed by date, one column named after the series.
three_month_tbill_yield = pdr.data.DataReader(
    name="DTB3", data_source="fred", start=start_date, end=today
)
three_month_tbill_yield.head()

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
2021-04-14,0.02
2021-04-15,0.02
2021-04-16,0.02
2021-04-19,0.02
2021-04-20,0.03


### 5-year breakeven inflation rate 

In [5]:
# 5-year breakeven inflation rate (FRED series T5YIE). Indexed by date, one
# column named after the series.
five_year_breakeven_inflation = pdr.data.DataReader(
    name="T5YIE", data_source="fred", start=start_date, end=today
)
five_year_breakeven_inflation.head()

Unnamed: 0_level_0,T5YIE
DATE,Unnamed: 1_level_1
2021-04-14,2.56
2021-04-15,2.55
2021-04-16,2.57
2021-04-19,2.56
2021-04-20,2.52


### Close price for S&P 500 and VIX

In [6]:
# Download each index over the analysis window, then keep only its Close
# column under a descriptive name so it stays identifiable after the concat.
sp500_prices = yf.download("^GSPC", start=start_date, end=today)
sp500_close = sp500_prices["Close"].rename("S&P500 Close")

vix_prices = yf.download("^VIX", start=start_date, end=today)
vix_close = vix_prices["Close"].rename("VIX Close")

sp500_close.head()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Date
2021-04-14    4124.660156
2021-04-15    4170.419922
2021-04-16    4185.470215
2021-04-19    4163.259766
2021-04-20    4134.939941
Name: S&P500 Close, dtype: float64

### BTC features

In [7]:
# BTC-USD price/volume history over the analysis window. Every BTC feature
# computed below is added to this frame as a new column.
btc = yf.download(tickers="BTC-USD", start=start_date, end=today)
btc.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-14,63523.753906,64863.097656,61554.796875,63109.695312,63109.695312,77451779687
2021-04-15,63075.195312,63821.671875,62208.964844,63314.011719,63314.011719,60954381579
2021-04-16,63258.503906,63594.722656,60222.53125,61572.789062,61572.789062,84293007468
2021-04-17,61529.921875,62572.175781,60361.351562,60683.820312,60683.820312,66138759198
2021-04-18,60701.886719,61057.457031,52829.535156,56216.183594,56216.183594,97468872758


#### 1. Close - Open price

In [8]:
# Absolute intraday move: closing price minus opening price.
btc["close_open"] = btc["Close"].sub(btc["Open"])

#### 2. Open, High, Low, Close, Adj Close, Trading Volume

#### 3. Cumulative return

In [9]:
# Day-over-day percentage change of the close. The first row is NaN because
# it has no previous close to compare against.
daily_return = btc["Close"].pct_change()
btc["Daily Return"] = daily_return

# Compound the daily returns: running product of (1 + r), minus 1.
btc["cumulative_return"] = daily_return.add(1).cumprod().sub(1)

#### 4. Price volatility by 30-day moving standard deviation

In [10]:
# Rolling 30-row standard deviation of the close as a simple volatility
# measure. Rows before the window is full are NaN.
close_30d = btc["Close"].rolling(window=30)
btc["30D_Moving_STD"] = close_30d.std()

#### 5. Parkinson Volatility

In [11]:
# Per-row Parkinson range estimator: sqrt( ln(High / Low)^2 / (4 * ln 2) ).
log_high_low = np.log(btc["High"] / btc["Low"])
parkinson_scale = 1 / (4 * np.log(2))
btc["Parkinson_Volatility"] = np.sqrt(parkinson_scale * (log_high_low**2))

#### 6. Relative intraday price change

In [12]:
# Open-to-close move expressed as a percentage of the opening price.
btc["Relative_Intraday_Price_Change"] = (
    btc["Close"].sub(btc["Open"]).div(btc["Open"]).mul(100)
)

#### 7. Bollinger Bands

In [13]:
# Bollinger Bands on a 30-row window of the close: the middle band is the
# rolling mean, and the upper/lower bands sit two rolling standard deviations
# above/below it. Build the rolling window once and reuse it for both stats.
close_window = btc["Close"].rolling(window=30)
std_close = close_window.std()

btc["Middle Band"] = close_window.mean()
btc["Upper Band"] = btc["Middle Band"] + 2 * std_close
btc["Lower Band"] = btc["Middle Band"] - 2 * std_close

#### 8. Relative Strength Index (RSI)

In [14]:
# Relative Strength Index of the close with a 14-row window, via the `ta` library.
rsi_indicator = ta.momentum.RSIIndicator(close=btc["Close"], window=14)
btc["rsi"] = rsi_indicator.rsi()

#### 9. Stochastic oscillator

In [15]:
# Stochastic oscillator with a 14-row look-back; the signal line uses
# smooth_window=3. One indicator object supplies both output columns.
stoch = ta.momentum.StochasticOscillator(
    high=btc["High"],
    low=btc["Low"],
    close=btc["Close"],
    window=14,
    smooth_window=3,
)
btc["stoch_%K"], btc["stoch_%D"] = stoch.stoch(), stoch.stoch_signal()

#### 10. MACD

In [16]:
# Build the MACD indicator once and read both lines from it, instead of
# constructing (and recomputing) it separately for each column. This mirrors
# how the stochastic oscillator is handled. The window lengths are the `ta`
# library defaults, since none are overridden here.
macd_indicator = ta.trend.MACD(close=btc["Close"])
btc["macd"] = macd_indicator.macd()
btc["macd_signal"] = macd_indicator.macd_signal()

#### 11. Average True Range (ATR)

In [17]:
# Average True Range over a 14-row window, from the high/low/close columns.
atr_indicator = ta.volatility.AverageTrueRange(
    high=btc["High"], low=btc["Low"], close=btc["Close"], window=14
)
btc["ATR"] = atr_indicator.average_true_range()

### Concatenate all features

In [18]:
# Inspect the most recent rows, where every rolling feature is populated.
btc.tail(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,close_open,Daily Return,cumulative_return,30D_Moving_STD,...,Relative_Intraday_Price_Change,Middle Band,Upper Band,Lower Band,rsi,stoch_%K,stoch_%D,macd,macd_signal,ATR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-03-20,61930.15625,68115.257812,60807.785156,67913.671875,67913.671875,66792634382,5983.515625,0.096925,0.076121,7175.621882,...,9.661716,63065.554167,77416.79793,48714.310403,56.489051,54.904421,38.333343,2909.940528,4022.729251,3979.83592
2024-03-21,67911.585938,68199.992188,64580.917969,65491.390625,65491.390625,44480350565,-2420.195312,-0.035667,0.037739,6890.885625,...,-3.563744,63505.771354,77287.542605,49724.000103,52.298181,36.188397,31.392047,2562.282393,3730.639879,3954.067227
2024-03-22,65489.929688,66623.75,62355.371094,63778.761719,63778.761719,41401116964,-1711.167969,-0.02615,0.010602,6529.134208,...,-2.612872,63903.757422,76962.025838,50845.489006,49.501874,22.95558,38.016132,2124.080895,3409.328082,3976.518061
2024-03-23,63802.722656,65976.398438,63038.492188,64062.203125,64062.203125,24738964812,259.480469,0.004444,0.015093,6080.293328,...,0.406692,64328.998437,76489.585093,52168.411782,49.978567,25.145621,28.096532,1779.165292,3083.295524,3902.331503
2024-03-24,64070.753906,67622.757812,63825.851562,67234.171875,67234.171875,27206630673,3163.417969,0.049514,0.065354,5529.269708,...,4.937382,64879.072526,75937.611942,53820.53311,55.088041,49.654189,32.58513,1741.691427,2814.974705,3894.801128


In [19]:
# Align the BTC feature frame with the rates, inflation and index series
# column-wise on the date index. The outer join keeps the union of all dates,
# so dates on which a series has no observation show up as NaN in its column.
feature_frames = [
    btc,
    ten_year_treasury_rate,
    three_month_tbill_yield,
    five_year_breakeven_inflation,
    sp500_close,
    vix_close,
]
combined_df = pd.concat(feature_frames, axis="columns", join="outer")

combined_df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,close_open,Daily Return,cumulative_return,30D_Moving_STD,...,stoch_%K,stoch_%D,macd,macd_signal,ATR,DGS10,DTB3,T5YIE,S&P500 Close,VIX Close
2021-04-14,63523.753906,64863.097656,61554.796875,63109.695312,63109.695312,77451780000.0,-414.058594,,,,...,,,,,0.0,1.64,0.02,2.56,4124.660156,16.99
2021-04-15,63075.195312,63821.671875,62208.964844,63314.011719,63314.011719,60954380000.0,238.816406,0.003237,0.003237,,...,,,,,0.0,1.56,0.02,2.55,4170.419922,16.57
2021-04-16,63258.503906,63594.722656,60222.53125,61572.789062,61572.789062,84293010000.0,-1685.714844,-0.027501,-0.024353,,...,,,,,0.0,1.59,0.02,2.57,4185.470215,16.25
2021-04-17,61529.921875,62572.175781,60361.351562,60683.820312,60683.820312,66138760000.0,-846.101562,-0.014438,-0.038439,,...,,,,,0.0,,,,,
2021-04-18,60701.886719,61057.457031,52829.535156,56216.183594,56216.183594,97468870000.0,-4485.703125,-0.073622,-0.109231,,...,,,,,0.0,,,,,


> Because the rolling-window calculations need a full window of history, the first rows of the DataFrame contain missing values. These are back-filled with the next available value. (The window sizes are chosen by intuition for now.)

> Bitcoin is traded 24/7, whereas T-bills and notes are not. Missing values on weekends are forward-filled with the last available value.

In [20]:
# Forward-fill: carry the last available observation into later rows with
# missing values (e.g. dates where only BTC has data). Leading NaNs, which
# have no earlier value to copy, are left untouched by this step.
combined_df = combined_df.ffill()

In [21]:
# Back-fill the NaNs that remain at the top of the frame (the warm-up rows of
# the rolling-window features) with the next available value.
# `DataFrame.backfill` is deprecated and emits a FutureWarning; `bfill` is the
# supported spelling with the same behavior.
# NOTE(review): back-filling copies later values into earlier rows, so the
# warm-up rows contain information from the future. Consider dropping those
# rows instead if these features feed a predictive model.
combined_df = combined_df.bfill()

  combined_df.backfill(inplace=True)


In [22]:
# Persist the combined feature table to the working directory; the date index
# is written as the first column of the CSV.
combined_df.to_csv(path_or_buf="features.csv")