In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import pandas_datareader as pdr
import ta

In [2]:
start_date = "2021-04-14"  # day we start to have Coinbase news
today = pd.Timestamp.today().strftime("%Y-%m-%d")

### Yield of 10-year Treasury Notes (long-term risk-free rate)

In [3]:
ten_year_treasury_rate = pdr.data.DataReader("DGS10", "fred", start_date, today)
ten_year_treasury_rate.head()

Unnamed: 0_level_0,DGS10
DATE,Unnamed: 1_level_1
2021-04-14,1.64
2021-04-15,1.56
2021-04-16,1.59
2021-04-19,1.61
2021-04-20,1.58


### Yield on 3-month U.S. Treasury bills (short-term risk-free rate)

In [4]:
three_month_tbill_yield = pdr.data.DataReader("DTB3", "fred", start_date, today)
three_month_tbill_yield.head()

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
2021-04-14,0.02
2021-04-15,0.02
2021-04-16,0.02
2021-04-19,0.02
2021-04-20,0.03


### 5-year breakeven inflation rate 

In [5]:
five_year_breakeven_inflation = pdr.data.DataReader("T5YIE", "fred", start_date, today)
five_year_breakeven_inflation.head()

Unnamed: 0_level_0,T5YIE
DATE,Unnamed: 1_level_1
2021-04-14,2.56
2021-04-15,2.55
2021-04-16,2.57
2021-04-19,2.56
2021-04-20,2.52


### Close price for S&P 500 and VIX

In [6]:
sp500_close = yf.download("^GSPC", start=start_date, end=today)["Close"].rename(
    "S&P500 Close"
)
vix_close = yf.download("^VIX", start=start_date, end=today)["Close"].rename(
    "VIX Close"
)
sp500_close.head()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Date
2021-04-14    4124.660156
2021-04-15    4170.419922
2021-04-16    4185.470215
2021-04-19    4163.259766
2021-04-20    4134.939941
Name: S&P500 Close, dtype: float64

### BTC features

In [7]:
btc = yf.download("BTC-USD", start=start_date, end=today)
btc.head()

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-14,63523.753906,64863.097656,61554.796875,63109.695312,63109.695312,77451779687
2021-04-15,63075.195312,63821.671875,62208.964844,63314.011719,63314.011719,60954381579
2021-04-16,63258.503906,63594.722656,60222.53125,61572.789062,61572.789062,84293007468
2021-04-17,61529.921875,62572.175781,60361.351562,60683.820312,60683.820312,66138759198
2021-04-18,60701.886719,61057.457031,52829.535156,56216.183594,56216.183594,97468872758


#### 1. Close - Open price

In [8]:
btc["close_open"] = btc["Close"] - btc["Open"]

#### 2. Open, High, Low, Close, Adj Close, Trading Volume

#### 3. Cumulative return

In [9]:
btc["Daily Return"] = btc["Close"].pct_change()
btc["cumulative_return"] = (1 + btc["Daily Return"]).cumprod() - 1

#### 4. Price volatility by 30-day moving standard deviation

In [10]:
btc["30D_Moving_STD"] = btc["Close"].rolling(window=30).std()

#### 5. Parkinson Volatility

In [11]:
btc["Parkinson_Volatility"] = np.sqrt(
    (1 / (4 * np.log(2))) * (np.log(btc["High"] / btc["Low"]) ** 2)
)

#### 6. Relative intraday price change

In [12]:
btc["Relative_Intraday_Price_Change"] = (
    (btc["Close"] - btc["Open"]) / btc["Open"]
) * 100

#### 7. Bollinger Bands

In [13]:
btc["Middle Band"] = btc["Close"].rolling(window=30).mean()
std_close = btc["Close"].rolling(window=30).std()
btc["Upper Band"] = btc["Middle Band"] + (std_close * 2)
btc["Lower Band"] = btc["Middle Band"] - (std_close * 2)

#### 8.Relative Strength Index (RSI)

In [14]:
btc["rsi"] = ta.momentum.RSIIndicator(close=btc["Close"], window=14).rsi()

#### 9. Stochastic oscillator

In [15]:
stoch = ta.momentum.StochasticOscillator(
    high=btc["High"], low=btc["Low"], close=btc["Close"], window=14, smooth_window=3
)
btc["stoch_%K"] = stoch.stoch()
btc["stoch_%D"] = stoch.stoch_signal()

#### 10. MACD

In [16]:
btc["macd"] = ta.trend.MACD(close=btc["Close"]).macd()
btc["macd_signal"] = ta.trend.MACD(close=btc["Close"]).macd_signal()

#### 11. Average True Range (ATR)

In [17]:
btc["ATR"] = ta.volatility.AverageTrueRange(
    high=btc["High"], low=btc["Low"], close=btc["Close"], window=14
).average_true_range()

### Concatenate all features

In [18]:
combined_df = pd.concat(
    [
        btc,
        ten_year_treasury_rate,
        three_month_tbill_yield,
        five_year_breakeven_inflation,
        sp500_close,
        vix_close,
    ],
    axis=1,
    join="outer",
)

combined_df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,close_open,Daily Return,cumulative_return,30D_Moving_STD,...,stoch_%K,stoch_%D,macd,macd_signal,ATR,DGS10,DTB3,T5YIE,S&P500 Close,VIX Close
2021-04-14,63523.753906,64863.097656,61554.796875,63109.695312,63109.695312,77451780000.0,-414.058594,,,,...,,,,,0.0,1.64,0.02,2.56,4124.660156,16.99
2021-04-15,63075.195312,63821.671875,62208.964844,63314.011719,63314.011719,60954380000.0,238.816406,0.003237,0.003237,,...,,,,,0.0,1.56,0.02,2.55,4170.419922,16.57
2021-04-16,63258.503906,63594.722656,60222.53125,61572.789062,61572.789062,84293010000.0,-1685.714844,-0.027501,-0.024353,,...,,,,,0.0,1.59,0.02,2.57,4185.470215,16.25
2021-04-17,61529.921875,62572.175781,60361.351562,60683.820312,60683.820312,66138760000.0,-846.101562,-0.014438,-0.038439,,...,,,,,0.0,,,,,
2021-04-18,60701.886719,61057.457031,52829.535156,56216.183594,56216.183594,97468870000.0,-4485.703125,-0.073622,-0.109231,,...,,,,,0.0,,,,,


> Depends on rolling window of calculations, we have some missing values at the top of the df. Filled with the next available value. (window choices are intuitive for now)

> Bitcoin is traded 24/7, where T-bills and notes are not. Weekend's missing values are filled with the previous day's value.

### S&P 500 Return

In [19]:
combined_df["S&P500 Return"] = combined_df["S&P500 Close"].pct_change()

  combined_df["S&P500 Return"] = combined_df["S&P500 Close"].pct_change()


### Lag 1 and 2 days of inflation, treasury, S&P 500 and its return, and VIX

In [20]:
combined_df["DGS10_lag1"] = combined_df["DGS10"].shift(1)
combined_df["DGS10_lag2"] = combined_df["DGS10"].shift(2)

In [21]:
combined_df["DTB3_lag1"] = combined_df["DTB3"].shift(1)
combined_df["DTB3_lag2"] = combined_df["DTB3"].shift(2)

In [22]:
combined_df["T5YIE_lag1"] = combined_df["T5YIE"].shift(1)
combined_df["T5YIE_lag2"] = combined_df["T5YIE"].shift(2)

In [23]:
combined_df["S&P500_Close_lag1"] = combined_df["S&P500 Close"].shift(1)
combined_df["S&P500_Close_lag2"] = combined_df["S&P500 Close"].shift(2)

In [24]:
combined_df["VIX_Close_lag1"] = combined_df["VIX Close"].shift(1)
combined_df["VIX_Close_lag2"] = combined_df["VIX Close"].shift(2)

In [25]:
combined_df["S&P_Return_lag1"] = combined_df["S&P500 Return"].shift(1)
combined_df["S&P_Return_lag2"] = combined_df["S&P500 Return"].shift(2)

In [26]:
combined_df.ffill(inplace=True)

In [27]:
combined_df.backfill(inplace=True)

  combined_df.backfill(inplace=True)


In [28]:
combined_df.to_csv("features.csv")