In [5]:
import pandas as pd
import numpy as np
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# deprecation warnings from the libraries used below included.
warnings.filterwarnings("ignore")

import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller

In [2]:
ls

README.md       credentials.py  creds.py        ffrs.ipynb


In [6]:
import os
import requests
import credentials

# FRED "series/observations" endpoint. The series id, API key and response
# format are sent as query parameters so `requests` takes care of URL encoding.
url = 'https://api.stlouisfed.org/fred/series/observations'
params = {'series_id': 'EFFR', 'api_key': credentials.apikey, 'file_type': 'json'}

# make call to FRED api; stop here on an HTTP error instead of failing later
# with a confusing KeyError on 'observations'
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()

# convert response to json
data = response.json()

# convert json to a pandas dataframe
ffr = pd.DataFrame(data['observations'])

# cleanup
# String comparison on the date column — assumes ISO 'YYYY-MM-DD' text, TODO confirm.
ffr = ffr[ffr.date > '2022-04-06']
ffr = ffr [['date', 'value']]
ffr = ffr.rename(columns = {'value': 'DFF', 'date': 'Date'})
ffr = ffr.reset_index(drop = True)
# Strip the literal decimal point before parsing (e.g. "4.83" -> 483).
# `regex=False` makes the "." a plain character on every pandas version; the
# default for `regex` differs between releases. Values that end up empty or
# otherwise unparsable become NaN through errors='coerce'.
# NOTE(review): this only scales consistently if every value has the same
# number of decimals — confirm against the API payload.
ffr.DFF = pd.to_numeric(ffr.DFF.str.replace(".", "", regex=False), errors='coerce')


In [27]:
import requests
from bs4 import BeautifulSoup

# URL to webscrape
# NOTE(review): this value is not read by any later cell in view — the Selenium
# cell navigates to a different hard-coded address, and `url` is later rebound
# as a loop variable when the report CSVs are downloaded.
url = 'https://www.cmegroup.com/markets/interest-rates/cme-fedwatch-tool.html'

In [None]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
import time

# NOTE(review): the chromedriver location is a hard-coded local Windows path;
# adjust it (or make it configurable) before running elsewhere.
driver = webdriver.Chrome(executable_path='C:/temp/chromedriver.exe')

# Everything after the browser starts runs inside try/finally so the browser
# process is always shut down, even when a lookup or page load fails.
try:
    driver.set_page_load_timeout(300)

    driver.get("https://www.cmegroup.com/trading/interest-rates/countdown-to-fomc.html")

    # Explicit waits replace the fixed sleeps: continue as soon as the element
    # exists, and raise TimeoutException if it never shows up.
    wait = WebDriverWait(driver, 30)

    # Switch into the first iframe on the page once it is available.
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.TAG_NAME, "iframe")))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Click through JavaScript rather than element.click().
    element = wait.until(EC.presence_of_element_located((By.LINK_TEXT, "Downloads")))
    driver.execute_script("arguments[0].click();", element)

    # There is no known element to wait on after the click, so keep the short
    # pause the original used before collecting links.
    time.sleep(2)

    links = driver.find_elements(By.TAG_NAME, "a")

    # Map each anchor's visible text to its href. Anchors sharing the same
    # text overwrite one another (last one wins).
    data = {}
    for link in links:
        data[link.text] = link.get_attribute('href')
finally:
    driver.quit()


In [None]:
# Inspect the link-text -> href mapping collected from the page.
data

In [None]:
# Keep only the links whose visible text contains '20'.
# NOTE(review): presumably this picks out the year-labelled report links — confirm.
reports = {label: href for label, href in data.items() if '20' in label}

In [None]:
# Read every report CSV and tag each row with the link text it came from.
dfs = {}
for key, url in reports.items():
    dfs[key] = pd.read_csv(url).assign(contract=key)

In [None]:
# Row count of every downloaded report, one per line.
for frame in dfs.values():
    print(len(frame))

In [None]:
# Number of report frames that were downloaded.
len(dfs)

In [None]:
# Stack all per-contract reports into one long frame. `ignore_index=True` gives
# the result a fresh 0..n-1 index; without it each report keeps its own row
# labels, so the same label would appear once per report.
df = pd.concat(list(dfs.values()), ignore_index=True)

In [None]:
# Preview the rows ordered by Date. The sorted frame is only displayed; `df`
# itself is not reordered.
# NOTE(review): 'Date' has not been through pd.to_datetime yet at this point,
# so the ordering depends on the raw CSV dtype — confirm.
df.sort_values(by = 'Date', ascending = True)

In [None]:
# Discard rows that have no Date value.
df = df.dropna(subset=['Date'])

In [None]:
# Every column other than the two bookkeeping columns is treated as a rate bucket.
rates = [col for col in df.columns if col not in ('Date', 'contract')]

In [None]:
# De-duplicated rate labels (order is not guaranteed because it goes through a set).
# NOTE(review): `rateset` is not referenced by any later cell in view.
rateset = list(set(rates))

In [None]:
def _scale_bound(text):
    """Return ``text`` unchanged, unless it parses to a number below 25 — then return that number times 1000."""
    number = float(text)
    return number * 1000 if number < 25 else text


# Turn each "(low-high)" column label into the midpoint of its two bounds,
# stored as a string. Bounds below 25 are scaled by 1000 before averaging.
# NOTE(review): presumably small bounds use a different unit from the rest — confirm.
ratevals = []
for label in rates:
    parts = label.replace('(', "").replace(")", "").split('-')
    low = _scale_bound(parts[0])
    high = _scale_bound(parts[1])

    midpoint = (int(low) + int(high)) / 2
    ratevals.append(str(midpoint))

In [None]:
# Original column label -> midpoint label, paired up by position.
rename = {old: new for old, new in zip(rates, ratevals)}

In [None]:
# Parse the date-like columns into real datetimes so the joins and the date
# arithmetic further down work on timestamps rather than text.
# `infer_datetime_format` is dropped: it is deprecated in current pandas and
# was only a parsing-speed hint.
ffr['Date'] = pd.to_datetime(ffr['Date'])
df['Date'] = pd.to_datetime(df['Date'])
df['contract'] = pd.to_datetime(df['contract'])

In [None]:
# Swap the raw bucket labels for their midpoint labels.
df = df.rename(columns=rename)

In [None]:
# Show the column labels in sorted order (display only).
df.columns.sort_values()

In [None]:
# Put the columns in sorted label order.
df = df.reindex(columns=sorted(df.columns))

In [None]:
# Attach the DFF value to every row whose Date matches.
ffr_by_date = ffr.set_index('Date')
df = df.join(ffr_by_date, on='Date')

In [None]:
# Preview the rows in date order after attaching DFF (display only; `df` is unchanged).
df.sort_values(by = 'Date', ascending = True)

In [None]:
import yfinance as yf
# BTC-USD history between two hard-coded dates.
# NOTE(review): the start date lines up with the '2022-04-06' cutoff applied to
# the FRED series — keep the two in sync if either changes.
btc = yf.download('BTC-USD', start='2022-04-07', end='2023-04-06')

In [None]:
# Keep only the Open and Volume columns.
btc = btc.drop(columns=[col for col in btc.columns if col not in ('Open', 'Volume')])

In [None]:
# Prefix the BTC columns so they stay recognisable after the join.
btc = btc.rename(columns={'Open': 'btcopen', 'Volume': 'btcvolume'})

In [None]:
# Display the combined frame.
df

In [None]:
# Copy the index into a regular 'Date' column so it can serve as the join key.
# The deprecated `infer_datetime_format` argument is dropped.
btc['Date'] = pd.to_datetime(btc.index)

In [None]:
# Attach the BTC open/volume columns to every row whose Date matches.
btc_by_date = btc.set_index('Date')
df = df.join(btc_by_date, on='Date')

In [None]:
# Spot-check the rows for a single observation date.
df[df.Date == '2023-03-07']

In [None]:
# Row count per contract.
df.contract.value_counts()

In [None]:
# NOTE(review): this loop has no lasting effect — it only rebinds the loop
# variable `i`, so the column labels of `df` are left untouched. Its single
# observable behavior is raising ValueError if one of the first 83 labels is
# not parsable as a float. The labels are actually converted in a later cell
# that reassigns `df.columns`.
for i in df.iloc[:, :83].columns:
    i = float(i)

In [None]:
# Make sure 'Date' is datetime-typed (a no-op if it was already converted).
# The deprecated `infer_datetime_format` argument is dropped.
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
# Coerce every rate column to numeric; unparsable cells become NaN.
# The columns are selected by their labels in `ratevals` rather than by the
# hard-coded position `:83`, so this keeps working when the number of rate
# buckets changes. Column-wise `apply` replaces the deprecated element-wise
# `applymap`.
df[ratevals] = df[ratevals].apply(pd.to_numeric, errors='coerce')

In [None]:
# Turn the stringified rate labels into floats; every other label is kept as-is.
rate_labels = set(ratevals)
df.columns = [
    float(label) if isinstance(label, str) and label in rate_labels else label
    for label in df.columns
]

In [None]:
# Keep `ratevals` in step with the now-numeric column labels.
ratevals = list(map(float, ratevals))

In [None]:
# Per row: multiply each rate column by its own (numeric) column label and sum.
# Done as one vectorized multiply; the previous row-wise `apply` re-sliced
# `df[ratevals]` once per row for the same result.
# NOTE(review): this is an expected rate only if each row's cells are weights
# summing to 1 (not percentages) — confirm the CSV scale.
rate_weights = df[ratevals]
df['mean'] = rate_weights.mul(rate_weights.columns.to_numpy(), axis=1).sum(axis=1)

In [None]:
# Rows ranked by 'mean', highest first (display only).
df.sort_values(by = 'mean', ascending = False)

In [None]:
# Display the combined frame, now including the 'mean' column.
df

In [None]:
# Preview the non-rate columns. They are selected by name rather than by the
# positional slice 83:89, which only lines up while there are exactly 83 rate
# columns.
df[['Date', 'contract', 'DFF', 'btcopen', 'btcvolume', 'mean']]

In [None]:
# Working frame holding only the non-rate columns. They are picked by name
# because the positional slice 83:89 silently grabs the wrong columns as soon
# as the number of rate buckets differs from 83. `.copy()` makes it an
# independent frame, so later in-place edits never touch `df`.
smalldf = df[['Date', 'contract', 'DFF', 'btcopen', 'btcvolume', 'mean']].copy()

In [None]:
# Keep only the rows where every column has a value.
smalldf = smalldf.dropna()

In [None]:
# Display the reduced frame after dropping incomplete rows.
smalldf

In [None]:
# DFF against observation date.
smalldf.plot.scatter(x='Date', y='DFF');

In [None]:
# Weighted 'mean' rate against observation date.
smalldf.plot.scatter(x='Date', y='mean');

In [None]:
# Wide layout: one row per observation date, one column per contract, with each
# cell holding that row's 'mean'. The DFF series is then attached as an extra
# column, matched on the date.
mean_by_contract = df.pivot(index='Date', columns='contract', values='mean')
dff_by_date = ffr.set_index('Date')
new_df = mean_by_contract.join(dff_by_date, on='Date')

In [None]:
# Column labels of new_df other than 'DFF'.
[i for i in new_df.columns if i != 'DFF']

In [None]:
import seaborn as sns
# Heatmap over every column except 'DFF'.
contract_cols = [col for col in new_df.columns if col != 'DFF']
sns.heatmap(new_df[contract_cols]);

In [None]:
import plotly.graph_objects as go

In [None]:
# Axes of the surface: one x position per contract column, one y position per
# observation date.
x = [i for i in new_df.columns if i != 'DFF']
y = new_df.index
# Build z from the contract columns only. Taking the whole frame would include
# the 'DFF' column as well, leaving z one column wider than x, so the grid
# would no longer line up with its x axis.
z = new_df[x].to_numpy()

fig = go.Figure(data=[go.Surface(z=z, x=x, y=y)])
fig.update_layout(title='FFR Curves',
                  width = 1200,
                  height = 1200,
                  scene = {"aspectratio": {"x": 1, "y": 1, "z": .6}})
fig.update_coloraxes(cauto=True)
#fig.update_coloraxes(cmax=2000)
#fig.update_coloraxes(cmin=2000)
fig.show()

# Cohorting

In [None]:
# Distinct contract values in ascending order.
contractvals = sorted(set(df.contract))

In [None]:
# Display the sorted distinct contract values.
contractvals

In [None]:
# Work on an independent copy. A plain `smalldf2 = smalldf` only creates a
# second name for the same frame, so every column added below would also
# appear in `smalldf`.
smalldf2 = smalldf.copy()
# Time between the observation date and the contract date (a timedelta here).
smalldf2['dtm'] = smalldf2['contract'] - smalldf2['Date']

In [None]:
# Display smalldf2 with the new 'dtm' column.
smalldf2

In [None]:
from datetime import datetime
# Reduce the timedelta column to a whole number of days.
# Running this cell a second time fails: the column is no longer a timedelta
# after the first run.
smalldf2['dtm'] = smalldf2['dtm'].dt.days

In [None]:
# Display the 'dtm' column.
smalldf2.dtm

In [None]:
# useless
# 'mean' plotted against DFF.
smalldf.plot.scatter(x='mean', y='DFF');

In [None]:
# Gap between the weighted 'mean' rate and DFF for each row.
smalldf2['diff'] = smalldf2['mean'].sub(smalldf2['DFF'])

In [None]:
# Display smalldf2 with the new 'diff' column.
smalldf2

In [None]:
# Column dtypes of smalldf2.
smalldf2.dtypes

In [None]:

import plotly.express as px
# 3-D scatter: observation date (x) by contract (y) by spread (z); color follows
# 'mean' and marker size follows DFF.
layout_options = {
    'title': 'spreads between FFR and expected FFR: by date, by contract',
    'width': 1200,
    'height': 1200,
    'scene': {"aspectratio": {"x": 1, "y": 1, "z": .6}},
}
fig = px.scatter_3d(smalldf2, x='Date', y='contract', z='diff', color='mean', size='DFF')
fig.update_layout(**layout_options)
fig.update_coloraxes(cauto=True)
fig.show()

In [None]:
# Interpretation: buying a contract carries a premium. The more time you have left (orange and yellow points), the larger the premium: it is the cost of time when future rates are unknown.

In [None]:
# dtype of the Date column.
smalldf2.Date.dtype

In [None]:
# Display smalldf2 before the time-series section.
smalldf2

# Timeseries

In [None]:
from statsmodels.tsa.api import VAR

In [None]:
def plot_series(df, cols=None, title='Title', xlab=None, ylab=None, steps=1):
    """Draw one line per selected column of ``df`` on a single 18x9 figure.

    Parameters
    ----------
    df : DataFrame
        Frame whose columns are plotted against its index.
    cols : iterable of column labels, optional
        Columns to draw. ``None`` (the default) plots every column of ``df``;
        previously the default raised ``TypeError``.
    title, xlab, ylab : str, optional
        Figure title and axis labels.
    steps : int
        Stride used when picking x tick positions from ``df.index``.

    Returns
    -------
    None
        The plot is drawn on the current matplotlib figure.
    """
    # Materialize once so a generator can feed both the loop and the legend.
    cols = list(df.columns) if cols is None else list(cols)

    # Set figure size to be (18, 9).
    plt.figure(figsize=(18,9))

    # Plot from the frame that was passed in. The previous body read the
    # notebook-global `smalldf2` here and ignored the `df` argument.
    # Only Y is given, so each line is drawn against the frame's index.
    for col in cols:
        plt.plot(df[col])

    # Generate title and labels.
    plt.title(title, fontsize=26)
    plt.xlabel(xlab, fontsize=20)
    plt.ylabel(ylab, fontsize=20)

    # Enlarge tick marks.
    plt.yticks(fontsize=18)
    plt.xticks(df.index[0::steps], fontsize=18)

    plt.legend(cols);

In [None]:
# Disabled example call, kept as a bare string literal: nothing is plotted, and
# the cell simply evaluates to this text.
'''plot_series(smalldf2,
            ['btcopen'],
            title=" price",
            steps=50)
'''

# Dickey Fuller

In [None]:
def interpret_dftest(dftest):
    """Label the first three entries of a Dickey-Fuller result.

    ``dftest`` is any sliceable sequence; its first three items are returned
    as a Series indexed 'Test Statistic', 'p-value' and 'Lag Used'.
    """
    labels = ['Test Statistic', 'p-value', 'Lag Used']
    leading = dftest[:len(labels)]
    return pd.Series(leading, index=labels)

In [None]:
# Run the Dickey-Fuller test on the BTC open series and show its first three
# results (test statistic, p-value, lag count).
interpret_dftest(adfuller(smalldf2['btcopen']))

# Regressions
This linear regression (LR) is just a placeholder; I don't appreciate the methodology.

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.linear_model import LinearRegression

In [None]:
# Display the frame that feeds the regression.
smalldf2

In [None]:
# Columns to one-hot encode.
dummies = ['contract']

In [None]:
# One-hot encode the listed columns, omitting the first level of each.
smalldf3 = pd.get_dummies(smalldf2, columns=dummies, drop_first=True)

In [None]:
# Predictors: every column except the target ('btcopen') and 'Date'.
xvars = [col for col in smalldf3.columns if col not in ('btcopen', 'Date')]

In [None]:
# Feature matrix and target vector.
X, y = smalldf3[xvars], smalldf3['btcopen']

In [None]:
# Split into train and test sets using the library's default proportions and a
# fixed seed.
# NOTE(review): the rows are date-stamped observations; confirm that a random
# split (rather than a chronological one) is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

In [None]:
# Fit on the training split, then print the model score for train and test, in that order.
lr = LinearRegression()
lr.fit(X_train, y_train)

for features, target in ((X_train, y_train), (X_test, y_test)):
    print(lr.score(features, target))

In [None]:
# Predictions over the full data set (train and test rows together) and their residuals.
yhat = lr.predict(X)
residuals = y.sub(yhat)

plt.hist(residuals, bins = 50);
plt.title('distribution of residuals - histogram');
#residuals are.... kind of normal, mean is not 0

print(type(y), type(residuals), type(yhat))

In [None]:
# Residuals against fitted values, with a red lowess trend line.
# NOTE(review): `y` here is the already-computed residual series; confirm
# whether residplot expects the raw target instead and derives the residuals itself.
sns.residplot(x = yhat,
              y = residuals,
              lowess = True, #help visualize relationship
              line_kws = {'color':'red'})\
    .set(title = 'residuals vs. fits',
        xlabel = 'predicted',
        ylabel = 'residual');

# hmm