In [322]:
import pandas_datareader as web
import math
import numpy as np
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler
import requests as req
import datetime as dt
import pytz 
import csv
import os

In [323]:
asset_list = ['SPY', 'QQQ']

In [324]:
if os.path.isdir('data') == False:
    os.mkdir('data')
    os.mkdir('./data/gen')
    os.mkdir('./data/asset')

In [325]:
est = pytz.timezone('US/Eastern')
now = dt.datetime.now(tz=est)

time_parts = str(now).split(" ")
current_date = time_parts[0]
print(current_date)

2021-12-23


In [326]:
start_date = '2010-01-01'
end_date = current_date

In [327]:
delta = dt.timedelta(1)
temp_date = now - delta
temp_date = str(temp_date)[0:10]


fed_fund_url = f"https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1168&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=DFF&scale=left&cosd={start_date}&coed={temp_date}&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Daily%2C%207-Day&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2021-12-22&revision_date=2021-12-22&nd=1954-07-01"

In [328]:
fed_funds_data = req.get(fed_fund_url)
decoded_content = fed_funds_data.content.decode('utf-8')

cr = csv.reader(decoded_content.splitlines(), delimiter=',')
fed_funds_list = list(cr)
fed_funds_list.append([temp_date, None])

In [329]:
fed_funds_df = pd.DataFrame(fed_funds_list)
fed_funds_df = fed_funds_df.rename(columns={0: 'Date', 1: 'DFF'})
fed_funds_df.drop(index=fed_funds_df.index[0], axis=0, inplace=True)
fed_funds_df.reindex()
fed_funds_df

Unnamed: 0,Date,DFF
1,2010-01-01,0.05
2,2010-01-02,0.05
3,2010-01-03,0.05
4,2010-01-04,0.12
5,2010-01-05,0.12
...,...,...
4371,2021-12-19,0.08
4372,2021-12-20,0.08
4373,2021-12-21,0.08
4374,2021-12-22,0.08


In [330]:
# ^FVX = 5 year treasury
fvx = web.DataReader('^FVX', data_source='yahoo', start = start_date, end = end_date)
fvx = pd.DataFrame(fvx)
fvx['Date'] = fvx.index
fvx.reset_index(drop=True, inplace=True)
fvx = fvx[['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']]
fvx

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2010-01-04,2.684,2.633,2.684,2.652,0.0,2.652
1,2010-01-05,2.593,2.549,2.583,2.558,0.0,2.558
2,2010-01-06,2.625,2.558,2.566,2.573,0.0,2.573
3,2010-01-07,2.642,2.574,2.625,2.600,0.0,2.600
4,2010-01-08,2.654,2.527,2.642,2.566,0.0,2.566
...,...,...,...,...,...,...,...
3008,2021-12-17,1.193,1.142,1.157,1.177,0.0,1.177
3009,2021-12-20,1.165,1.125,1.142,1.164,0.0,1.164
3010,2021-12-21,1.239,1.185,1.185,1.235,0.0,1.235
3011,2021-12-22,1.243,1.212,1.235,1.222,0.0,1.222


In [331]:
# ^TNX = 10 year treasury
tnx = web.DataReader('^TNX', data_source='yahoo', start = start_date, end = end_date)
tnx = pd.DataFrame(tnx)
tnx['Date'] = tnx.index
tnx.reset_index(drop=True, inplace=True)
tnx = tnx[['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']]
tnx

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2010-01-04,3.859,3.808,3.859,3.841,0.0,3.841
1,2010-01-05,3.800,3.749,3.790,3.755,0.0,3.755
2,2010-01-06,3.837,3.761,3.771,3.808,0.0,3.808
3,2010-01-07,3.859,3.800,3.845,3.822,0.0,3.822
4,2010-01-08,3.851,3.775,3.843,3.808,0.0,3.808
...,...,...,...,...,...,...,...
3008,2021-12-17,1.412,1.372,1.395,1.402,0.0,1.402
3009,2021-12-20,1.421,1.375,1.387,1.419,0.0,1.419
3010,2021-12-21,1.498,1.448,1.450,1.487,0.0,1.487
3011,2021-12-22,1.481,1.448,1.477,1.457,0.0,1.457


In [332]:
# ^VIX = Volatility index
vix = web.DataReader('^VIX', data_source='yahoo', start = start_date, end = end_date)
vix = pd.DataFrame(vix)
vix['Date'] = vix.index
vix.reset_index(drop=True, inplace=True)
vix = vix[['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']]
vix

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2010-01-04,21.680000,20.030001,21.680000,20.040001,0,20.040001
1,2010-01-05,20.129999,19.340000,20.049999,19.350000,0,19.350000
2,2010-01-06,19.680000,18.770000,19.590000,19.160000,0,19.160000
3,2010-01-07,19.709999,18.700001,19.680000,19.059999,0,19.059999
4,2010-01-08,19.270000,18.110001,19.270000,18.129999,0,18.129999
...,...,...,...,...,...,...,...
3011,2021-12-17,23.260000,20.490000,20.700001,21.570000,0,21.570000
3012,2021-12-20,27.389999,22.850000,25.889999,22.870001,0,22.870001
3013,2021-12-21,22.680000,20.900000,22.280001,21.010000,0,21.010000
3014,2021-12-22,21.360001,18.590000,21.040001,18.629999,0,18.629999


In [333]:
data_list_raw = []

for each in asset_list:
    
    data_list_raw.append(web.DataReader(each, data_source='yahoo', start = start_date, end = end_date))


In [334]:
tmp_list_vix = vix['Date'].to_list()
tmp_list_tnx = tnx['Date'].to_list()
tmp_list_fvx = fvx['Date'].to_list()
tmp_list_ff = fed_funds_df['Date'].to_list()


In [335]:
print (len(tmp_list_vix))
print (len(tmp_list_tnx))
print (len(tmp_list_fvx))
print (len(tmp_list_ff))

3016
3013
3013
4375


In [336]:
for i in range(0, len(tmp_list_tnx) - 1):
    for j in range (0, len(tmp_list_vix) - 1):
        if tmp_list_vix[j] == tmp_list_tnx[i]:
            tmp_list_vix.pop(j)

In [337]:
tmp_list_vix.pop(len(tmp_list_vix) - 1)
print(len(tmp_list_vix))
count = 0
for each in tmp_list_vix:
    print (each)
    count = count + 1
    ind = vix.index[vix['Date'] == each]
    vix.drop(index=ind, axis=0, inplace=True)
    print ("Dropped:", count)

3
2010-10-11 00:00:00
Dropped: 1
2016-10-10 00:00:00
Dropped: 2
2016-11-11 00:00:00
Dropped: 3


In [338]:
print(vix.shape[0])
print(tnx.shape[0])
print(fvx.shape[0])
print(fed_funds_df.shape[0])

3013
3013
3013
4375


In [339]:
for i in range(0, len(tmp_list_tnx) - 1):
    for j in range (0, len(tmp_list_ff) - 1):
        if tmp_list_ff[j] == str(tmp_list_tnx[i])[0:10]:
            tmp_list_ff.pop(j)

In [340]:
tmp_list_ff.pop(len(tmp_list_ff) - 1)
print(len(tmp_list_ff))
count = 0
for each in tmp_list_ff:
    print (each)
    count = count + 1
    ind = fed_funds_df.index[fed_funds_df['Date'] == each]
    fed_funds_df.drop(index=ind, axis=0, inplace=True)
    print ("Dropped:", count)

1362
2010-01-01
Dropped: 1
2010-01-02
Dropped: 2
2010-01-03
Dropped: 3
2010-01-09
Dropped: 4
2010-01-10
Dropped: 5
2010-01-16
Dropped: 6
2010-01-17
Dropped: 7
2010-01-18
Dropped: 8
2010-01-23
Dropped: 9
2010-01-24
Dropped: 10
2010-01-30
Dropped: 11
2010-01-31
Dropped: 12
2010-02-06
Dropped: 13
2010-02-07
Dropped: 14
2010-02-13
Dropped: 15
2010-02-14
Dropped: 16
2010-02-15
Dropped: 17
2010-02-20
Dropped: 18
2010-02-21
Dropped: 19
2010-02-27
Dropped: 20
2010-02-28
Dropped: 21
2010-03-06
Dropped: 22
2010-03-07
Dropped: 23
2010-03-13
Dropped: 24
2010-03-14
Dropped: 25
2010-03-20
Dropped: 26
2010-03-21
Dropped: 27
2010-03-27
Dropped: 28
2010-03-28
Dropped: 29
2010-04-02
Dropped: 30
2010-04-03
Dropped: 31
2010-04-04
Dropped: 32
2010-04-10
Dropped: 33
2010-04-11
Dropped: 34
2010-04-17
Dropped: 35
2010-04-18
Dropped: 36
2010-04-24
Dropped: 37
2010-04-25
Dropped: 38
2010-05-01
Dropped: 39
2010-05-02
Dropped: 40
2010-05-08
Dropped: 41
2010-05-09
Dropped: 42
2010-05-15
Dropped: 43
2010-05-16
Drop

In [341]:
print(data_list_raw[0].shape[0])
print(vix.shape[0])
print(tnx.shape[0])
print(fvx.shape[0])
print(fed_funds_df.shape[0])

3016
3013
3013
3013
3013


In [342]:
for each in data_list_raw:
    each = pd.DataFrame(each)
    each['Date'] = each.index
    each.reset_index(drop=True, inplace=True)
    each = each[['Date', 'High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close']]
    print (each)

           Date        High         Low        Open       Close       Volume  \
0    2010-01-04  113.389999  111.510002  112.370003  113.330002  118944600.0   
1    2010-01-05  113.680000  112.849998  113.260002  113.629997  111579900.0   
2    2010-01-06  113.989998  113.430000  113.519997  113.709999  116074400.0   
3    2010-01-07  114.330002  113.180000  113.500000  114.190002  131091100.0   
4    2010-01-08  114.620003  113.660004  113.889999  114.570000  126402800.0   
...         ...         ...         ...         ...         ...          ...   
3011 2021-12-17  464.739990  458.059998  461.549988  459.869995  135511600.0   
3012 2021-12-20  455.399994  451.140015  454.480011  454.980011  107134800.0   
3013 2021-12-21  463.209991  456.309998  458.609985  463.059998   69806300.0   
3014 2021-12-22  467.809998  462.579987  462.790009  467.690002   58890200.0   
3015 2021-12-23  472.190002  468.640015  468.750000  470.600006   49083371.0   

       Adj Close  
0      89.888649  
1

In [343]:
raw_drop_list = []
for each in data_list_raw:
    raw_drop_list.append(each['Date'].to_list())

In [344]:

for a in range(0, len(raw_drop_list)):
    
    for i in range(0, len(tmp_list_tnx) - 1):
        for j in range (0, len(raw_drop_list[a]) - 1):
            if raw_drop_list[a][j] == tmp_list_tnx[i]:
                raw_drop_list[a].pop(j)

In [345]:
inc = 0

for each in data_list_raw:

    raw_drop_list[inc].pop(len(raw_drop_list[inc]) - 1)
    print(len(raw_drop_list[inc]))
    
    count = 0
    
    for one in raw_drop_list[inc]:
        print (each)
        count = count + 1
        ind = each.index[each['Date'] == one]
        each.drop(index=ind, axis=0, inplace=True)
        print ("Dropped:", count)
        
    inc = inc + 1

3
            High         Low        Open       Close       Volume   Adj Close  \
0     113.389999  111.510002  112.370003  113.330002  118944600.0   89.888649   
1     113.680000  112.849998  113.260002  113.629997  111579900.0   90.126579   
2     113.989998  113.430000  113.519997  113.709999  116074400.0   90.190033   
3     114.330002  113.180000  113.500000  114.190002  131091100.0   90.570740   
4     114.620003  113.660004  113.889999  114.570000  126402800.0   90.872177   
...          ...         ...         ...         ...          ...         ...   
3011  464.739990  458.059998  461.549988  459.869995  135511600.0  459.869995   
3012  455.399994  451.140015  454.480011  454.980011  107134800.0  454.980011   
3013  463.209991  456.309998  458.609985  463.059998   69806300.0  463.059998   
3014  467.809998  462.579987  462.790009  467.690002   58890200.0  467.690002   
3015  472.190002  468.640015  468.750000  470.600006   49083371.0  470.600006   

           Date  
0    20

In [346]:
print(data_list_raw[0].shape[0])
print(vix.shape[0])
print(tnx.shape[0])
print(fvx.shape[0])
print(fed_funds_df.shape[0])

3013
3013
3013
3013
3013


In [347]:
vix.to_csv('./data/gen/vix.csv')
tnx.to_csv('./data/gen/tnx.csv')
fvx.to_csv('./data/gen/fvx.csv')
fed_funds_df.to_csv('./data/gen/fed_funds.csv')

count = 0
for each in data_list_raw:
    each.to_csv('./data/asset/' + asset_list[count] + '.csv')
    count = count + 1