# Preparing for Project 3

This notebook (eventually) creates arrays containing daily returns for
- MKTRF
- RF
- HML
- SMB
- Bitcoin
- Ethereum
- Gold

Note: I spend most of the code working with lists and then convert into arrays at the end

In [45]:
import numpy as np

### Daily Fama-French Factor returns

- The code immediately below imports daily factor returns that I recently downloaded from Professor Ken French's website
- It provides MKTRF, RF, HML, and SMB
- I retain daily returns for January 2000 to September 2019

In [46]:
# Read the daily Fama-French factor file into a list of raw text lines.
# The context manager closes the handle even if the read raises.
with open('Project3_F-F_Factors_daily.txt', 'r') as f:
    f_list = f.readlines()

In [47]:
# Peek at the header line plus the first four data lines
print(f_list[:5])

['DATE,MKTRF,SMB,HML,RF\n', '19260701,0.10,-0.24,-0.28,0.009\n', '19260702,0.45,-0.32,-0.08,0.009\n', '19260706,0.17,0.27,-0.35,0.009\n', '19260707,0.09,-0.59,0.03,0.009\n']


In [48]:
# Drop the header line, then strip trailing whitespace and split each row on commas
ff1 = [line.rstrip().split(',') for line in f_list[1:]]

In [49]:
# First and last parsed rows, one per line
print(ff1[0], ff1[-1], sep='\n')

['19260701', '0.10', '-0.24', '-0.28', '0.009']
['20190930', '0.50', '-0.14', '-0.49', '0.009']


In [50]:
# Keep the date string; divide the four numeric columns by 100 (percent -> decimal)
ff2 = [[row[0]] + [float(row[col]) / 100 for col in range(1, 5)] for row in ff1]

In [51]:
# First and last converted rows, one per line
print(ff2[0], ff2[-1], sep='\n')

['19260701', 0.001, -0.0024, -0.0028000000000000004, 8.999999999999999e-05]
['20190930', 0.005, -0.0014000000000000002, -0.0049, 8.999999999999999e-05]


In [52]:
# Retain rows whose date string begins with '20', ordered by date
ff3 = [row for row in ff2 if row[0].startswith('20')]
ff3.sort()
print(ff3[0], ff3[-1], sep='\n')

['20000103', -0.0070999999999999995, 0.0038, -0.0085, 0.00021]
['20190930', 0.005, -0.0014000000000000002, -0.0049, 8.999999999999999e-05]


In [53]:
# Date strings (first field of each ff3 row): US trading dates since January 2000
ff_dates = [factor_row[0] for factor_row in ff3]

## Daily Bitcoin closing prices

- I use a dictionary to convert month abbreviations (e.g., `Oct`) into two-digit month strings (e.g., `10`)
- I use regular expressions to extract daily date strings, opening prices, and closing prices
- I retain (numerical) date strings and closing prices
- <b>Note: I only keep the subset of trading dates that appear in the US data from Professor French</b>

In [None]:
import re

In [54]:
# Lookup from three-letter month abbreviation to its two-digit month number (as a string)

month_abbrs = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
mmm = {abbr: str(pos).zfill(2) for pos, abbr in enumerate(month_abbrs, start=1)}
print(mmm.keys())

dict_keys(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])


In [55]:
# First line of the file: "Date	Open*	High	Low	Close**	Volume	Market Cap"

# Read the whole Bitcoin file as one string; the context manager closes the
# handle even if the read raises. Commas are removed so the date and numeric
# lines match the comma-free regex patterns applied to this text.
with open('Project3_Bitcoin_daily.txt', 'r') as b:
    b_text = b.read().replace(',', '')

In [56]:
# Each record is a "Mon DD YYYY" line followed by six numeric lines
# (Open, High, Low, Close, Volume, Market Cap per the file's header row).
# Raw strings are used so \d and \. reach the regex engine as written instead
# of being treated as (invalid) Python string escapes.

# If we want all of the data preserved
bitcoin0 = re.findall(r'([A-Z][a-z]{2}) (\d{2}) (\d{4})\n([\d\.]+)\n([\d\.]+)\n([\d\.]+)\n([\d\.]+)\n([\d\.]+)\n([\d\.]+)',b_text)

# If we only want date, open, and close then we need to delete a few () but not change the pattern
bitcoin1 = re.findall(r'([A-Z][a-z]{2}) (\d{2}) (\d{4})\n([\d\.]+)\n[\d\.]+\n[\d\.]+\n([\d\.]+)\n[\d\.]+\n[\d\.]+',b_text)


In [57]:
# First matched record (as it appears in the file) and the total number of records
first_btc_record = bitcoin1[0]
print(first_btc_record, len(bitcoin1))

('Oct', '02', '2019', '8344.21', '8393.04') 2132


In [58]:
# list of lists: [YYYYMMDD date string, closing price as float]
bitcoin2 = [[''.join([rec[2], mmm[rec[0]], rec[1]]), float(rec[4])] for rec in bitcoin1]

In [59]:
# Sorted copy in ascending date order; bitcoin2 itself is left untouched
bitcoin3 = list(bitcoin2)
bitcoin3.sort()
print(bitcoin3[0], bitcoin3[1], bitcoin3[-1], sep='\n')

['20131201', 955.85]
['20131202', 1043.33]
['20191002', 8393.04]


In [60]:
print(len(bitcoin3))
# Build the lookup set once so each membership test is O(1) rather than a
# scan of the whole ff_dates list for every Bitcoin row.
ff_date_lookup = set(ff_dates)
bitcoin4 = [row for row in bitcoin3 if row[0] in ff_date_lookup]  # throw out dates for which there is no FF return
print(len(bitcoin4))

2132
1467


In [61]:
# First five retained [date, close] pairs
print(bitcoin4[:5])

[['20131202', 1043.33], ['20131203', 1078.28], ['20131204', 1151.17], ['20131205', 1045.11], ['20131206', 829.45]]


## Daily Ethereum closing prices

- Applies the code above to Ethereum instead of Bitcoin

In [62]:
# Read the whole Ethereum file as one string with commas removed; the context
# manager closes the handle even if the read raises.
with open('Project3_Ethereum_daily.txt', 'r') as e:
    e_text = e.read().replace(',', '')

In [63]:
# Captures month abbreviation, day, year, and the 1st and 4th numeric lines
# after each date line. A raw string is used so \d and \. reach the regex
# engine as written instead of being treated as (invalid) Python string escapes.
ethereum1 = re.findall(r'([A-Z][a-z]{2}) (\d{2}) (\d{4})\n([\d\.]+)\n[\d\.]+\n[\d\.]+\n([\d\.]+)\n[\d\.]+\n[\d\.]+',e_text)
print(ethereum1[0], len(ethereum1))

('Oct', '02', '2019', '177.22', '180.71') 1518


In [64]:
# list of lists: [YYYYMMDD date string, closing price as float]
ethereum2 = [[''.join([rec[2], mmm[rec[0]], rec[1]]), float(rec[4])] for rec in ethereum1]

In [65]:
# Sorted copy in ascending date order; ethereum2 itself is left untouched
ethereum3 = list(ethereum2)
ethereum3.sort()
print(ethereum3[0], ethereum3[-1], sep='\n')

['20150807', 2.77]
['20191002', 180.71]


In [66]:
print(len(ethereum3))
# Keep only dates that also appear in the FF data. The lookup set is built
# once so each membership test is O(1) rather than a scan of ff_dates.
ff_date_lookup = set(ff_dates)
ethereum4 = [row for row in ethereum3 if row[0] in ff_date_lookup]
print(len(ethereum4))

1518
1044


## Daily Gold closing prices

- Extracts data on gold prices downloaded from FRED
- We are able to read each row as its own line and then process
- Note: In my version of these data, the closing price equals -99 when the market was closed

In [67]:
# Read the gold price file into a list of raw text lines.
# The context manager closes the handle even if the read raises.
with open('Project3_Gold_daily.txt', 'r') as g:
    g_list = g.readlines()

In [68]:
# Peek at the header line plus the first four data lines
print(g_list[:5])

['DATE,GOLDAMGBD228NLBM\n', '2013-12-02,1237.500\n', '2013-12-03,1219.000\n', '2013-12-04,1213.000\n', '2013-12-05,1234.000\n']


In [69]:
# Drop the header line, then strip trailing whitespace and split each row on commas
gold1 = [line.rstrip().split(',') for line in g_list[1:]]

In [70]:
# First and last parsed rows, one per line
print(gold1[0], gold1[-1], sep='\n')

['2013-12-02', '1237.500']
['2019-11-08', '1466.85']


In [71]:
# Remove the dashes from each date (YYYY-MM-DD -> YYYYMMDD) and parse the price as float
gold2 = [[''.join(row[0].split('-')), float(row[1])] for row in gold1]

In [72]:
# First and last converted rows, one per line
print(gold2[0], gold2[-1], sep='\n')

['20131202', 1237.5]
['20191108', 1466.85]


In [73]:
# Sorted copy in ascending date order; gold2 itself is left untouched
gold3 = list(gold2)
gold3.sort()
print(gold3[0], gold3[-1], sep='\n')

['20131202', 1237.5]
['20191108', 1466.85]


In [74]:
print(len(gold3))
# Keep only dates that also appear in the FF data. The lookup set is built
# once so each membership test is O(1) rather than a scan of ff_dates.
ff_date_lookup = set(ff_dates)
gold4 = [row for row in gold3 if row[0] in ff_date_lookup]
print(len(gold4))

print(gold4[0])
print(gold4[-1])

1550
1467
['20131202', 1237.5]
['20190930', 1487.6]


In [75]:
sum(1 for row in gold4 if row[1] == -99)   # count of missing closing prices

25

## Converting lists of closing prices into lists of daily returns

### Daily Bitcoin returns

In [76]:
# Pair each retained close with the one before it; each return is dated by the later row
btc_dret = [[today[0], (today[1] / prior[1] - 1)]
            for prior, today in zip(bitcoin4, bitcoin4[1:])]
print(btc_dret[0], btc_dret[-1], sep='\n')

['20131203', 0.03349850957990275]
['20190930', 0.005092191447978456]


### Daily Ethereum returns

In [77]:
# Pair each retained close with the one before it; each return is dated by the later row
eth_dret = [[today[0], (today[1] / prior[1] - 1)]
            for prior, today in zip(ethereum4, ethereum4[1:])]
print(eth_dret[0], eth_dret[-1], sep='\n')

['20150810', -0.7442425992779784]
['20190930', 0.029534657432316314]


### Daily Gold returns

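Note: A closing price of -99 marks a missing observation. The code below records the daily return as -99 (missing) whenever the current or the previous closing price is missing; it does not carry the last nonmissing closing price forward.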

In [78]:
# For each consecutive pair of retained rows: if either close carries the -99
# missing marker, the return is recorded as -99; otherwise it is close/prior - 1.
gold_dret = [
    [today[0], -99] if (today[1] == -99 or prior[1] == -99)
    else [today[0], (today[1] / prior[1]) - 1]
    for prior, today in zip(gold4, gold4[1:])
]
print(gold_dret[0], gold_dret[-1], sep='\n')

['20131203', -0.014949494949494935]
['20190930', -0.005714667646960603]


## Creating Array with Daily Return Data for 201510 to 201909

- These data will form the basis for Project3
- You will need to incorporate daily returns for at least one stock of your choosing
- That stock must trade on NYSE or AMEX and must have multiple daily closing prices for each of these months

In [79]:
# Keep dates in Oct-Dec 2015 or in calendar years 2016-2019, matched by string prefix
window_prefixes = ('2016', '2017', '2018', '2019', '201510', '201511', '201512')
ff_dates2 = [day for day in ff_dates if day.startswith(window_prefixes)]

In [80]:
# Restrict every series to the dates in ff_dates2. The lookup set is built
# once so each membership test is O(1) rather than a scan of the list.
window_dates = set(ff_dates2)
ff_dret2   = [row for row in ff3 if row[0] in window_dates]
btc_dret2  = [row for row in btc_dret if row[0] in window_dates]
eth_dret2  = [row for row in eth_dret if row[0] in window_dates]
gold_dret2 = [row for row in gold_dret if row[0] in window_dates]

In [81]:
# Row counts of the four filtered series, space-separated on one line
print(*(len(series) for series in (ff_dret2, btc_dret2, eth_dret2, gold_dret2)))

1006 1006 1006 1006


In [82]:
date     = np.array(ff_dates2)
ret_cols = np.array(["MKTRF","SMB","HML","RF","BTC","ETH","GOLD"])
ret      = np.zeros((len(ff_dates2),7))

# The merge below is positional, so every series must carry exactly the same
# dates, in the same order, as ff_dates2. Equal lengths alone do not guarantee
# that; fail loudly rather than write returns into the wrong rows.
for label, series in (("FF", ff_dret2), ("BTC", btc_dret2),
                      ("ETH", eth_dret2), ("GOLD", gold_dret2)):
    if [row[0] for row in series] != list(ff_dates2):
        raise ValueError(label + " dates do not line up with ff_dates2")

# Columns follow ret_cols: the four FF fields after the date, then BTC, ETH, GOLD.
for num, (ff_row, btc_row, eth_row, gold_row) in enumerate(
        zip(ff_dret2, btc_dret2, eth_dret2, gold_dret2)):
    ret[num, 0:4] = ff_row[1:5]
    ret[num, 4]   = btc_row[1]
    ret[num, 5]   = eth_row[1]
    ret[num, 6]   = gold_row[1]

In [83]:
# Persist the return matrix and its column labels.
# np.save appends ".npy" when the given filename lacks that extension.
np.save(file='Project3_ret.npy', arr=ret)
np.save(file='Project3_ret_cols', arr=ret_cols)

In [84]:
np.shape(ret)   # (rows, columns) of the return matrix

(1006, 7)