In [1]:
# import initial libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from pandas_datareader import data as wb
import datetime
import math
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

  from pandas.util.testing import assert_frame_equal


In [2]:
# Pull AAPL price history through the "yahoo" data source.
apple = wb.DataReader(
    'AAPL',
    data_source="yahoo",
    start= '2010/1/31',
    end='2020/04/10',
)

# High-low range expressed as a fraction of the high; the notebook labels
# this column 'bid_ask_spread'.
apple_2 = apple['High'].sub(apple['Low']).div(apple['High'])
apple_1 = apple.loc[:, ['Volume', 'Adj Close']]

# Put the range column first, then volume and adjusted close, and relabel
# the three columns in that order.
columns = ['bid_ask_spread', 'Volume', 'Adj Close']
apple_data = pd.concat([apple_2, apple_1], axis=1)
apple_data.columns = columns

In [3]:
# Pull macro series from the "fred" data source and reduce each to monthly means.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2020, 4, 1)


def _monthly_fred(series_id, first=start, last=end):
    """Fetch one series from FRED and average its observations per month."""
    raw = wb.DataReader(series_id, "fred", first, last)
    return raw.resample('M').mean()


gdp = _monthly_fred("GDP")
industrial_production = _monthly_fred("INDPRO")
unemployment = _monthly_fred("UNRATE")
balance_sheet = _monthly_fred("WALCL")
consumption = _monthly_fred("PCE")
m1 = _monthly_fred("M1")
m2 = _monthly_fred("M2")

# Place the monthly series side by side, aligned on their date index.
macro = pd.concat(
    [gdp, industrial_production, unemployment, balance_sheet, consumption, m1, m2],
    axis=1,
)

In [4]:
# Load the Fama-French 5-factor file, indexed by its 'Date' column.
# `infer_datetime_format` was dropped: it only takes effect together with
# `parse_dates` (not requested here) and is deprecated in pandas 2.x.
path = Path('F-F_Research_Data_5_Factors_2x3_daily.CSV')
ffm_5 = pd.read_csv(path, index_col='Date')
# Convert the index to datetimes so it can be aligned with the other frames.
# NOTE(review): if 'Date' is stored as YYYYMMDD integers, pd.DatetimeIndex
# reads them as nanosecond offsets from the epoch - confirm the file's format.
ffm_5.index = pd.DatetimeIndex(ffm_5.index)


In [5]:
# Combine the macro, AAPL and factor frames column-wise; the outer join keeps
# every index label that appears in any of the three.
data = pd.concat(objs=[macro, apple_data, ffm_5], axis=1, join="outer")

In [6]:
# Copy the combined frame to the system clipboard.
# NOTE(review): presumably this is pasted by hand into the CSV that a later
# cell reads - confirm. A manual step like that is not reproduced by
# Restart & Run All, and this call needs a clipboard backend to be available.
data.to_clipboard()

In [7]:
# Load the prepared data set, indexed by its 'Date' column.
# `infer_datetime_format` was dropped: it only takes effect together with
# `parse_dates` (not requested here) and is deprecated in pandas 2.x.
path = Path('test_data_set.CSV')
data_set = pd.read_csv(path, index_col='Date')
# Convert the index labels to a DatetimeIndex.
data_set.index = pd.DatetimeIndex(data_set.index)

In [8]:
# Forward-fill gaps: every NaN takes the most recent earlier non-NaN value in
# the same column. Leading NaNs (no earlier observation) stay NaN.
#
# This replaces a row-by-row loop that carried the last seen value per column.
# The built-in works for any number of columns (the loop hard-coded 16),
# does not fail on non-numeric cells, and is not affected by duplicate index
# labels, which `data_set.loc[index] = ...` would overwrite together.
data_set = data_set.ffill()


In [12]:
# Display the pairwise correlation matrix of the columns in data_set
# (pandas' default method is Pearson).
data_set.corr()

Unnamed: 0,GDP,INDPRO,UNRATE,WALCL,PCE,M1,M2,bid_ask_spread,Volume,Adj Close,Mkt-RF,SMB,HML,RMW,CMA,RF
GDP,1.0,0.91413,-0.968531,0.751141,0.99853,0.986113,0.993104,-0.091395,-0.711827,0.941641,-0.014206,-0.02889,-0.031274,-0.004698,-0.042103,0.816232
INDPRO,0.91413,1.0,-0.905695,0.773021,0.900758,0.915322,0.904241,-0.062013,-0.662346,0.853428,-0.012421,-0.038866,-0.031716,-0.007396,-0.044049,0.69786
UNRATE,-0.968531,-0.905695,1.0,-0.880777,-0.963358,-0.989531,-0.982869,0.110018,0.754684,-0.86724,0.013553,0.029229,0.023362,0.002189,0.043744,-0.678516
WALCL,0.751141,0.773021,-0.880777,1.0,0.736287,0.829054,0.797441,-0.136857,-0.721485,0.59695,-0.013921,-0.024738,-0.012248,0.003641,-0.053043,0.302776
PCE,0.99853,0.900758,-0.963358,0.736287,1.0,0.983551,0.992202,-0.091776,-0.705378,0.94499,-0.014289,-0.029828,-0.030151,-0.003425,-0.040631,0.824805
M1,0.986113,0.915322,-0.989531,0.829054,0.983551,1.0,0.997311,-0.109763,-0.737928,0.902997,-0.011233,-0.027591,-0.021673,-0.006313,-0.042389,0.734145
M2,0.993104,0.904241,-0.982869,0.797441,0.992202,0.997311,1.0,-0.106541,-0.728605,0.920016,-0.011718,-0.027558,-0.024537,-0.00598,-0.042283,0.764193
bid_ask_spread,-0.091395,-0.062013,0.110018,-0.136857,-0.091776,-0.109763,-0.106541,1.0,0.491004,-0.082386,-0.17646,-0.040301,-0.00079,0.068826,0.056034,-0.016033
Volume,-0.711827,-0.662346,0.754684,-0.721485,-0.705378,-0.737928,-0.728605,0.491004,1.0,-0.612362,-0.069354,-0.016443,0.013842,0.035665,0.057852,-0.449236
Adj Close,0.941641,0.853428,-0.86724,0.59695,0.94499,0.902997,0.920016,-0.082386,-0.612362,1.0,-0.011292,-0.034593,-0.046481,-0.011572,-0.046384,0.816008
