In [49]:
from fin_data_fundamentals import find_fundamentals
from fin_data_fundamentals import get_fundamentals
from alpha_vantage.foreignexchange import ForeignExchange
from alpha_vantage.techindicators import TechIndicators
from alpha_vantage.timeseries import TimeSeries
from decouple import config
import pandas as pd
import numpy as np
import quandl
import datetime
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint

In [50]:
from fin_data import DailyTimeSeries

In [51]:
from keras.preprocessing.sequence import TimeseriesGenerator

In [52]:
tesla = DailyTimeSeries('TSLA')

df = tesla.initiate()

################################################################### 
 Ticker:  TSLA 
 Last Refreshed:  2019-09-06 10:23:55 
 Data Retrieved:  Daily Prices (open, high, low, close) and Volumes 
 ###################################################################


In [53]:
df.head()

Unnamed: 0_level_0,TSLA open,TSLA high,TSLA low,TSLA close,TSLA volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-06-29,19.0,24.9984,17.54,23.89,18766300.0
2010-06-30,25.79,30.419,23.3,23.83,17187100.0
2010-07-01,25.0,25.92,20.27,21.96,8218800.0
2010-07-02,23.0,23.0998,18.71,19.2,5139800.0
2010-07-06,20.0,20.0,15.83,16.11,6866900.0


In [54]:
macro_list = ["housing_index", "trade_index", "confidence_index", "longterm_rates"]

In [55]:
df = tesla.add_macro(df, macro_list)

################################################################### 
 Index: Nominal Home Price Index Added 
 ###################################################################
################################################################### 
 Trade Weighted U.S. Dollar Index: Broad Added 
 ###################################################################
################################################################### 
 Index: Yale Investor Behavior Project Added 
 ###################################################################
################################################################### 
 US Treasury Bond Long-Term Rates Added 
 ###################################################################


In [56]:

fund_list = ["operatingrevenue", "totalrevenue", "netincome", "totaloperatingexpenses", "totalgrossprofit", "totaloperatingincome"]

In [57]:
df = tesla.add_fundamentals(df, fund_list)

################################################################### 
 Ticker:  TSLA 
 Fundamentals Retrieved:  ['TSLA open' 'TSLA high' 'TSLA low' 'TSLA close' 'TSLA volume'
 'housing_index' 'trade_value' 'conf_index' 'conf_index_SE' '10 Yrs Rates'
 '20-Yr Maturity Rate' 'TSLA_operatingrevenue' 'TSLA_totalrevenue'
 'TSLA_netincome' 'TSLA_totaloperatingexpenses' 'TSLA_totalgrossprofit'
 'TSLA_totaloperatingincome'] 
 ###################################################################
################################################################### 
 Ticker:  TSLA 
 Retrieved Data Start Date:  2011-11-14 
 Retrieved Data End Date:  2019-07-29 
 Data Retrieved:  ['TSLA_operatingrevenue', 'TSLA_totalrevenue', 'TSLA_netincome', 'TSLA_totaloperatingexpenses', 'TSLA_totalgrossprofit', 'TSLA_totaloperatingincome'] 
 ###################################################################


In [58]:
df.head()


Unnamed: 0_level_0,TSLA open,TSLA high,TSLA low,TSLA close,TSLA volume,housing_index,trade_value,conf_index,conf_index_SE,10 Yrs Rates,20-Yr Maturity Rate,TSLA_operatingrevenue,TSLA_totalrevenue,TSLA_netincome,TSLA_totaloperatingexpenses,TSLA_totalgrossprofit,TSLA_totaloperatingincome
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2010-06-29,19.0,24.9984,17.54,23.89,18766300.0,147.031,104.0133,65.43,3.47,3.63,3.76,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0
2010-06-30,25.79,30.419,23.3,23.83,17187100.0,147.695,104.4167,65.64,3.4,3.61,3.74,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0
2010-07-01,25.0,25.92,20.27,21.96,8218800.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0
2010-07-02,23.0,23.0998,18.71,19.2,5139800.0,147.695,104.4167,65.64,3.4,3.64,3.77,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0
2010-07-06,20.0,20.0,15.83,16.11,6866900.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0


In [59]:
df.shape

(2163, 17)

### Target Feature Engineering ###

- For now, we are using percent change as the target feature. In the future, we would be engineering more features that possibly encompass the data more accurately


In [73]:
df['percent_change'] = ((df['TSLA close'].shift(-1) - df['TSLA close']) / (df['TSLA close']))*100


df['percent_change'].head()

date
2010-06-29    -0.251151
2010-06-30    -7.847251
2010-07-01   -12.568306
2010-07-02   -16.093750
2010-07-06    -1.924271
Name: percent_change, dtype: float64

In [74]:
df.head()

Unnamed: 0_level_0,TSLA open,TSLA high,TSLA low,TSLA close,TSLA volume,housing_index,trade_value,conf_index,conf_index_SE,10 Yrs Rates,20-Yr Maturity Rate,TSLA_operatingrevenue,TSLA_totalrevenue,TSLA_netincome,TSLA_totaloperatingexpenses,TSLA_totalgrossprofit,TSLA_totaloperatingincome,percent_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2010-06-29,19.0,24.9984,17.54,23.89,18766300.0,147.031,104.0133,65.43,3.47,3.63,3.76,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-0.251151
2010-06-30,25.79,30.419,23.3,23.83,17187100.0,147.695,104.4167,65.64,3.4,3.61,3.74,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-7.847251
2010-07-01,25.0,25.92,20.27,21.96,8218800.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-12.568306
2010-07-02,23.0,23.0998,18.71,19.2,5139800.0,147.695,104.4167,65.64,3.4,3.64,3.77,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-16.09375
2010-07-06,20.0,20.0,15.83,16.11,6866900.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-1.924271


In [75]:
# For the last couple of months, tesla had a lot of missing fundamentals data, so we have th drop the null values.

print(df.isnull().sum())
print(df.shape)

df = df.dropna()

df.shape
# loses about 60 observations.

TSLA open                      0
TSLA high                      0
TSLA low                       0
TSLA close                     0
TSLA volume                    0
housing_index                  0
trade_value                    0
conf_index                     0
conf_index_SE                  0
10 Yrs Rates                   0
20-Yr Maturity Rate            0
TSLA_operatingrevenue          0
TSLA_totalrevenue              0
TSLA_netincome                 0
TSLA_totaloperatingexpenses    0
TSLA_totalgrossprofit          0
TSLA_totaloperatingincome      0
percent_change                 1
dtype: int64
(2103, 18)


(2102, 18)

### Multiple Features

- Let's try to have multiple features in the notebook to see if that helps the model.
- Time Series Forecasting As Supervised Learning:
- reframing the time sereis allows the access to standard linear and nonlinear machine learning algorithms
- Supervised learning: algorithm learning from the training dataset. Has specific features and targets with values.
- We have to make a sliding window i.e. taking in the previous time steps as input variables, we use the next time step as the output variable.
- Also knowns as sliding window method, window method or a lag method. 
- How the sliding window work would be we would have all 18 features to predict the percent change move. 
###### Multi-Step Forecasting:
- One-Step Forecast: where the next day's percent change is predicted
- Multi-Step Forecase: Two or more future time steps are to be predicted.


##### Pandas shift() Function#####

- in order to create lag observations to forecast observations for a time series dataset.
- Typically, current time(t) and future times (t+1, t+n) are forecast times and past observations (t-1, t-n) are used to make forecasts.


#### Normalize the dataset:

In [76]:
dataset = df.values

In [77]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit_transform(dataset.reshape(-1,1))

array([[0.18358664],
       [0.18358664],
       [0.18358664],
       ...,
       [0.30644219],
       [0.03711406],
       [0.18358664]])

#### Split the dataset into train and test sets

In [78]:
df.head()

Unnamed: 0_level_0,TSLA open,TSLA high,TSLA low,TSLA close,TSLA volume,housing_index,trade_value,conf_index,conf_index_SE,10 Yrs Rates,20-Yr Maturity Rate,TSLA_operatingrevenue,TSLA_totalrevenue,TSLA_netincome,TSLA_totaloperatingexpenses,TSLA_totalgrossprofit,TSLA_totaloperatingincome,percent_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2010-06-29,19.0,24.9984,17.54,23.89,18766300.0,147.031,104.0133,65.43,3.47,3.63,3.76,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-0.251151
2010-06-30,25.79,30.419,23.3,23.83,17187100.0,147.695,104.4167,65.64,3.4,3.61,3.74,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-7.847251
2010-07-01,25.0,25.92,20.27,21.96,8218800.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-12.568306
2010-07-02,23.0,23.0998,18.71,19.2,5139800.0,147.695,104.4167,65.64,3.4,3.64,3.77,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-16.09375
2010-07-06,20.0,20.0,15.83,16.11,6866900.0,147.695,104.4167,65.64,3.4,3.58,3.71,31241000.0,31241000.0,-34935000.0,47130000.0,9296000.0,-37834000.0,-1.924271


In [66]:
X = df.drop(columns='percent_change')
y = df[['percent_change']].values

In [67]:
#use Alex's code

def split(train_cut, val_cut, X=X, y=y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                        train_size=train_cut, 
                                                        shuffle=False)

    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test,
                                                    train_size=val_cut,
                                                    shuffle=False)
    
    return X_train, X_test, X_val, y_train, y_test, y_val


In [68]:
X_train, X_test, X_val, y_train, y_test, y_val = split(train_cut=.8,
                                                      val_cut=.6)




In [69]:
train_data_generator = TimeseriesGenerator(X_train, y_train,
                                     length=17,
                                     sampling_rate=1,
                                     stride=1,
                                     batch_size=6)

test_data_generator = TimeseriesGenerator(X_test, y_test, 
                                          length=17, 
                                          sampling_rate=1,
                                          stride=1,
                                          batch_size=6)

val_data_generator = TimeseriesGenerator(X_val, y_val, 
                                          length=17, 
                                          sampling_rate=1,
                                          stride=1,
                                          batch_size=6)

In [79]:
X_train.shape

(1682, 17)

In [82]:
scaled = scaler.transform(df)

In [83]:
train = scaled[:1500, :]
test = scaled[1500:, :]

In [84]:
X_train, X_test = train[:1500,:17], test[:,:17] 

In [85]:
X_train.shape, X_test.shape

((1500, 17), (602, 17))

In [86]:
y_train, y_test = train[:1500, 17:18], test[:, 17:18]

In [87]:
y_train.shape, y_test.shape

((1500, 1), (602, 1))

In [88]:
look_back = 7 # 7 dats

### TimeseriesGenerator:
Data: takes in data argument i.e. X_train, should be 2D, axis 0 is the time dimension and has timesteps.
target: target for the model i.e. y_train
length: lenght of output sequences (in number of timesteps)
sampling_rate: period between successive timesteps within sequences.
stride: sequence between successive output sequences.
batch_size: number of timeseries samples in each batch, i.e. number of days you want to predict the target for.
can also use:
start_index: data points earlier than start_index would not be used in output sequence, this is to split the data for test or validation.
end_index: data points later than end_index would not be used in the output sequences, useful to reserve the data for test or validation.

In [89]:
# sampling_rate, and stride essentially refer to daily time series and how many values do you want to include in each sample. since it is a daily time series, we can keep the sampling_rate and stride to 1.

In [90]:
train_data_generator = TimeseriesGenerator(X_train, y_train, 
                                           length=17,
                                           sampling_rate=1, 
                                           stride=1, 
                                           batch_size=3)


In [91]:
test_data_generator = TimeseriesGenerator(X_test, y_test, 
                                          length=17, 
                                          sampling_rate=1,
                                          stride=1,
                                          batch_size =3)

### Let's Define the Model:

In [92]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [93]:

regressor = Sequential()

# Adding the LSTM layer

regressor.add(LSTM(32, return_sequences=True, input_shape=(train_data_generator.length, X_train.shape[1])))
regressor.add(Dropout(0.2))

# Adding the second layer

regressor.add(LSTM(32, return_sequences=True))
regressor.add(Dropout(0.2))

# Adding a third layer
regressor.add(LSTM(32, return_sequences=False))
regressor.add(Dropout(0.2))

# Adding the output layer
regressor.add(Dense(units=1))

# Compiling the RNN
regressor.compile(optimizer='adam', loss='mean_squared_error')

# Fitting the RNN to the Training set

history = regressor.fit_generator(train_data_generator, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## To see what your model looks:

from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)
from IPython.display import Image
Image(filename='model.png')

In [170]:
# history = model.fit_generator(train_data_generator, epochs=100).history

In [98]:
from keras.models import load_model

regressor.save('first_model.h')
model = load_model('first_model.h')

In [99]:
regressor.evaluate_generator(test_data_generator)

1.405754124615937e-08

In [103]:
train_pred = regressor.predict_generator(test_data_generator)
train_pred.shape

(585, 1)

In [104]:
test_pred = model.predict_generator(test_data_generator)
test_pred.shape

(585, 1)

In [105]:
## scale values back to real index/price range.

y = scaler.inverse_transform(train_pred)
test_pred = scaler.inverse_transform(test_pred)
# dataset_y = scaler.inverse_transform(dataset_y)


In [106]:
from sklearn.metrics import mean_squared_error

In [107]:
# calculating the room mean squared erro

train_score = math.sqrt(mean_squared_error(y_train[:,0], train_pred[:,0]))
print('Train Score: %0.2f RMSE' % (train_score))

test_score = math.sqrt(mean_squared_error(y_test[:, 0], test_pred[:, 0]))
print('Test Score: %.2f RMSE' % (test_score))

# this would print out the RMSE score

ValueError: Found input variables with inconsistent numbers of samples: [1500, 585]

In [None]:
# you can also plot the train and test predictions.


### PREVIOUS MODEL:

In [42]:
model = Sequential()
model.add(LSTM(32, input_shape=(look_back, X_train.shape[1])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')