In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import psycopg2
from config import PGHOST, PGDATABASE, PGUSER, PGPASSWORD

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LSTM

In [2]:
# Define a function to connect to AWS database instance

def connect():
    """Open a connection to the Postgres server and return it with a cursor.

    Host, database name, user and password are taken from the PGHOST,
    PGDATABASE, PGUSER and PGPASSWORD names imported from ``config``; the
    port is fixed at 5432.

    Returns:
        tuple: ``(conn, cursor)`` - the psycopg2 connection and a cursor
        created on it.

    Raises:
        psycopg2.OperationalError: if the server cannot be reached or the
            connection is refused.
    """
    # Pass the parameters as keywords rather than concatenating a DSN string:
    # values containing spaces, quotes or backslashes (typical for passwords)
    # would otherwise be mis-parsed by the connection-string parser.
    conn = psycopg2.connect(
        host=PGHOST,
        port=5432,
        dbname=PGDATABASE,
        user=PGUSER,
        password=PGPASSWORD,
    )
    print("Connected!")

    # Create a cursor object on the open connection
    cursor = conn.cursor()

    return conn, cursor

In [3]:
# Open the database connection. `conn` is what the query cell below passes to
# pandas; `cursor` is returned as well but is not used in the visible cells.
conn, cursor = connect()

OperationalError: connection to server at "stocksdb.cndg3gmolxwq.us-east-1.rds.amazonaws.com" (18.204.81.179), port 5432 failed: Connection timed out (0x0000274C/10060)
	Is the server running on that host and accepting TCP/IP connections?


In [None]:
# Pull every row of the `google` table through the open connection and keep
# the result as the working DataFrame for the rest of the notebook.
google_query = "SELECT * FROM google"
gg_data = pd.read_sql_query(google_query, con=conn)
gg_df = pd.DataFrame(gg_data)
gg_df.head()

In [None]:
# Remove the two columns that are not used as model inputs.
# NOTE: this rebinds `gg_df`, so running the cell a second time raises a
# KeyError because the columns are already gone.
unused_columns = ['adj_close', 'volume']
gg_df = gg_df.drop(columns=unused_columns)

gg_df.head()

In [None]:
# Parse the 'Date' column into datetimes; this series is the x-axis of the
# final plot.
date_train = gg_df['Date'].pipe(pd.to_datetime)

date_train.head()

In [None]:
# Input features are the columns at positions 1-4 of gg_df
# (intended to be open, high, low, close - selection is positional, so it
# depends on the column order of the table).

columns = gg_df.columns[1:5].tolist()

print(columns)

In [None]:
# Feature-only frame: the four selected columns, cast to float so they can be
# fed to the scaler.

gg_training_df = gg_df.loc[:, columns].astype(float)

gg_training_df.head()

In [None]:
# LSTM gates use sigmoid and tanh, which are sensitive to input magnitude, so
# the features are normalized before being windowed.
# Scale the training dataset (alternative that was tried:
# MinMaxScaler(feature_range=(0,1))).
# NOTE(review): the scaler is fitted on every row, including the rows that are
# later used as the test window - consider fitting on the training rows only.

scaler = StandardScaler().fit(gg_training_df)
scaled_training_data = scaler.transform(gg_training_df)

print(scaled_training_data.shape)

In [None]:
# LSTM expects input shaped (n_samples, timesteps, n_features).
# Here timesteps = look_back (30 past days) and n_features is the number of
# scaled columns (4 in this project).

look_back = 30  # number of past days fed to the model for each prediction
num_future_days = 1  # how many days ahead the target lies

# The first 80% of the rows are used to build the training windows
training_data_len = round(len(scaled_training_data) * 0.8)
n_features = scaled_training_data.shape[1]

# Each training sample is the `look_back` rows ending just before `end`;
# its target is column 3 of the row `num_future_days - 1` after `end`.
train_ends = range(look_back, training_data_len - num_future_days + 1)
X_train = [
    scaled_training_data[end - look_back:end, 0:n_features]
    for end in train_ends
]
y_train = [
    scaled_training_data[end + num_future_days - 1:end + num_future_days, 3]
    for end in train_ends
]

# Test windows are built the same way over the remaining rows
test_ends = range(training_data_len, len(scaled_training_data) - num_future_days + 1)
X_test = [
    scaled_training_data[end - look_back:end, 0:n_features]
    for end in test_ends
]

In [None]:
# Convert the lists of windows into numpy arrays for Keras
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)

In [None]:
# Sanity-check the array shapes: X_train, then y_train, then X_test
for split_array in (X_train, y_train, X_test):
    print(split_array.shape)

In [None]:
# Build the LSTM model: two stacked LSTM layers followed by a single-unit
# dense output.
# NOTE(review): both LSTM layers override the default activation with 'relu';
# confirm this is intended.

window_shape = (X_train.shape[1], X_train.shape[2])  # (timesteps, n_features)

model = Sequential([
    # First hidden layer returns the full sequence so the next LSTM can consume it
    LSTM(units=64, activation='relu', input_shape=window_shape, return_sequences=True),
    # Second hidden layer returns only its final output
    LSTM(units=32, activation='relu', return_sequences=False),
    # Output layer, which contains 1 output
    Dense(units=1),
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
# Train the model on the training windows; the returned History object is kept
# in `history`.

history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    verbose=1,
)

In [None]:
# Run the trained model over the test windows and report the output shape

prediction = model.predict(x=X_test)

print(prediction.shape)

In [None]:
# The scaler was fitted on all feature columns, so inverse_transform expects
# an array with that many columns. Repeat the prediction along the last axis
# once per feature; the extra copies are discarded after the inverse
# transformation.

n_scaled_features = scaled_training_data.shape[1]
prediction_copies = np.repeat(prediction, repeats=n_scaled_features, axis=-1)
print(prediction_copies)

In [None]:
# Rescale the predictions back to price units and discard the extra columns.
# The training target was taken from column 3 of the scaled data (the close
# price), and the scaler holds separate statistics per column, so the result
# must be read from column 3 as well - column 0 would apply the statistics of
# the first feature instead.
close_col = 3
y_pred = scaler.inverse_transform(prediction_copies)[:, close_col]
print(y_pred.shape)

In [None]:
# Build the actual-price frame to compare with the predictions: dates paired
# with close prices, keeping only the rows from training_data_len onward.

close_df = pd.DataFrame({
    'Date': gg_df['Date'],
    'close_price': gg_training_df['close_price'],
})
y_test = close_df.iloc[training_data_len:]
y_test.head()

In [None]:
# Plot the full close-price history, then overlay the predicted and actual
# prices for the test period.

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(date_train, gg_training_df['close_price'])
ax.plot(y_test['Date'], y_pred, color='red', label='Predicted')
ax.plot(y_test['Date'], y_test['close_price'], color='green', label='Actual')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')

ax.set_title("Stock Price predicted by LSTM Model")
ax.grid()
ax.legend()
plt.show()