In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.layers import Dropout
import tensorflow as tf

# Read the csv file and clean the data
stock_data = pd.read_csv('../dataCleaning/data.csv')
# errors='coerce' turns unparseable dates into NaT instead of raising.
stock_data['Date'] = pd.to_datetime(stock_data['Date'], errors='coerce')
price_columns = ['Close/Last', 'Open', 'High', 'Low']
for column in price_columns:
    # '$' is a regex metacharacter and the default of `regex` differs between
    # pandas versions, so ask for a literal replacement explicitly before
    # converting the remaining text to floats.
    stock_data[column] = (
        stock_data[column].str.replace('$', '', regex=False).astype(float)
    )
# Keep only the AMZN rows and order them chronologically.
AMZN_data = stock_data[stock_data['Company'] == 'AMZN']
AMZN_data = AMZN_data.sort_values(by="Date")

# Time Series Splitting
# Chronological split: the first 80% of the rows are used for training and
# the remaining rows for testing.
train_size = int(0.8 * len(AMZN_data))
# Take explicit copies so that the scaled values assigned below are written
# to independent frames rather than to slices of AMZN_data (assigning to a
# slice triggers pandas' SettingWithCopyWarning).
train_data = AMZN_data.iloc[:train_size].copy()
test_data = AMZN_data.iloc[train_size:].copy()

# Data Scaling
# The scaler is fitted on the training split only and then applied unchanged
# to the test split.
scaler = MinMaxScaler()
train_data['Close/Last'] = scaler.fit_transform(train_data[['Close/Last']])
test_data['Close/Last'] = scaler.transform(test_data[['Close/Last']])

# Sequence Creation
def create_sequences(data, sequence_length):
    """Build sliding-window samples from the 'Close/Last' column.

    Args:
        data: DataFrame containing a 'Close/Last' column; rows are used in
            their existing order.
        sequence_length: Number of consecutive values in each input window.

    Returns:
        A list of ``(sequence, target)`` tuples. ``sequence`` is an array of
        ``sequence_length`` consecutive 'Close/Last' values and ``target`` is
        the value immediately after that window. The list is empty when
        ``data`` has ``sequence_length`` rows or fewer.
    """
    # Extract the column once: indexing the DataFrame inside the loop would
    # build a new slice and a new row object on every iteration.
    closes = data['Close/Last'].values
    return [
        (closes[start:start + sequence_length], closes[start + sequence_length])
        for start in range(len(closes) - sequence_length)
    ]

# Each sample uses the previous `sequence_length` closing prices to predict
# the next one.
sequence_length = 60
train_sequences = create_sequences(train_data, sequence_length)
test_sequences = create_sequences(test_data, sequence_length)

# Convert sequences to numpy arrays
# The model's first layer is declared with input_shape=(sequence_length, 1),
# so give the inputs an explicit trailing feature axis
# (samples, sequence_length, 1) instead of relying on Keras to adjust the
# rank implicitly. The reshape also yields a well-formed empty array when a
# split is too short to produce any window.
X_train = np.array(
    [window for window, _ in train_sequences]
).reshape(-1, sequence_length, 1)
y_train = np.array([target for _, target in train_sequences])
X_test = np.array(
    [window for window, _ in test_sequences]
).reshape(-1, sequence_length, 1)
y_test = np.array([target for _, target in test_sequences])

# Model Building
# Four stacked 50-unit LSTM layers, each followed by 20% dropout, ending in a
# single-unit dense layer that outputs the predicted (scaled) closing price.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(sequence_length, 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # The last recurrent layer does not set return_sequences, unlike the
    # three layers above it.
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

# Model Compilation
# Mean squared error is the training objective; mean absolute error is
# tracked as an additional reported metric.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mean_absolute_error'],
)

# Model Training
model.fit(x=X_train, y=y_train, batch_size=32, epochs=70)

# Model Evaluation
# The model is compiled with one extra metric, so evaluate() returns
# [loss, mean_absolute_error] rather than a single scalar. Both values are
# computed on the scaled closing prices.
train_loss = model.evaluate(X_train, y_train)
test_loss = model.evaluate(X_test, y_test)
# Unpack so each number is printed under the right label instead of printing
# the whole list as "Loss".
train_mse, train_mae = train_loss
test_mse, test_mae = test_loss
print(f"Training Loss (MSE): {train_mse}, MAE: {train_mae}")
print(f"Testing Loss (MSE): {test_mse}, MAE: {test_mae}")

# Prediction
y_pred = model.predict(X_test)

# Inverse transform scaled predictions to get actual prices
# The targets are turned into a single column first, because the scaler was
# fitted on a one-column frame.
y_test_actual = scaler.inverse_transform(np.reshape(y_test, (-1, 1)))
y_pred_actual = scaler.inverse_transform(y_pred)

# Visualization
# The first `sequence_length` test rows only serve as input history for the
# first window, so the plotted dates start after them.
plot_dates = test_data['Date'].iloc[sequence_length:]

plt.figure(figsize=(16, 6))
plt.title('AMZN Stock Price Prediction')
for series, series_label in (
    (y_test_actual, 'Actual Closing Price'),
    (y_pred_actual, 'Predicted Closing Price'),
):
    plt.plot(plot_dates, series, label=series_label)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Closing Price USD ($)', fontsize=18)
plt.legend()
plt.show()