In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from scipy.ndimage import gaussian_filter1d

In [4]:
def load_data(file_path, start_date='2012-01-01', end_date='2020-12-31'):
    """Read a CSV whose first column holds dates and return the rows in a date window.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``. The first column becomes the index.
    start_date, end_date : str or datetime-like, optional
        Inclusive bounds of the window to keep. The defaults reproduce the
        previously hard-coded 2012-01-01 .. 2020-12-31 range.

    Returns
    -------
    pandas.DataFrame
        Rows whose (timezone-naive, midnight-normalised) index falls inside the window.

    Raises
    ------
    ValueError
        If the first column is numeric and therefore cannot be a date column.
    """
    data = pd.read_csv(file_path, index_col=0, parse_dates=True)
    print(data.head())  # Print the first few rows to inspect the data
    print(data.index)   # Print the index to check its type and format

    # read_csv can leave a non-datetime index (for example when rows carry mixed
    # UTC offsets). Such an index has no `.tz` attribute, so convert it explicitly.
    if not isinstance(data.index, pd.DatetimeIndex):
        if pd.api.types.is_numeric_dtype(data.index.dtype):
            raise ValueError("The first CSV column could not be interpreted as dates")
        # utc=True yields one consistent tz-aware index; the wall time kept below is
        # then UTC for these rows.
        data.index = pd.to_datetime(data.index, utc=True)

    # Drop timezone information, keeping the wall-clock time
    if data.index.tz is not None:
        data.index = data.index.tz_localize(None)

    # Keep only the calendar date (time of day set to midnight)
    data.index = data.index.normalize()

    # Filter data within the requested date range (both ends inclusive)
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)
    in_window = (data.index >= window_start) & (data.index <= window_end)

    return data[in_window]

In [5]:
# Prepare data for LSTM
def prepare_data(data, look_back=60):
    """Min-max scale `data` and cut it into sliding windows for a sequence model.

    The scaler is fitted on the whole of `data`. Each sample is the `look_back`
    scaled rows preceding a position, and its target is the scaled value in
    column 0 at that position (the original comment calls this the 'Close'
    price, which assumes 'Close' is the first column).

    Returns
    -------
    (X, y, scaler)
        `X` and `y` as NumPy arrays, plus the fitted ``MinMaxScaler``.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(data)

    # Every position that has a full look_back history behind it
    target_rows = range(look_back, len(scaled))
    windows = [scaled[row - look_back:row] for row in target_rows]
    targets = [scaled[row, 0] for row in target_rows]

    return np.array(windows), np.array(targets), scaler


In [6]:
# Build LSTM model
def build_model(input_shape):
    """Create and compile a three-layer stacked LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Passed to the first LSTM layer as ``input_shape``.

    Returns
    -------
    A compiled ``Sequential`` model: three 50-unit LSTM layers, each followed by
    20% dropout, and a single-unit Dense output. Compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.
    """
    model = Sequential()

    # The first two recurrent layers return the full sequence for the next LSTM
    model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))

    # The last recurrent layer returns only its final state
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

In [7]:

# Train model
def train_model(model, X_train, y_train, epochs=10, batch_size=32, validation_split=0.2):
    """Fit `model` on the training arrays and return whatever ``model.fit`` returns.

    Shuffling is switched off so samples are consumed in the order given.
    `validation_split` is forwarded unchanged to ``model.fit``.
    """
    fit_options = {
        'epochs': epochs,
        'batch_size': batch_size,
        'validation_split': validation_split,
        'shuffle': False,
    }
    return model.fit(X_train, y_train, **fit_options)

In [8]:
# Make predictions
def make_predictions(model, X_test, scaler):
    """Predict with `model` and map the outputs back to the original value scale.

    Parameters
    ----------
    model : object with ``predict(X)``
    X_test : array-like
        Inputs forwarded to ``model.predict``.
    scaler : fitted scaler with ``inverse_transform``
        May have been fitted on more columns than the model predicts.

    Returns
    -------
    numpy.ndarray
        Inverse-scaled predictions of shape ``(n_samples, n_outputs)``.

    Notes
    -----
    When the scaler was fitted on the full feature matrix, passing the
    ``(n, 1)`` prediction straight to ``inverse_transform`` fails on the column
    count. In that case the predictions are placed in the leading column(s) of
    a zero-padded matrix and only those columns are returned. This assumes the
    scaler treats columns independently (true for ``MinMaxScaler``) and that the
    predicted target occupies the scaler's first column(s).
    """
    predictions = np.asarray(model.predict(X_test))
    if predictions.ndim == 1:
        predictions = predictions.reshape(-1, 1)

    n_outputs = predictions.shape[1]
    # Scalers without n_features_in_ are assumed to match the prediction width
    n_scaler_features = getattr(scaler, 'n_features_in_', n_outputs)
    if n_scaler_features == n_outputs:
        return scaler.inverse_transform(predictions)

    # Pad up to the width the scaler was fitted on, invert, then keep only the target column(s)
    padded = np.zeros((predictions.shape[0], n_scaler_features), dtype=float)
    padded[:, :n_outputs] = predictions
    return scaler.inverse_transform(padded)[:, :n_outputs]

# Evaluate model
def evaluate_model(y_true, y_pred):
    """Return ``(mse, rmse, mae)`` between true and predicted values.

    Both inputs are flattened before comparison. Without this, a ``(n,)`` target
    array and a ``(n, 1)`` prediction array (the shape ``model.predict`` returns
    for a single-unit Dense output) broadcast to an ``(n, n)`` matrix and
    silently produce wrong metrics.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.size != predicted.size:
        raise ValueError(
            f"y_true has {actual.size} values but y_pred has {predicted.size}"
        )

    errors = actual - predicted
    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(errors))
    return mse, rmse, mae

# Plot results
def plot_results(y_true, y_pred):
    """Draw actual and predicted series on one 12x6 line chart and show it.

    Both series are plotted against their positional index.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    for series, series_label in ((y_true, 'Actual'), (y_pred, 'Predicted')):
        ax.plot(series, label=series_label)

    ax.set_title('LSTM Model: Actual vs Predicted Stock Prices')
    ax.set_xlabel('Time')
    ax.set_ylabel('Stock Price')
    ax.legend()
    plt.show()

In [None]:
# Load the TSLA metrics file and pick the model features.
file_path = 'SP425CSVs/TSLA_data_with_metrics.csv'  # Replace with your CSV file name
dataCSV = pd.read_csv(file_path)

print(dataCSV)

# Columns fed to the model. Columns that were considered but are currently
# left out: 'Dividend Yield', 'Dividend Payout Ratio', 'ROA',
# 'Market Capitalization', 'Free Cash Flow', 'Current Ratio', 'Quick Ratio',
# 'PEG Ratio', 'Value at Risk (VaR)', 'Sharpe Ratio', 'Sortino Ratio',
# 'Tracking Error', 'Information Ratio', 'Drawdown Duration'.
features = [
    'Close', 'P/E Ratio', 'P/B Ratio',
    'ROE', 'Beta', 'Revenue Growth',
    'Debt-to-Equity Ratio', 'Standard Deviation',
    'Maximum Drawdown', 'Downside Deviation',
    'R-squared', 'Treynor Ratio', 'Conditional Value at Risk (CVaR)',
    'Beta-adjusted Sharpe Ratio', 'Ulcer Index', 'Jensens Alpha',
]
data = dataCSV[features]

# Parse the 'Date' column as timezone-aware UTC timestamps
datesRev = pd.to_datetime(dataCSV['Date'], utc=True)

# Use the parsed dates as the row index. The 'Date' column itself stays in the
# frame because a Series (not a column name) is passed to set_index.
dataCSV = dataCSV.set_index(datesRev)




In [None]:
# Select the model features from the date-indexed frame and replace missing
# values with 0. fillna returns a new frame, so nothing derived from dataCSV is
# mutated in place (the previous inplace=True on a column subset could raise
# SettingWithCopyWarning here and on later column assignments).
# NOTE(review): 0 is a placeholder for missing ratios; confirm it suits every column.
data = dataCSV[features].fillna(0)
data.shape

In [None]:
def prepare_data(data, look_back=45):
    """Cut a DataFrame into sliding windows with a 'Close' target for each window.

    For every start position ``s`` the sample is rows ``s .. s+look_back-1``
    (all columns), the target is ``data['Close']`` at row ``s+look_back``, and
    the associated date is the index label of that same row.

    Returns
    -------
    (X, y, dates) : tuple of numpy.ndarray
    """
    all_values = data.values
    close_values = data['Close'].to_numpy()
    row_labels = data.index

    # One sample per row that has look_back rows of history in front of it
    target_positions = range(look_back, len(data))

    windows = [all_values[pos - look_back:pos] for pos in target_positions]
    targets = [close_values[pos] for pos in target_positions]
    labels = [row_labels[pos] for pos in target_positions]

    return np.array(windows), np.array(targets), np.array(labels)

# Prepare your data
# Build a shifted copy instead of overwriting `data`. The previous in-place
# `data['Close'] = data['Close'].shift(-1)` moved the target one more day every
# time this cell was re-run. `data` itself is now left untouched.
# After the shift, each row's 'Close' holds the NEXT row's closing price; the
# final row has no next value and is dropped.
data_shifted = data.assign(Close=data['Close'].shift(-1)).dropna()
X, y, dates = prepare_data(data_shifted)
# NOTE(review): each entry in `dates` is the row label whose following-row close
# is the target, so plots are labelled one row earlier than the price they show.

# Split the data chronologically: first 80% of windows for training, rest for testing
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
dates_train, dates_test = dates[:split], dates[split:]

# Scale the data
# The scalers see training data only. Windows are flattened to
# (samples * timesteps, features) for the scaler and reshaped back afterwards.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Separate scaler for the target so predictions can be inverse-transformed alone
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = y_scaler.transform(y_test.reshape(-1, 1))

In [None]:
# Display the windowed feature array returned by prepare_data in the previous cell.
X


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

def build_model(input_shape):
    """Create and compile a compact two-layer LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Passed to the first LSTM layer as ``input_shape``.

    Returns
    -------
    A compiled ``Sequential`` model: LSTM(32, returning sequences), 20% dropout,
    LSTM(16), 20% dropout, Dense(1). Compiled with the 'adam' optimizer and
    'mse' loss.
    """
    model = Sequential()

    # The first recurrent layer keeps the whole sequence for the second one
    model.add(LSTM(32, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))

    # The second recurrent layer returns only its final output
    model.add(LSTM(16, return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Build the model
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so that weight initialisation and dropout repeat across
# Restart & Run All. Some GPU ops may still be non-deterministic.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Input shape is (timesteps, features), taken from the scaled training windows
model = build_model((X_train_scaled.shape[1], X_train_scaled.shape[2]))

In [None]:
# Train the model
# Uses the train_model helper defined earlier in the notebook, which forwards
# these settings to model.fit with shuffle=False.
history = train_model(
    model,
    X_train_scaled,
    y_train_scaled,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()


In [16]:
# Moving Average Filter
def moving_average(data, window_size):
    """Return the simple moving average of a 1-D sequence.

    Parameters
    ----------
    data : array-like, 1-D
        Values to smooth.
    window_size : int
        Number of consecutive values averaged per output point; must satisfy
        ``1 <= window_size <= len(data)``.

    Returns
    -------
    numpy.ndarray
        Array of length ``len(data) - window_size + 1``; only positions with a
        full window are produced.

    Raises
    ------
    ValueError
        If `data` is not one-dimensional or `window_size` is outside the valid
        range. Without this check a window longer than the series silently
        returns a meaningless array, because ``np.convolve`` swaps its operands.
    """
    values = np.asarray(data, dtype=float)
    if values.ndim != 1:
        raise ValueError("moving_average expects a one-dimensional sequence")
    if window_size < 1 or window_size > values.size:
        raise ValueError(
            f"window_size must be between 1 and {values.size}, got {window_size}"
        )

    # Uniform kernel: every value inside the window carries equal weight
    kernel = np.ones(window_size) / window_size
    return np.convolve(values, kernel, mode='valid')


In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Predict on the held-out windows, then undo the target scaling so the
# predictions are in the same units as y_test
y_pred_scaled = model.predict(X_test_scaled)
y_pred = y_scaler.inverse_transform(y_pred_scaled)

# Error metrics in unscaled units
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")

# Convert the test dates so min/max and date-axis formatting work
dates_test = pd.to_datetime(dates_test)

print(f"Date range: {dates_test.min()} to {dates_test.max()}")

# Actual vs predicted over the test period
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(dates_test, y_test, label='Actual', linewidth=2)
ax.plot(dates_test, y_pred, label='Predicted', linewidth=2)
ax.set_title('LSTM Model: Actual vs Predicted Stock Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend()

# One labelled tick every three months, shown as YYYY-MM-DD and rotated to fit
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
fig.autofmt_xdate()

fig.tight_layout()
plt.show()

In [18]:
# Re-assign the index of `data` after passing it through pd.to_datetime.
# NOTE(review): `data` appears to inherit the datetime index set on dataCSV earlier,
# in which case this is a no-op -- confirm before relying on it.
data.index = pd.to_datetime(data.index)

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Requires dates_test, y_test and y_pred from the earlier prediction cells.
# Convert the test dates to pandas datetimes for the x-axis.
dates_test = pd.to_datetime(dates_test)

# Error metrics in unscaled units
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")

# One line trace per series; y_pred is flattened because plotly expects 1-D y data
trace_actual = go.Scatter(
    name='Actual',
    x=dates_test,
    y=y_test,
    mode='lines',
    line={'width': 2},
)

trace_predicted = go.Scatter(
    name='Predicted',
    x=dates_test,
    y=y_pred.flatten(),
    mode='lines',
    line={'width': 2},
)

# Axis titles, date tick format and x-based hover
layout = go.Layout(
    title='LSTM Model: Actual vs Predicted Stock Prices',
    xaxis={'title': 'Date', 'tickformat': '%Y-%m-%d'},
    yaxis={'title': 'Stock Price'},
    hovermode='x',
)

# Assemble and render the interactive figure
fig = go.Figure(data=[trace_actual, trace_predicted], layout=layout)
fig.show()

In [None]:
# Requires dates_test, y_test and y_pred from the prediction cell, and
# trace_actual from the previous plotting cell.
# Convert the test dates to pandas datetimes for the x-axis.
dates_test = pd.to_datetime(dates_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")


# Predicted trace. The previous version referenced `predicted_x_values` and
# `predicted_y_values`, which are never defined in this notebook and raised
# NameError; the test dates and model predictions are the intended data.
trace_predicted = go.Scatter(
    line={'width': 2},
    mode='lines',
    name='Predicted',
    x=dates_test,
    y=y_pred.flatten()  # (n, 1) model output -> 1-D array for plotly
)


# Create layout
layout = go.Layout(
    title='LSTM Model: Predicted Stock Prices',
    xaxis=dict(title='Date', tickformat='%Y-%m-%d'),
    yaxis=dict(title='Stock Price'),
    hovermode='x',  # Show hover values for the x position under the cursor
)

# Create the figure
fig = go.Figure(data=[trace_actual, trace_predicted], layout=layout)

# Show interactive plot
fig.show()


In [None]:
# Dump both plotly traces for inspection, separated by four blank lines
print(trace_actual, trace_predicted, sep=" \n\n\n\n ")

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

# Load the dataset
file_path = "SP425CSVs/AAPL_data_with_metrics.csv"  # Update with your file path if needed
data = pd.read_csv(file_path)

# Feature selection
selected_features = ['Open', 'High', 'Low', 'Close', 'Volume', 'P/E Ratio', 'P/B Ratio',
                     'Dividend Yield', 'ROE', 'ROA', 'Beta', 'Market Capitalization', 'Revenue Growth']
data_selected = data[selected_features]
close_col = selected_features.index('Close')  # position of the target column (3)

# Each sample is the previous `sequence_length` rows; the target is the next 'Close'
sequence_length = 60
n_sequences = len(data_selected) - sequence_length
if n_sequences <= 0:
    raise ValueError(
        f"Need more than {sequence_length} rows to build sequences, got {len(data_selected)}"
    )

# Chronological split sizes, counted in sequences: 70% train, 15% validation, rest test
train_size = int(0.7 * n_sequences)
val_size = int(0.15 * n_sequences)

# Scaling the data
# Fit the scaler only on rows the training sequences (inputs and targets) touch.
# Fitting on the full series would leak validation/test min and max into training.
# Later rows may therefore scale outside [0, 1].
n_train_rows = sequence_length + train_size
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_selected.iloc[:n_train_rows])
scaled_data = scaler.transform(data_selected)
# NOTE(review): no missing-value handling here; confirm the CSV has no NaNs in these columns.

# Prepare the sequences for LSTM
X, y = [], []
for i in range(sequence_length, len(scaled_data)):
    X.append(scaled_data[i-sequence_length:i])  # Last 60 days
    y.append(scaled_data[i, close_col])         # Scaled 'Close' on the following day

X, y = np.array(X), np.array(y)

# Split the data into train, validation, and test sets (in time order)
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

# Build the LSTM model: a bidirectional first layer followed by a plain LSTM
model = Sequential()
model.add(Bidirectional(LSTM(units=100, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))  # Predicting the 'Close' price

# Compile with a learning rate of 0.0005 (Adam comes from the notebook's first import cell)
optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model, monitoring the held-out validation block
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the test set
# Both metrics are computed on scaled values, not on prices.
predicted_prices = model.predict(X_test)
mse = mean_squared_error(y_test, predicted_prices)
mape = mean_absolute_percentage_error(y_test, predicted_prices)

print(f"Test MSE: {mse}")
print(f"Test MAPE: {mape}")

# Visualize the results
plt.figure(figsize=(12, 6))
plt.plot(y_test, color='blue', label='Actual Prices')
plt.plot(predicted_prices, color='red', label='Predicted Prices')
plt.title('Stock Price Prediction with Improved Model')
plt.xlabel('Time')
plt.ylabel('Normalized Price')
plt.legend()
plt.show()

In [None]:
model.summary()

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GRU, Dense, Dropout, Bidirectional, Input, Attention
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt

# Load the dataset
file_path = "SP425CSVs/AAPL_data_with_metrics.csv"  # Update with your file path if needed
data = pd.read_csv(file_path)

# Feature selection
selected_features = ['Open', 'High', 'Low', 'Close', 'Volume', 'P/E Ratio', 'P/B Ratio',
                     'Dividend Yield', 'ROE', 'ROA', 'Beta', 'Market Capitalization', 'Revenue Growth']
data_selected = data[selected_features]
close_col = selected_features.index('Close')  # position of the target column (3)

# Each sample is the previous `sequence_length` rows; the target is the 'Close'
# value `future_target` rows after the last row of the window.
sequence_length = 90  # Adjusted sequence length to 90
future_target = 5  # Number of days ahead to predict

# The last usable window start is i = len - future_target (target = final row).
# The previous bound stopped one step early and never used the final row.
last_start_exclusive = len(data_selected) - future_target + 1
n_sequences = last_start_exclusive - sequence_length
if n_sequences <= 0:
    raise ValueError(
        f"Need at least {sequence_length + future_target} rows to build sequences, "
        f"got {len(data_selected)}"
    )

# Chronological split sizes, counted in sequences: 70% train, 15% validation, rest test
train_size = int(0.7 * n_sequences)
val_size = int(0.15 * n_sequences)

# Scaling the data
# Fit the scaler only on rows the training sequences (inputs and targets) touch.
# Fitting on the full series would leak validation/test min and max into training.
# Later rows may therefore scale outside [0, 1].
n_train_rows = sequence_length + train_size + future_target - 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_selected.iloc[:n_train_rows])
scaled_data = scaler.transform(data_selected)
# NOTE(review): no missing-value handling here; confirm the CSV has no NaNs in these columns.

X, y = [], []
for i in range(sequence_length, last_start_exclusive):
    X.append(scaled_data[i-sequence_length:i])  # Last 90 days
    y.append(scaled_data[i + future_target - 1, close_col])  # 'Close' price 5 days ahead

X, y = np.array(X), np.array(y)

# Split the data into train, validation, and test sets (in time order)
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

# Build the model with GRU layers and an Attention layer
input_layer = Input(shape=(X_train.shape[1], X_train.shape[2]))
x = Bidirectional(GRU(units=150, return_sequences=True))(input_layer)
x = Dropout(0.2)(x)

# Self-attention: the same tensor is passed as both query and value
attention_out = Attention()([x, x])

x = GRU(units=150, return_sequences=False)(attention_out)
x = Dropout(0.2)(x)
output_layer = Dense(units=1)(x)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model with a learning rate of 0.0005
optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model, monitoring the held-out validation block
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the test set
# Both metrics are computed on scaled values, not on prices.
predicted_prices = model.predict(X_test)
mse = mean_squared_error(y_test, predicted_prices)
mape = mean_absolute_percentage_error(y_test, predicted_prices)

print(f"Test MSE: {mse}")
print(f"Test MAPE: {mape}")

# Visualize the results
plt.figure(figsize=(12, 6))
plt.plot(y_test, color='blue', label='Actual Prices')
plt.plot(predicted_prices, color='red', label='Predicted Prices')
plt.title('Stock Price Prediction with Multi-step Forecast and GRU')
plt.xlabel('Time')
plt.ylabel('Normalized Price')
plt.legend()
plt.show()

# Save the model
# model.save("multi_step_attention_gru_model.h5")
