In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [3]:
# Read the raw price table from the CSV that sits next to the notebook
data = pd.read_csv('data.csv')

# Preview the top five rows to see which columns exist and how they are formatted
data.head(n=5)

Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low
0,AAPL,07/17/2023,$193.99,50520160,$191.90,$194.32,$191.81
1,AAPL,07/14/2023,$190.69,41616240,$190.23,$191.1799,$189.63
2,AAPL,07/13/2023,$190.54,41342340,$190.50,$191.19,$189.78
3,AAPL,07-12-2023,$189.77,60750250,$189.68,$191.70,$188.47
4,AAPL,07-11-2023,$188.08,46638120,$189.16,$189.30,$186.60


In [None]:
# Load the dataset
# file_path must be defined in this cell: the read below raised NameError on a
# fresh kernel while the assignment was commented out.
file_path = 'data.csv'  # Replace with your file name if different
data = pd.read_csv(file_path)

# Convert 'Close/Last', 'Open', 'High', 'Low' columns to numeric, removing '$' and ','.
# regex=False states explicitly that '$' is a literal character rather than the
# regex end-of-string anchor, independent of the installed pandas default.
price_columns = ['Close/Last', 'Open', 'High', 'Low']
for col in price_columns:
    data[col] = (
        data[col]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

# All four price columns flattened into one column vector. Built only after the
# conversion above so it holds floats rather than the raw "$..." strings.
prices = data[price_columns].values.reshape(-1, 1)

# Convert 'Date' column to datetime and sort by date (oldest first).
# format='mixed' lets pandas infer the format per value; the preview of this file
# shows both MM/DD/YYYY and MM-DD-YYYY spellings.
data['Date'] = pd.to_datetime(data['Date'], format='mixed')
data = data.sort_values(by='Date')

# Select only the "Date" and "Close/Last" columns, indexed by date.
# NOTE(review): the file has a 'Company' column (visible in the data.head() preview);
# if it holds more than one ticker, sorting by Date interleaves different companies
# in this series — TODO confirm, and filter to one company first if so.
data_prepared = data[['Date', 'Close/Last']].set_index('Date')

In [None]:
# Min-max scale the closing prices into [0, 1] and store them as a new column.
# NOTE(review): the scaler is fitted on every row, including the rows that the later
# 80/20 split holds out for testing — confirm this look-ahead is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
close_prices = data_prepared[['Close/Last']]
data_prepared['Normalized_Close'] = scaler.fit(close_prices).transform(close_prices)

In [None]:
# Function to create sequences for training the RNN
def create_sequences(data, sequence_length=60):
    """Slice a series into overlapping windows paired with the value that follows each.

    Args:
        data: Sliceable sequence of observations (for example a 1-D NumPy array).
        sequence_length: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays where ``x[k]`` is
        ``data[k:k + sequence_length]`` and ``y[k]`` is ``data[k + sequence_length]``.
        Both arrays are empty when ``data`` has no more than ``sequence_length`` items.
    """
    window_count = len(data) - sequence_length
    windows = [data[start:start + sequence_length] for start in range(window_count)]
    targets = [data[start + sequence_length] for start in range(window_count)]
    return np.array(windows), np.array(targets)

# Each sample is 60 consecutive normalized closes; its target is the value that follows
sequence_length = 60
values = data_prepared['Normalized_Close'].to_numpy()
x, y = create_sequences(data=values, sequence_length=sequence_length)

In [None]:
# Ordered hold-out (no shuffling): the first 80% of the windows are used for
# training and the remaining 20% for testing
train_size = int(len(x) * 0.8)
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

In [None]:
# Two stacked LSTM layers, each followed by dropout, then a small dense head
model = Sequential()
# First LSTM returns the whole sequence so the second LSTM can consume it
model.add(LSTM(50, return_sequences=True, input_shape=(sequence_length, 1)))
model.add(Dropout(0.2))
# Second LSTM returns only its final output
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25))
model.add(Dense(1))  # single predicted value

# Adam optimizer minimizing mean squared error
model.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Append a trailing feature axis of size 1: (samples, timesteps) -> (samples, timesteps, 1)
x_train_reshaped = x_train.reshape(len(x_train), x_train.shape[1], 1)
x_test_reshaped = x_test.reshape(len(x_test), x_test.shape[1], 1)

# Fit for one epoch; the test windows are passed as validation data, so
# history records a validation loss computed on them
history = model.fit(
    x=x_train_reshaped,
    y=y_train,
    batch_size=32,
    epochs=1,
    verbose=1,
    validation_data=(x_test_reshaped, y_test),
)

In [None]:
# Run the model on the held-out windows; its outputs are still on the scaler's scale
scaled_predictions = model.predict(x_test_reshaped)

# Map predictions and targets back through the fitted scaler to price values
predicted_prices = scaler.inverse_transform(scaled_predictions)
y_test_actual = scaler.inverse_transform(y_test.reshape((-1, 1)))

In [None]:
import matplotlib.pyplot as plt

# Overlay the model's predictions on the true prices for the test windows
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual, label="Actual Prices")
ax.plot(predicted_prices, label="Predicted Prices")
ax.set_title("Stock Price Prediction")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Squared-error metrics computed on the de-normalized prices
mse = mean_squared_error(y_true=y_test_actual, y_pred=predicted_prices)
rmse = np.sqrt(mse)  # root of the MSE
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

In [None]:
# Mean absolute percentage error: average of |actual - predicted| / actual, in percent
relative_errors = (y_test_actual - predicted_prices) / y_test_actual
mape = np.abs(relative_errors).mean() * 100
print(f"Mean Absolute Percentage Error (MAPE): {mape}%")

In [None]:
# Plot training vs. validation loss
import matplotlib.pyplot as plt

# One curve per recorded loss series, indexed by epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Model Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive
from google.colab.patches import cv2_imshow
# Mount Google Drive at /content/drive (Colab only); the CSV path used below lives under it
drive.mount('/content/drive')

file_path = "/content/drive/MyDrive/ML/data.csv"
data = pd.read_csv(file_path)

# Convert 'Close/Last', 'Open', 'High', 'Low' columns to numeric, removing '$' and ','.
# Columns are cleaned one at a time: data['a', 'b'] looks up a single column keyed by
# a tuple (KeyError), and the .str accessor is only available on a single column.
price_columns = ['Close/Last', 'Open', 'High', 'Low']
for col in price_columns:
    data[col] = pd.to_numeric(
        # raw string: '\$' is not a valid escape in a normal string literal
        data[col].astype(str).str.replace(r'[\$,]', '', regex=True),
        errors='coerce',  # unparseable entries become NaN instead of raising
    )

# Rows whose close could not be parsed would carry NaN through scaling and training
data = data.dropna(subset=['Close/Last'])

# Order rows oldest -> newest so every window precedes its target in time
# (the data.head() preview of this file shows newest-first order).
# format='mixed' handles the MM/DD/YYYY and MM-DD-YYYY spellings seen in that preview.
data['Date'] = pd.to_datetime(data['Date'], format='mixed')
data = data.sort_values(by='Date')

# One feature (the closing price) as an (n_rows, 1) column. This is the layout that
# MinMaxScaler, the (sequence_length, 1) LSTM input and the later inverse_transform
# of the predictions all rely on.
# NOTE(review): the file has a 'Company' column; if it holds more than one ticker,
# this series mixes companies — TODO confirm, and filter to one company first if so.
prices = data[['Close/Last']].to_numpy()

# Scale data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(prices)

# Create sequences
def create_sequences(data, sequence_length=60):
    """Build (window, next value) training pairs from an ordered series.

    ``sequence_length`` defaults to 60 so this definition stays call-compatible
    with the earlier ``create_sequences`` in this notebook, which it replaces
    once this cell runs.

    Args:
        data: Sliceable sequence of observations, e.g. an array of shape
            ``(n,)`` or ``(n, 1)``.
        sequence_length: Number of consecutive observations per input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays: ``x[k]`` is ``data[k:k + sequence_length]``
        and ``y[k]`` is ``data[k + sequence_length]``. Both are empty when ``data``
        has no more than ``sequence_length`` items.
    """
    x, y = [], []
    for start in range(len(data) - sequence_length):
        end = start + sequence_length
        x.append(data[start:end])  # the window
        y.append(data[end])        # the observation right after the window
    return np.array(x), np.array(y)

# Window the scaled series: 60 steps of input per sample, the following step as target
sequence_length = 60
X, y = create_sequences(data=scaled_data, sequence_length=sequence_length)

# Ordered hold-out (no shuffling): first 80% of the windows train, the rest test
train_size = int(len(X) * 0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

# Wider variant of the network: 128-unit and 64-unit LSTM layers, no dropout
model = Sequential([
    # returns the full sequence so the next LSTM can consume it
    LSTM(128, return_sequences=True, input_shape=(sequence_length, 1)),
    # returns only its final output
    LSTM(64, return_sequences=False),
    # single predicted value
    Dense(1),
])

# Adam optimizer minimizing mean squared error
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit for 20 epochs; Keras holds out 10% of the training windows for validation
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_split=0.1,
)

# Predict the test windows (outputs are still on the scaler's scale)
scaled_predictions = model.predict(X_test)

# Map predictions and targets back through the fitted scaler to price values
predicted_prices = scaler.inverse_transform(scaled_predictions)
y_test_original = scaler.inverse_transform(y_test.reshape((-1, 1)))

# Error metrics computed on the de-normalized prices
mse = mean_squared_error(y_true=y_test_original, y_pred=predicted_prices)
rmse = np.sqrt(mse)
# sklearn returns MAPE as a fraction; scale it to percent
mape = 100 * mean_absolute_percentage_error(y_true=y_test_original, y_pred=predicted_prices)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Percentage Error (MAPE): {mape}%")

# Plot actual vs predicted prices
import matplotlib.pyplot as plt

# Overlay the wider model's predictions on the true prices for the test windows
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_original, label="Actual Prices")
ax.plot(predicted_prices, label="Predicted Prices")
ax.set_title("Stock Price Prediction with Increased Neurons")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
plt.show()