In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")


In [None]:

# Read the multi-ticker price table from disk. Lines the CSV parser cannot
# handle are skipped rather than aborting the load.
data = pd.read_csv(
    'all_stocks_5yr.csv',
    on_bad_lines='skip',
)
print("Initial shape:", data.shape)

# Preview the first rows as the cell's rich output.
data.head()


In [None]:

# Report how many nulls each column holds before incomplete rows are discarded.
print(data.isnull().sum())

# One chained pass: drop rows with any missing value, parse the 'date' column
# into datetimes, then order the whole table chronologically.
data = (
    data
    .dropna()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
)


In [None]:

# Ticker to model; used for both the row filter and the plot title so the two
# cannot drift apart.
ticker = 'AAPL'

# Keep only that ticker's rows, in chronological order, re-indexed 0..n-1.
company_df = data[data['Name'] == ticker].copy()
company_df = company_df.sort_values('date').reset_index(drop=True)

# Fail here with a clear message instead of letting an empty frame reach the
# scaler / windowing code further down.
if company_df.empty:
    raise ValueError(f"No rows found for ticker {ticker!r} in the dataset")

# Plot open vs close prices
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(company_df['date'], company_df['close'], label='Close Price', color='red')
ax.plot(company_df['date'], company_df['open'], label='Open Price', color='green')
ax.set_title(f'{ticker} Stock Prices')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()


In [None]:

# Number of past time steps fed to the model for each prediction.
seq_length = 60


# Create sequences
def create_sequences(data, seq_length):
    """Slice a series into sliding input windows and next-step targets.

    Parameters
    ----------
    data : array-like
        Observations ordered in time along the first axis.
    seq_length : int
        Number of consecutive observations in each input window; must be > 0.

    Returns
    -------
    X : np.ndarray
        Windows stacked along axis 0: ``X[i]`` is ``data[i:i + seq_length]``.
    y : np.ndarray
        Targets: ``y[i]`` is ``data[i + seq_length]``, the observation that
        immediately follows window ``i``.

    Raises
    ------
    ValueError
        If ``seq_length`` is not positive (a negative value would otherwise
        silently wrap around through negative indexing).

    When ``data`` has ``seq_length`` or fewer rows no window fits; empty arrays
    with the same trailing dimensions as the non-empty case are returned.
    """
    if seq_length <= 0:
        raise ValueError("seq_length must be a positive integer")

    data = np.asarray(data)
    n_windows = len(data) - seq_length
    if n_windows <= 0:
        trailing = data.shape[1:]
        return (
            np.empty((0, seq_length) + trailing, dtype=data.dtype),
            np.empty((0,) + trailing, dtype=data.dtype),
        )

    X = np.stack([data[i:i + seq_length] for i in range(n_windows)])
    # The targets are simply the series shifted by one window length.
    y = data[seq_length:].copy()
    return X, y


close_prices = company_df[['close']]

# One window per row that still has seq_length predecessors.
n_sequences = len(close_prices) - seq_length
if n_sequences < 2:
    raise ValueError(
        f"Need at least {seq_length + 2} rows to build a train and a test "
        f"window of length {seq_length}; got {len(close_prices)}"
    )

# Train-test split (time-based): the first 80% of windows train the model.
train_size = int(n_sequences * 0.8)

# Training window i covers rows i..i+seq_length-1 and targets row i+seq_length,
# so the training portion touches exactly rows [0, train_size + seq_length).
train_rows = train_size + seq_length

# Scale close prices. The scaler is fitted on the training rows only, so the
# min/max of the test period never influence the training features; the same
# fitted transform is then applied to the whole series.
scaler = MinMaxScaler()
scaler.fit(close_prices.iloc[:train_rows])
scaled_data = scaler.transform(close_prices)

X, y = create_sequences(scaled_data, seq_length)

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Reshape for LSTM [samples, time_steps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [None]:

# Seed Python, NumPy and the Keras backend before any weights are created so
# that a fresh "Restart & Run All" builds and trains the same model each time.
keras.utils.set_random_seed(42)

# Two stacked LSTM layers with dropout after each, followed by a single-unit
# dense layer that outputs the next (scaled) value.
model = keras.Sequential([
    # return_sequences=True hands the full sequence of outputs to the next LSTM.
    keras.layers.LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    keras.layers.Dropout(0.2),
    keras.layers.LSTM(50),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()


In [None]:

# Train for a fixed number of epochs. The held-out windows are passed as
# validation data, so their loss is recorded in `history` alongside the
# training loss.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
)


In [None]:

# Predictions and targets are in scaled units; map both back to prices.
predicted_prices = scaler.inverse_transform(model.predict(X_test))
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

# The test windows are the chronologically last ones, so their targets line up
# with the final len(actual_prices) rows of company_df. The start position is
# computed explicitly (rather than slicing with [-n:]) so that an empty test
# set yields an empty date range instead of the whole series.
test_dates = company_df['date'].iloc[len(company_df) - len(actual_prices):]

# Plot predictions vs actual
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, actual_prices, color='blue', label='Actual Price')
ax.plot(test_dates, predicted_prices, color='red', label='Predicted Price')
ax.set_title('AAPL Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()

# Calculate RMSE (in price units, since both arrays were inverse-transformed)
rmse = math.sqrt(mean_squared_error(actual_prices, predicted_prices))
print(f"RMSE: {rmse}")
