In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error

Python Library Imports
import pandas as pd

Imports the pandas library with the common alias 'pd'
Pandas is used for data manipulation and analysis, particularly with structured data in DataFrames
import numpy as np

Imports the numpy library with the standard alias 'np'
NumPy provides support for large, multi-dimensional arrays and matrices, along with mathematical functions
import matplotlib.pyplot as plt

Imports matplotlib's pyplot module with the conventional alias 'plt'
Matplotlib is a comprehensive library for creating static, animated, and interactive visualizations
from sklearn.preprocessing import MinMaxScaler

Imports the MinMaxScaler class from scikit-learn's preprocessing module
MinMaxScaler is used to scale features to a fixed range, typically between 0 and 1
from tensorflow.keras.models import Sequential

Imports the Sequential model class from Keras (TensorFlow's high-level API)
Sequential is the simplest Keras model type: a linear stack of layers in which each layer feeds the next
from tensorflow.keras.layers import LSTM, Dense, Dropout

Imports specific layer types from Keras:
LSTM (Long Short-Term Memory): A type of recurrent neural network layer
Dense: A regular fully-connected neural network layer
Dropout: A regularization layer to prevent overfitting
from sklearn.metrics import mean_squared_error

Imports the mean_squared_error function from scikit-learn's metrics module
Used to evaluate the model's performance by calculating the average squared difference between predicted and actual values

In [None]:
# Fetch the Tesla stock price CSV from the plotly/datasets GitHub repository and
# write it to dataset.csv in the current working directory (-O names the output file).
!wget https://raw.githubusercontent.com/plotly/datasets/master/tesla-stock-price.csv -O dataset.csv

Downloads Tesla stock price data from GitHub's plotly dataset repository
Saves it locally as 'dataset.csv'
Cell 2: Data Loading and Initial Inspection

In [None]:
# Read the downloaded CSV into a DataFrame, then echo a five-row preview
# followed by the (rows, columns) tuple.
df = pd.read_csv(filepath_or_buffer="dataset.csv")
print(df.head(), df.shape, sep="\n")

Loads the CSV file into a pandas DataFrame
Displays the first 5 rows of data using head()
Shows the dimensions of the dataset (rows, columns) using shape

In [16]:
# Order the rows by date so that shift(-1) pairs every row with the row that follows it.
# NOTE(review): "date" is sorted as stored; this assumes its values sort chronologically — confirm.
df = df.sort_values("date")

# Target: the close of the following row. The final row has no successor, so its
# target is NaN and dropna() removes it (together with any other incomplete rows).
# NOTE: this cell reassigns df, so re-running it without re-running the loading
# cell drops one more trailing row each time.
df['next_day_close'] = df['close'].shift(-1)
df = df.dropna()

# 'volume' may contain thousands separators; strip the commas, then convert to numeric.
df['volume'] = pd.to_numeric(
    df['volume'].astype(str).str.replace(',', '', regex=False)
)

# Inputs are every column except the date and the target itself.
features = df.drop(["date", "next_day_close"], axis=1)
target = df["next_day_close"]

# Use one scaler per array. Calling fit_transform twice on a single MinMaxScaler
# refits it, so the feature scaling parameters would be overwritten by the target's.
# NOTE(review): both scalers are fit on the full series, before the train/test
# split done later in the notebook, so test-period ranges influence the scaling.
feature_scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = feature_scaler.fit_transform(features)

# `scaler` stays bound to the target scaler: later cells call
# scaler.inverse_transform(...) to map predictions back to price units.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_target = scaler.fit_transform(target.values.reshape(-1, 1))

Sorts data by date
Creates target variable (next day's closing price)
Removes any rows with missing values
Cleans the volume column by removing commas and converting to numeric
Separates features and target variables
Scales the features and the target to the [0, 1] range with MinMaxScaler

In [None]:
def create_sequences(X, y, time_steps=60):
    """Build sliding-window samples for a sequence model.

    For every index ``i`` in ``range(time_steps, len(X))`` the sample is the
    window ``X[i - time_steps:i]`` and its label is ``y[i]``, i.e. the target
    stored at the row immediately after the window's last row.

    Parameters
    ----------
    X : array-like, shape (n_rows, ...)
        Row-ordered feature matrix.
    y : array-like, shape (n_rows, ...)
        Targets aligned row-for-row with ``X``.
    time_steps : int, default 60
        Number of consecutive rows in each window; must be at least 1.

    Returns
    -------
    (Xs, ys) : tuple of numpy.ndarray
        ``Xs`` has shape ``(n_rows - time_steps, time_steps, ...)`` and ``ys``
        has shape ``(n_rows - time_steps, ...)``. When there are not enough
        rows for a single window, both arrays are empty but keep their
        trailing dimensions so downstream shape lookups still work.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1 or ``X`` and ``y`` differ in length.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")

    # Positional indexing below requires plain arrays (not label-indexed objects).
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of rows, got {len(X)} and {len(y)}"
        )

    if len(X) <= time_steps:
        # Not enough history for even one window: return correctly shaped empties.
        empty_X = np.empty((0, time_steps) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_X, empty_y

    Xs = np.stack([X[i - time_steps:i] for i in range(time_steps, len(X))])
    # The labels are simply every target from index `time_steps` onward.
    ys = y[time_steps:].copy()
    return Xs, ys

# Look-back window length, in rows, shared by the sequence builder below.
time_steps = 60
X, y = create_sequences(X=scaled_features, y=scaled_target, time_steps=time_steps)

# Report the dimensions of the windowed inputs and their labels.
for label, shape in (("Shape of X:", X.shape), ("Shape of y:", y.shape)):
    print(label, shape)

Creates a function to generate sequences for time series data
Uses a sliding window of the 60 preceding rows (trading days) as the input for each target value
Creates input sequences and corresponding target values
Prints the shapes of resulting arrays

In [18]:
# Hold-out split by position: the first 80% of the sequences are used for
# training and the remainder for testing (no shuffling).
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

Splits the sequences chronologically: the first 80% for training and the remaining 20% for testing

In [None]:
# Input to the network: (window length, number of features), read off the training array.
n_steps, n_features = X_train.shape[1], X_train.shape[2]

# Two stacked LSTM layers, each followed by 20% dropout, then a small dense
# head that ends in a single linear output unit.
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(n_steps, n_features)))
model.add(Dropout(0.2))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(50, activation='relu'))
model.add(Dense(1))

# Mean squared error loss, optimised with Adam.
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

Creates a Sequential LSTM model with:
First LSTM layer with 100 units and return sequences
Dropout layer (20% dropout rate)
Second LSTM layer with 100 units
Another Dropout layer
Dense layer with 50 units and ReLU activation
Output layer with 1 unit
Compiles model with Adam optimizer and MSE loss
Displays model summary

In [None]:
# Fit for 50 epochs in mini-batches of 32 without shuffling, so samples keep
# their time order; a 10% validation split is carved out of the training data.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    shuffle=False,
    validation_split=0.1,
)

Trains the model for 50 epochs
Uses batch size of 32
Sets aside the last 10% of the training data for validation
Keeps time series order (no shuffling)

In [None]:
# Predict on the held-out sequences and map both predictions and true targets
# back to the original price scale with the fitted scaler.
predictions = scaler.inverse_transform(model.predict(X_test))
y_test_actual = scaler.inverse_transform(y_test)

# Overlay the actual and predicted series on one set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual, label='Actual')
ax.plot(predictions, label='Predicted')
ax.legend()
plt.show()

# Root mean squared error, in the same units as the target.
mse = mean_squared_error(y_test_actual, predictions)
rmse = np.sqrt(mse)
print("RMSE:", rmse)


Makes predictions on test data
Inverse transforms predictions and actual values to original scale
Creates a plot comparing actual vs predicted values
Calculates and displays the Root Mean Square Error

In [None]:
# Persist the trained model to tesla_lstm_model.h5 (path is relative to the working directory).
model.save(filepath="tesla_lstm_model.h5")

Saves the trained model to a file

In [None]:
# Build one inference sample from the most recent `time_steps` rows of the scaled
# features. The window length reuses `time_steps` (the value the training
# sequences were built with) instead of a hard-coded 60, so the input shape
# always matches what the model was trained on.
# NOTE(review): scaled_features excludes the final row of the sorted data (it was
# dropped with its NaN target), so this window ends one row before the most
# recent record — confirm this is the intended forecast origin.
last_window = scaled_features[-time_steps:]

# Add the leading batch axis the model expects: (1, time_steps, n_features).
X_future = last_window[np.newaxis, ...]

# Predict in scaled space, then convert back to price units with the target scaler.
predicted_price = scaler.inverse_transform(model.predict(X_future))
print("Predicted Next Day Close:", predicted_price[0][0])

Takes the last 60 days of data
Makes a prediction for the next day's closing price
Transforms the prediction back to original scale
Displays the predicted price