<a href="https://www.kaggle.com/code/shishirkulal/stock-prediction-dollar?scriptVersionId=286607668" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [8]:
!pip install yfinance # Install the library

import yfinance as yf
import pandas as pd
import numpy as np

# Define the stock ticker and the time period
ticker = 'AAPL'  # Example: Apple Inc.
start_date = '2010-01-01'
end_date = pd.to_datetime('today').strftime('%Y-%m-%d')

# Fetch the data
df = yf.download(ticker, start=start_date, end=end_date)

# Display the first few rows
print(df.head())



  df = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed

Price          Close      High       Low      Open     Volume
Ticker          AAPL      AAPL      AAPL      AAPL       AAPL
Date                                                         
2010-01-04  6.418384  6.433080  6.369499  6.400989  493729600
2010-01-05  6.429481  6.465770  6.395591  6.436079  601904800
2010-01-06  6.327211  6.454973  6.320613  6.429480  552160000
2010-01-07  6.315515  6.358102  6.269628  6.350604  477131200
2010-01-08  6.357501  6.358101  6.269928  6.307116  447610800





In [9]:
from sklearn.preprocessing import MinMaxScaler

# Use the 'Close' price for prediction, which is common
data = df['Close'].values.reshape(-1, 1)

# Scale the data (MinMaxScaler is crucial for LSTMs)
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Define time-steps (how many previous days to look at to predict the next day)
prediction_days = 60

# Create the training data structure
x_train = []
y_train = []

for i in range(prediction_days, len(scaled_data)):
    # x_train contains the previous 'prediction_days' values (features)
    x_train.append(scaled_data[i - prediction_days:i, 0])
    # y_train is the value to predict (the current day's price)
    y_train.append(scaled_data[i, 0])

# Convert to NumPy arrays
x_train, y_train = np.array(x_train), np.array(y_train)

# Reshape data for LSTM input: [samples, time_steps, features]
# Here, features is 1 (just the closing price)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

print(f"X_train shape: {x_train.shape}")
print(f"Y_train shape: {y_train.shape}")

X_train shape: (3953, 60, 1)
Y_train shape: (3953,)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Build the LSTM model
model = Sequential()

# Layer 1: LSTM with 50 units. `return_sequences=True` for stacking LSTMs.
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))

# Layer 2: Stacked LSTM layer
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))

# Layer 3: Final LSTM layer. `return_sequences=False` for the last LSTM.
model.add(LSTM(units=50))
model.add(Dropout(0.2))

# Dense layer for output prediction
model.add(Dense(units=1))

# Compile the model
# Optimizer: 'adam' is a good starting point
# Loss: 'mean_squared_error' (MSE) is standard for regression problems like price prediction
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
# epochs: number of times the model sees the entire dataset
# batch_size: number of samples per gradient update
# NOTE: This training is done on Kaggle's cloud GPUs/CPUs
print("Starting model training...")
model.fit(x_train, y_train, epochs=25, batch_size=32)
print("Training complete.")


Starting model training...
Epoch 1/25


  super().__init__(**kwargs)


[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 0.0142
Epoch 2/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0020
Epoch 3/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0014
Epoch 4/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0013
Epoch 5/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0013
Epoch 6/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0012
Epoch 7/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0015
Epoch 8/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0012
Epoch 9/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0015
Epoch 10/25
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - los

In [11]:
# 1. Get the last 60 days of historical data for the chosen stock
test_data_start = pd.to_datetime('today') - pd.DateOffset(days=prediction_days + 1)
test_df = yf.download(ticker, start=test_data_start)

# 2. Extract the data needed for prediction
# We need the last 'prediction_days' (60) closing prices
total_dataset = pd.concat((df['Close'], test_df['Close']), axis=0)
model_inputs = total_dataset.values[len(total_dataset) - len(test_df) - prediction_days:].reshape(-1, 1)

# 3. Scale this new data using the *same scaler* fitted on the training data
model_inputs = scaler.transform(model_inputs)

# 4. Prepare the X_test structure (the last 'prediction_days' worth of prices)
x_test = []
x_test.append(model_inputs[-prediction_days:, 0]) # Get the last 60 scaled values
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# 5. Make the prediction (this is a scaled value)
predicted_price_scaled = model.predict(x_test)

# 6. Inverse transform the result to get the actual dollar price
predicted_price = scaler.inverse_transform(predicted_price_scaled)

print(f"\n--- Live Stock Prediction for {ticker} ---")
print(f"The model predicts the next closing price will be: ${predicted_price[0][0]:.2f}")

  test_df = yf.download(ticker, start=test_data_start)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step

--- Live Stock Prediction for AAPL ---
The model predicts the next closing price will be: $273.82


In [12]:
def predict_stock_price(ticker, prediction_days=60, epochs=25, batch_size=32):
    # 1. Data Acquisition (as in step 1)
    df = yf.download(ticker, start='2010-01-01', end=pd.to_datetime('today').strftime('%Y-%m-%d'))
    data = df['Close'].values.reshape(-1, 1)

    # 2. Preprocessing (as in step 2)
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)

    x_train, y_train = [], []
    for i in range(prediction_days, len(scaled_data)):
        x_train.append(scaled_data[i - prediction_days:i, 0])
        y_train.append(scaled_data[i, 0])

    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    
    # Check if there is enough data
    if x_train.shape[0] == 0:
        print(f"Not enough data to train for {ticker}")
        return

    # 3. Build and Train Model (as in step 3)
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    print(f"Training model for {ticker}...")
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0) # verbose=0 hides training output

    # 4. Prediction for the next day (as in step 4)
    # Get the last 60 days of historical data for the chosen stock
    test_data_start = pd.to_datetime('today') - pd.DateOffset(days=prediction_days + 1)
    test_df = yf.download(ticker, start=test_data_start)

    # The model input needs the last `prediction_days` prices
    model_inputs = test_df['Close'].values.reshape(-1, 1)
    
    # Transform using the *fitted* scaler
    model_inputs = scaler.transform(model_inputs)

    x_test = np.array([model_inputs[-prediction_days:, 0]])
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    predicted_price_scaled = model.predict(x_test)
    predicted_price = scaler.inverse_transform(predicted_price_scaled)

    print(f"\nPrediction for {ticker}'s next close:")
    print(f"Predicted Price: ${predicted_price[0][0]:.2f}")

# --- Example Usage ---
# Train and predict for a new stock (e.g., Google/Alphabet)
predict_stock_price('GOOG', epochs=10) # Using fewer epochs for quick testing
print("\n" + "="*50 + "\n")
# Train and predict for another new stock (e.g., Microsoft)
predict_stock_price('MSFT', epochs=10)

  df = yf.download(ticker, start='2010-01-01', end=pd.to_datetime('today').strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


Training model for GOOG...


  test_df = yf.download(ticker, start=test_data_start)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step


  df = yf.download(ticker, start='2010-01-01', end=pd.to_datetime('today').strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed


Prediction for GOOG's next close:
Predicted Price: $309.76


Training model for MSFT...



  super().__init__(**kwargs)
  test_df = yf.download(ticker, start=test_data_start)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step

Prediction for MSFT's next close:
Predicted Price: $479.24


In [13]:
# Ticker for Reliance on the NSE
reliance_ticker = 'RELIANCE.NS'

# Ticker for the Nifty 50 Index
nifty_ticker = '^NSEI'

# --- Example Usage for Indian Stock ---

# Train and predict for a major Indian company
print(f"Starting prediction for {reliance_ticker}...")
predict_stock_price(reliance_ticker, epochs=10) 

print("\n" + "="*50 + "\n")

# Train and predict for the Nifty 50 Index
print(f"Starting prediction for {nifty_ticker}...")
predict_stock_price(nifty_ticker, epochs=10)

  df = yf.download(ticker, start='2010-01-01', end=pd.to_datetime('today').strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed

Starting prediction for RELIANCE.NS...
Training model for RELIANCE.NS...



  super().__init__(**kwargs)
  test_df = yf.download(ticker, start=test_data_start)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step


  df = yf.download(ticker, start='2010-01-01', end=pd.to_datetime('today').strftime('%Y-%m-%d'))
[*********************100%***********************]  1 of 1 completed


Prediction for RELIANCE.NS's next close:
Predicted Price: $1512.21


Starting prediction for ^NSEI...
Training model for ^NSEI...



  super().__init__(**kwargs)
  test_df = yf.download(ticker, start=test_data_start)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step

Prediction for ^NSEI's next close:
Predicted Price: $25794.09
