In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

def fetch_batch_stock_data(ticker_symbol, period="1d", interval="1h"):
    """
    Download historical price bars for a single ticker through yfinance.

    Parameters
    ----------
    ticker_symbol : str
        Symbol handed to ``yf.Ticker`` (e.g. "AAPL").
    period : str, default "1d"
        Look-back span forwarded to ``Ticker.history``.
    interval : str, default "1h"
        Bar size forwarded to ``Ticker.history``.

    Returns
    -------
    Whatever ``Ticker.history`` returns. In this notebook's recorded output
    that is a DataFrame indexed by ``Datetime`` with Open, High, Low, Close,
    Volume, Dividends and Stock Splits columns.
    """
    return yf.Ticker(ticker_symbol).history(period=period, interval=interval)

# Example usage: download one month of hourly AAPL bars and print the frame
ticker_symbol = "AAPL"
stock_data = fetch_batch_stock_data(ticker_symbol, period="1mo", interval="1h")  # Last month with 1-hour interval
print(stock_data)


                                 Open        High         Low       Close  \
Datetime                                                                    
2024-10-02 09:30:00-04:00  225.889999  226.110001  223.029999  225.419998   
2024-10-02 10:30:00-04:00  225.399994  226.600006  225.270004  226.205002   
2024-10-02 11:30:00-04:00  226.220001  227.050003  226.164993  226.951996   
2024-10-02 12:30:00-04:00  226.949997  227.039398  226.589996  226.959900   
2024-10-02 13:30:00-04:00  226.940002  227.179993  226.350006  226.820007   
...                               ...         ...         ...         ...   
2024-11-01 11:30:00-04:00  223.220001  223.250000  221.770004  222.160004   
2024-11-01 12:30:00-04:00  222.139999  223.399902  222.050003  223.339996   
2024-11-01 13:30:00-04:00  223.340195  223.360001  222.070007  222.244995   
2024-11-01 14:30:00-04:00  222.240005  223.099899  222.139999  222.330002   
2024-11-01 15:30:00-04:00  222.339996  223.250000  222.270004  222.850006   

In [2]:
# Preview the first rows of the raw download, including the columns that the
# printed output above wrapped onto a second block.
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-10-02 09:30:00-04:00,225.889999,226.110001,223.029999,225.419998,9385176,0.0,0.0
2024-10-02 10:30:00-04:00,225.399994,226.600006,225.270004,226.205002,4586645,0.0,0.0
2024-10-02 11:30:00-04:00,226.220001,227.050003,226.164993,226.951996,3954712,0.0,0.0
2024-10-02 12:30:00-04:00,226.949997,227.039398,226.589996,226.9599,2173028,0.0,0.0
2024-10-02 13:30:00-04:00,226.940002,227.179993,226.350006,226.820007,2418335,0.0,0.0


In [3]:
def process_batch_data(stock_data, window=5):
    """
    Return a copy of the stock data with a simple moving average column.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Price data containing a 'Close' column.
    window : int, default 5
        Number of consecutive rows averaged (e.g. 5 hours for hourly bars).

    Returns
    -------
    pandas.DataFrame
        A new frame holding every input column plus 'Moving_Avg'. The first
        ``window - 1`` values of 'Moving_Avg' are NaN because the rolling
        window is not yet full. The input frame is left unmodified.
    """
    moving_avg = stock_data['Close'].rolling(window=window).mean()
    # assign() builds a new frame instead of writing into the caller's object,
    # so the raw data stays untouched.
    return stock_data.assign(Moving_Avg=moving_avg)

# Process the batch data and print the first rows of the result.
# Moving_Avg is NaN on the leading rows because the 5-period rolling window
# is not yet full there.
processed_data = process_batch_data(stock_data)
print(processed_data.head())


                                 Open        High         Low       Close  \
Datetime                                                                    
2024-10-02 09:30:00-04:00  225.889999  226.110001  223.029999  225.419998   
2024-10-02 10:30:00-04:00  225.399994  226.600006  225.270004  226.205002   
2024-10-02 11:30:00-04:00  226.220001  227.050003  226.164993  226.951996   
2024-10-02 12:30:00-04:00  226.949997  227.039398  226.589996  226.959900   
2024-10-02 13:30:00-04:00  226.940002  227.179993  226.350006  226.820007   

                            Volume  Dividends  Stock Splits  Moving_Avg  
Datetime                                                                 
2024-10-02 09:30:00-04:00  9385176        0.0           0.0         NaN  
2024-10-02 10:30:00-04:00  4586645        0.0           0.0         NaN  
2024-10-02 11:30:00-04:00  3954712        0.0           0.0         NaN  
2024-10-02 12:30:00-04:00  2173028        0.0           0.0         NaN  
2024-10-02 13:30

In [4]:
# Rich-display preview of the processed frame; Moving_Avg is first populated
# on the fifth row.
processed_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Moving_Avg
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-10-02 09:30:00-04:00,225.889999,226.110001,223.029999,225.419998,9385176,0.0,0.0,
2024-10-02 10:30:00-04:00,225.399994,226.600006,225.270004,226.205002,4586645,0.0,0.0,
2024-10-02 11:30:00-04:00,226.220001,227.050003,226.164993,226.951996,3954712,0.0,0.0,
2024-10-02 12:30:00-04:00,226.949997,227.039398,226.589996,226.9599,2173028,0.0,0.0,
2024-10-02 13:30:00-04:00,226.940002,227.179993,226.350006,226.820007,2418335,0.0,0.0,226.471381


In [5]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
def prepare_data_for_lstm(stock_data, sequence_length=60):
    """
    Turn the 'Close' column into scaled sliding-window samples for an LSTM.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Price data containing a 'Close' column.
    sequence_length : int, default 60
        Number of consecutive scaled closes in each input sample.

    Returns
    -------
    X_train : numpy.ndarray
        Shape ``(n_samples, sequence_length, 1)``; sample ``k`` holds scaled
        closes ``k .. k + sequence_length - 1``.
    y_train : numpy.ndarray
        Shape ``(n_samples,)``; the scaled close that follows each window.
    scaler : MinMaxScaler
        The scaler fitted on the 'Close' column with feature range (0, 1).

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1, or the data has no more than
        ``sequence_length`` rows so that no complete sample can be built.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    n_rows = len(stock_data)
    n_samples = n_rows - sequence_length
    # Without this guard an empty sample list becomes a 1-D array and the
    # reshape below fails with an opaque "tuple index out of range".
    if n_samples < 1:
        raise ValueError(
            f"Need more than {sequence_length} rows to build LSTM sequences, got {n_rows}"
        )

    # Scale data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_close = scaler.fit_transform(stock_data[['Close']])[:, 0]

    # Each sample is the window of closes immediately preceding its target.
    X_train = np.array([
        scaled_close[end - sequence_length:end]
        for end in range(sequence_length, n_rows)
    ])
    y_train = np.array(scaled_close[sequence_length:])

    # Reshape X_train for LSTM input: (samples, timesteps, features)
    X_train = np.reshape(X_train, (n_samples, sequence_length, 1))
    return X_train, y_train, scaler

def train_lstm_model(X_train, y_train, epochs=5, batch_size=32):
    """
    Build and fit a two-layer LSTM regressor on windowed data.

    Parameters
    ----------
    X_train : numpy.ndarray
        Input windows; ``X_train.shape[1]`` is used as the number of
        timesteps and a single feature per step is assumed.
    y_train : numpy.ndarray
        Target value for each window.
    epochs : int, default 5
        Number of passes over the training data.
    batch_size : int, default 32
        Samples per gradient update.

    Returns
    -------
    The fitted ``tf.keras`` Sequential model.
    """
    # Define LSTM model. The input shape is declared with an explicit Input
    # layer: recent Keras versions warn when `input_shape` is passed to the
    # first layer of a Sequential model.
    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=(X_train.shape[1], 1)),
        tf.keras.layers.LSTM(50, return_sequences=True),
        tf.keras.layers.LSTM(50),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    return model

# Prepare data and train model.
# Only the 'Close' column feeds the model; `scaler` is kept, presumably to map
# predictions back to prices later (no inverse transform appears in the
# visible cells).
X_train, y_train, scaler = prepare_data_for_lstm(processed_data)
model = train_lstm_model(X_train, y_train)


Epoch 1/5


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.3860 
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.1056
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0933
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0514
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0420


In [7]:
import streamlit as st
import plotly.graph_objs as go

import logging
# Only let ERROR-and-above records through the "mlflow" logger.
# NOTE(review): mlflow is not imported or used in the visible cells — confirm
# this suppression is still needed.
logging.getLogger("mlflow").setLevel(logging.ERROR)


def display_dashboard(stock_data, predictions=None):
    """
    Render a Streamlit page with a line chart of closing prices.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Frame whose index is plotted on the x-axis and whose 'Close' column
        is plotted as the "Actual Price" line.
    predictions : pandas.DataFrame, optional
        When given, its index and 'Predicted' column are drawn as a second
        line named "Predicted Price".
    """
    st.title("Batch Processed Stock Price Dashboard")

    # Collect the traces first, then build the figure in one go.
    traces = [
        go.Scatter(x=stock_data.index, y=stock_data['Close'], mode='lines', name='Actual Price')
    ]
    if predictions is not None:
        traces.append(
            go.Scatter(x=predictions.index, y=predictions['Predicted'], mode='lines', name='Predicted Price')
        )

    st.plotly_chart(go.Figure(data=traces))

# Example usage: Display dashboard with the latest processed data (no predictions overlay)
# NOTE(review): Streamlit apps are normally launched with `streamlit run`;
# confirm this call is meant to execute from a notebook cell.
display_dashboard(processed_data)


