In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import model_from_json

import mlutils
import pickle

2024-10-25 12:47:59.385888: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import pandas as pd
merged_df = pd.read_csv('data/merged_df.csv')

import pymongo
from pymongo import MongoClient

# # Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')  # MongoDB connection string

In [3]:
def get_all_ticker_data(ticker):
    print("Pulling raw ticker data")
    merged_df[merged_df['ticker'] == ticker]
    return merged_df[merged_df['ticker'] == ticker]

    db = client['stocks']
    collection = db['prepared_data']
    data = collection.find({"ticker": ticker})
    return pd.DataFrame(data)

In [4]:
def predict_RNN(ticker_df, ticker, seq_length, pred_dates_range):
    # Load the pre-trained model
    model = load_model(f'models/rnn/{ticker}_{seq_length}.keras')
    if model:
        print(f"RNN Model loaded for {ticker}_{seq_length}.keras")

    data, labels = mlutils.prep_ticker_data(ticker, ticker_df)
    X_train, X_test, y_train, y_test, scaler, split = mlutils.scale_and_split(labels, seq_length)

    print(X_test)
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)

    # Create the Plotly figure
    fig = go.Figure()

    # Add the actual closing price trace
    fig.add_trace(go.Scatter(
        x=ticker_df['date'],
        y=labels.flatten(),
        mode='lines',
        name='Actual Closing Price',
        line=dict(color='blue')
    ))

    # Add the predicted closing price trace
    fig.add_trace(go.Scatter(
        x=ticker_df['date'][split + seq_length:],
        y=predictions.flatten(),
        mode='lines',
        name='Predicted Closing Price',
        line=dict(color='green')
    ))

    # Add the predicted closing price trace for new dates
    new_dates = pd.date_range(ticker_df['date'].iloc[-1], periods=pred_dates_range, freq='D')

    # Make predictions for the next 7 days
    predictions_new = []
    last_sequence = X_test[-1]

    for _ in range(7):
        prediction = model.predict(np.expand_dims(last_sequence, axis=0))
        predictions_new.append(prediction[0, 0])
        last_sequence = np.append(last_sequence[1:], prediction, axis=0)

    # Inverse transform the predictions
    predictions_new = scaler.inverse_transform(np.array(predictions_new).reshape(-1, 1))


    fig.add_trace(go.Scatter(
        x=new_dates,
        y=predictions_new.flatten(),
        mode='lines',
        name='Predicted New Dates',
        line=dict(color='red')
    ))

    # Update the layout
    fig.update_layout(
        title=f'{ticker} Closing Price Prediction',
        xaxis_title='Date',
        yaxis_title='Closing Price',
        legend=dict(x=0, y=1)
    )

    return f"Predicted closing price for {ticker}: {predictions_new[-1][0]:.2f}", fig

In [13]:
steps = 14

def predict_Exo(t_df, ticker, seq_length, pred_dates_range):
    t_df = t_df.sort_values('date')
    t_df['date'] = pd.to_datetime(t_df['date'])
    data = t_df.set_index('date')
    data = data.asfreq('D')
    data = data.sort_index()
    data = data.fillna(method='ffill')
    data_train = data[:-steps]
    data_test  = data[-steps:]
    
    # Load the model from a file
    with open('forecaster_autoreg.pkl', 'rb') as file:
        loaded_forecaster = pickle.load(file)
        print("Model loaded successfully!")

    predictions = loaded_forecaster.predict(steps=steps, exog=data_test[['bert_score', 'bert_label', 'num_tweets']])


In [17]:
def predict_ARIMA(ticker, ticker_df, forecast_steps, exog_forecast):
     # Make predictions
    with open(f'models/arima/{ticker}.pkl', 'rb') as file:
        model = pickle.load(file)
        print("Model loaded successfully!")
    if model:
        print(f"ARIMA Model loaded for {ticker}.pkl")

    # Create a Plotly figure
    fig = go.Figure()

    # Add the actual data trace
    fig.add_trace(go.Scatter(
        x=ticker_df['date'],
        y=ticker_df['close'],
        mode='lines',
        name='Actual'
    ))

    split = int(0.2 * len(ticker_df))
    train_exog = ticker_df[['volume', 'bert_score', 'num_tweets']][-split:]
    train_forecast = model.get_forecast(steps=split, exog=train_exog)
    
    # Add the predicted closing price trace
    fig.add_trace(go.Scatter(
        x=ticker_df['date'][-split:],
        y=train_forecast.predicted_mean,
        mode='lines',
        name='Predicted Closing Price',
        line=dict(color='green')
    ))
    train_yhat_conf_int = train_forecast.conf_int(alpha=0.05)
    # Add the confidence interval traces
    fig.add_trace(go.Scatter(
        x=ticker_df['date'][-split:],
        y=train_yhat_conf_int['lower close'],
        mode='lines',
        name='Lower Confidence Interval',
        line=dict(dash='dash')
    ))

    fig.add_trace(go.Scatter(
        x=ticker_df['date'][-split:],
        y=train_yhat_conf_int['upper close'],
        mode='lines',
        name='Upper Confidence Interval',
        line=dict(dash='dash')
    ))


    print(exog_forecast)
    forecast = model.get_forecast(steps=forecast_steps, exog=exog_forecast)

    # Make predictions
    # 'volume', 'bert_score', 'num_tweets'
    new_dates = pd.date_range(ticker_df['date'].iloc[-1], periods=forecast_steps, freq='D')

    print("Forecasted values:")
    yhat = forecast.predicted_mean
    print(forecast.predicted_mean)

    print("\nConfidence intervals:")
    yhat_conf_int = forecast.conf_int(alpha=0.05)
    print(yhat_conf_int)

    # Add the forecast trace
    fig.add_trace(go.Scatter(
        x=new_dates,
        y=yhat,
        mode='lines',
        name='Forecast',
        line=dict(color='red')
    ))

    # Add the confidence interval traces
    fig.add_trace(go.Scatter(
        x=new_dates,
        y=yhat_conf_int['lower close'],
        mode='lines',
        name='Lower Confidence Interval',
        line=dict(dash='dash')
    ))

    fig.add_trace(go.Scatter(
        x=new_dates,
        y=yhat_conf_int['upper close'],
        mode='lines',
        name='Upper Confidence Interval',
        line=dict(dash='dash')
    ))

    # Update the layout
    fig.update_layout(
        title='ARIMAX Model Forecast',
        xaxis_title='Date',
        yaxis_title='Close Price',
        legend=dict(x=0, y=1)
    )
    
    pred_mean = round(forecast.predicted_mean.mean(), 2)
    upper_ci = round(yhat_conf_int['upper close'].mean(), 2)
    lower_ci = round(yhat_conf_int['lower close'].mean(), 2)
    return f"Predicted closing price for {ticker}: {pred_mean}, upper confidence interval {upper_ci} and lower confidence interval {lower_ci} ", fig

In [19]:
# Initialize the Dash app
app = dash.Dash(__name__)

dropdown_options = ['TSLA', 'AAPL', 'GOOG', 'AMZN', 'MSFT']

app.layout = html.Div([
    html.H1("Stock Price Prediction Dashboard"),
        html.Div([
            html.Div([
                html.Div([
                html.Label("Ticket Symbol:"),
                dcc.Dropdown(
                    id='stock-dropdown',
                    options=dropdown_options,
                    value='AAPL',  # Default value
                    placeholder="Select a stock"
                )], style={'width': '50%'}),
    
                html.Div([
                    html.Label("Prediction Days Ahead"),
                    dcc.Input(id='predict-dates-input', type='number', value=7),
                ], style={'width': '50%'}),
            ], style={'margin-top': '20px', 'display': 'flex', 'width': '100%'}),

            html.H2("RNN Model"),
            html.Div([
                html.Label("Model Rolling Window"),
                dcc.Dropdown(id='seq-length-input', options=[1, 3, 7], value=7),
            ], style={'width': '33%', 'margin-top': '20px'}),

            html.Button('Predict', id='predict-button', n_clicks=0, style={'margin-top': '20px'}),

            # Output Graphs
            html.Div([
                html.Div(id='prediction-output'),
                dcc.Graph(id='prediction-graph-rnn')
            ], style={'margin-top': '20px', 'width': '100%'}),

            html.H2("ARIMA Model"), 
            html.P("Exogenous Variables: Provide a comma-separated list of values for each variable for the number of days you are predicting, one per each day"),
            html.Div([
                html.Label("Volume"),
                dcc.Input(id='volume', type='text', value='', style={'width': '100%'}),
            ], style={'width': '100%'}),
            html.Div([
                html.Label("BERT Score"),
                dcc.Input(id='bert_score', type='text', value='', style={'width': '100%'}),
            ], style={'width': '100%'}),
            html.Div([
                html.Label("Number of Tweets"),
                dcc.Input(id='num_tweets', type='text', value='', style={'width': '100%'}),
            ], style={'width': '100%'}),

            html.Button('Predict', id='predict-button-arima', n_clicks=0, style={'margin-top': '20px'}),
            # Output Graphs
            html.Div([
                html.Div(id='prediction-output-arima'),
                dcc.Graph(id='prediction-graph-arima')
            ], style={'margin-top': '20px', 'width': '100%'}),

    ]),

], style={'background': 'white', 'margin': '3%'})

@app.callback(
    [Output('prediction-output', 'children'),
     Output('prediction-graph-rnn', 'figure')],

    [Input('predict-button', 'n_clicks')],
    [Input('stock-dropdown', 'value'),
     Input('seq-length-input', 'value'),
     Input('predict-dates-input', 'value')]
)
def update_predict_RNN(n_clicks, ticker, seq_length, pred_dates_range):
    if n_clicks == 0:
        return "", {}

   # Load the raw data for graphing
    ticker_df = get_all_ticker_data(ticker)

    pred_price, graph_rnn = predict_RNN(ticker_df, ticker, seq_length, pred_dates_range)
    return pred_price, graph_rnn


@app.callback(
    [Output('prediction-output-arima', 'children'),
     Output('prediction-graph-arima', 'figure')],

    [Input('predict-button-arima', 'n_clicks')],
    [Input('stock-dropdown', 'value'),
     Input('predict-dates-input', 'value'),
     Input('volume', 'value'),
     Input('bert_score', 'value'),
     Input('num_tweets', 'value')]
)
def update_predict_ARIMA(n_clicks, ticker, forecast_steps, volume, bert_score, num_tweets):
    print(n_clicks)
    if n_clicks == 0:
        return "", {}

   # Load the raw data for graphing
    ticker_df = get_all_ticker_data(ticker)

    # Create DataFrame from the provided exogenous variables
    # 'volume', 'bert_score', 'num_tweets'

    exog_forecast = pd.DataFrame({
        'volume': [v for v in volume.split(',')],
        'bert_score': [v for v in bert_score.split(',')],
        'num_tweets': [v for v in num_tweets.split(',')]
    })
    exog_forecast = exog_forecast.astype(float)
    #try:
    pred_price, graph_arima = predict_ARIMA(ticker, ticker_df, forecast_steps, exog_forecast)
    # except:
    #      return "Provide exogenous variables and press Predict", {}
    return pred_price, graph_arima

if __name__ == '__main__':
    app.run_server(debug=True)

0
0
Pulling raw ticker data
RNN Model loaded for AAPL_7.keras
[[[0.84751388]
  [0.80656964]
  [0.80768635]
  [0.80173079]
  [0.78076237]
  [0.74304416]
  [0.76202737]]

 [[0.80656964]
  [0.80768635]
  [0.80173079]
  [0.78076237]
  [0.74304416]
  [0.76202737]
  [0.75408668]]

 [[0.80768635]
  [0.80173079]
  [0.78076237]
  [0.74304416]
  [0.76202737]
  [0.75408668]
  [0.74019043]]

 [[0.80173079]
  [0.78076237]
  [0.74304416]
  [0.76202737]
  [0.75408668]
  [0.74019043]
  [0.73138124]]

 [[0.78076237]
  [0.74304416]
  [0.76202737]
  [0.75408668]
  [0.74019043]
  [0.73138124]
  [0.73150534]]

 [[0.74304416]
  [0.76202737]
  [0.75408668]
  [0.74019043]
  [0.73138124]
  [0.73150534]
  [0.75073665]]

 [[0.76202737]
  [0.75408668]
  [0.74019043]
  [0.73138124]
  [0.73150534]
  [0.75073665]
  [0.68373703]]

 [[0.75408668]
  [0.74019043]
  [0.73138124]
  [0.73150534]
  [0.75073665]
  [0.68373703]
  [0.73485528]]

 [[0.74019043]
  [0.73138124]
  [0.73150534]
  [0.75073665]
  [0.68373703]
  [0.73


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



0
Pulling raw ticker data
RNN Model loaded for AAPL_3.keras
[[[0.80173079]
  [0.78076237]
  [0.74304416]]

 [[0.78076237]
  [0.74304416]
  [0.76202737]]

 [[0.74304416]
  [0.76202737]
  [0.75408668]]

 [[0.76202737]
  [0.75408668]
  [0.74019043]]

 [[0.75408668]
  [0.74019043]
  [0.73138124]]

 [[0.74019043]
  [0.73138124]
  [0.73150534]]

 [[0.73138124]
  [0.73150534]
  [0.75073665]]

 [[0.73150534]
  [0.75073665]
  [0.68373703]]

 [[0.75073665]
  [0.68373703]
  [0.73485528]]

 [[0.68373703]
  [0.73485528]
  [0.65470392]]

 [[0.73485528]
  [0.65470392]
  [0.65358721]]

 [[0.65470392]
  [0.65358721]
  [0.67430753]]

 [[0.65358721]
  [0.67430753]
  [0.73026453]]

 [[0.67430753]
  [0.73026453]
  [0.78088647]]

 [[0.73026453]
  [0.78088647]
  [0.77666802]]

 [[0.78088647]
  [0.77666802]
  [0.74726261]]

 [[0.77666802]
  [0.74726261]
  [0.74292006]]

 [[0.74726261]
  [0.74292006]
  [0.78659383]]

 [[0.74292006]
  [0.78659383]
  [0.78311979]]

 [[0.78659383]
  [0.78311979]
  [0.7837402 ]]




No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



0
Pulling raw ticker data
RNN Model loaded for AAPL_3.keras
[[[0.80173079]
  [0.78076237]
  [0.74304416]]

 [[0.78076237]
  [0.74304416]
  [0.76202737]]

 [[0.74304416]
  [0.76202737]
  [0.75408668]]

 [[0.76202737]
  [0.75408668]
  [0.74019043]]

 [[0.75408668]
  [0.74019043]
  [0.73138124]]

 [[0.74019043]
  [0.73138124]
  [0.73150534]]

 [[0.73138124]
  [0.73150534]
  [0.75073665]]

 [[0.73150534]
  [0.75073665]
  [0.68373703]]

 [[0.75073665]
  [0.68373703]
  [0.73485528]]

 [[0.68373703]
  [0.73485528]
  [0.65470392]]

 [[0.73485528]
  [0.65470392]
  [0.65358721]]

 [[0.65470392]
  [0.65358721]
  [0.67430753]]

 [[0.65358721]
  [0.67430753]
  [0.73026453]]

 [[0.67430753]
  [0.73026453]
  [0.78088647]]

 [[0.73026453]
  [0.78088647]
  [0.77666802]]

 [[0.78088647]
  [0.77666802]
  [0.74726261]]

 [[0.77666802]
  [0.74726261]
  [0.74292006]]

 [[0.74726261]
  [0.74292006]
  [0.78659383]]

 [[0.74292006]
  [0.78659383]
  [0.78311979]]

 [[0.78659383]
  [0.78311979]
  [0.7837402 ]]




No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



In [72]:
# Can be used for ARIMA testing / demo 
# AAPL stock
# [168904800, 88223700, 54930100, 124486200, 121047300, 96452100, 99116600]
# [0.0, 0.6901094913482666, 0.7551156282424927, 0.9880950450897217, 0.677986741065979, 0.4352254867553711, 0.8095578551292419]
# [20.0, 3.0, 1.0, 2.0, 2.0, 1.0, 3.0]

In [None]:
[18734000, 20676000, 6936000, 27860000, 25988000, 26122000, 20238000]
[0.4431171119213104, 0.4431171119213104, 0.5662032961845398, 0.4431171119213104, 0.4431171119213104, 0.4431171119213104, 0.4431171119213104]
[4.5, 4.5, 1.0, 4.5, 4.5, 4.5, 4.5]