# Interactive Dashboard

## Load the models

In [1]:
# pip install panel altair

In [1]:
# For loading the LSTM model
from tensorflow.keras.models import load_model
# For loading the RidgeRegressor Model
from skforecast.utils import load_forecaster
from sklearn.preprocessing import MinMaxScaler
import panel as pn
import altair as alt
from altair import datum
import datetime as dt
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
from pyspark.sql.functions import date_format
import pandas as pd
import numpy as np
pn.extension()
alt.renderers.enable("default")

warnings.filterwarnings("ignore")
write_config = {
    "uri": "mongodb://localhost:27017/CA2.raw_tweets?retryWrites=true&w=majority",
    "database": "CA2",
    "raw_collection": "raw_tweets",
    "modified_collection": "modified_tweets",
    "aggregates_collection": "daily_data",
    "writeConcern.w": "majority"
}

2023-11-16 19:43:23.238959: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-16 19:43:23.265240: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-16 19:43:23.265274: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-16 19:43:23.265289: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-16 19:43:23.269839: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-16 19:43:23.270147: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [2]:
hourlyDataSparkDF = spark.read\
    .format("mongodb")\
    .option("uri", write_config["uri"])\
    .option("database", write_config["database"])\
    .option("collection", "hourly_data_imputed")\
    .option("writeConcern.w", write_config["writeConcern.w"])\
    .load()\
    .sort("date")
hourlyDataDF = hourlyDataSparkDF.withColumn("date", date_format("date", "yyyy-MM-dd HH:mm:ss")).toPandas()
hourlyDataDF['date'] = pd.to_datetime(hourlyDataDF['date'], format='ISO8601')
hourlyDataDF.set_index("date", inplace=True)
hourlyDataDF = hourlyDataDF.asfreq('H')
hourlyDataDF.drop("_id", axis=1, inplace=True)
hourlyDataDF.drop("average_sentiment", axis=1, inplace=True)
hourlyDataDF.drop("negative_tweet_ratio", axis=1, inplace=True)
hourlyDataDF.drop("neutral_tweet_ratio", axis=1, inplace=True)
hourlyDataDF.drop("positive_tweet_ratio", axis=1, inplace=True)
hourlyDataDF.drop("negative_tweet_ratio_knn", axis=1, inplace=True)
hourlyDataDF.drop("neutral_tweet_ratio_knn", axis=1, inplace=True)
hourlyDataDF.drop("positive_tweet_ratio_knn", axis=1, inplace=True)

In [3]:
# Ridge Regression Model
ridge3MModel = load_forecaster('ridge3MModel.py', verbose=True)

ForecasterAutoregMultiVariate 
Regressor: Ridge(alpha=1, random_state=42) 
Lags: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24] 
Transformer for series: StandardScaler() 
Transformer for exog: None 
Weight function included: False 
Window size: 24 
Target series, level: average_sentiment_knn 
Multivariate series (names): ['average_sentiment_knn', 'negative_tweet_ratio_knn', 'neutral_tweet_ratio_knn', 'positive_tweet_ratio_knn'] 
Maximum steps predicted: 1503 
Exogenous included: False 
Type of exogenous variable: None 
Exogenous variables names: None 
Training range: [Timestamp('2009-04-07 00:00:00'), Timestamp('2009-06-25 12:00:00')] 
Training index type: DatetimeIndex 
Training index frequency: H 
Regressor parameters: {'alpha': 1, 'copy_X': True, 'fit_intercept': True, 'max_iter': None, 'positive': False, 'random_state': 42, 'solver': 'auto', 'tol': 0.0001} 
fit_kwargs: {} 
Creation date: 2023-11-14 22:56:17 
Last fit date: 2023-11-14 23:19:40 
Skforecast 

In [8]:
lookback = 168
forecast = 168

def LSTMPredict(forecast):
    scaler = MinMaxScaler(feature_range=(0, 1))
    # temp dataframe
    df = pd.DataFrame(columns=["average_sentiment_knn"])
    # get the last timestamp from the available data
    lastTimeStamp = hourlyDataDF.iloc[-1:].index.values[0]
    #get the sentiment of the last 24 rows
    previous = hourlyDataDF.iloc[-lookback:].values.reshape(1, lookback, 1)
    print(previous)
    predicited = list()
    y = hourlyDataDF["average_sentiment_knn"].values.reshape(-1, 1)
    # Loop until the end of the forecasted date
    for i in range(forecast + 1):
        predictions = LSTM.predict(previous, verbose=False).reshape(-1, 1)
        predicted_unscaled = predictions
        print(predictions.shape, predictions.shape[0], predictions[0][0])
        # Remove the last 1
        for l in range(predicted_unscaled.shape[0]):
            index = pd.to_datetime(lastTimeStamp, format="%Y-%m-%d %H:%M:%S") + dt.timedelta(hours=l)
            df.loc[index] = { "average_sentiment_knn": predicted_unscaled[l][0] }
            lastTimeStamp = index
        previous = predictions.reshape(1, lookback, 1)
#         # Reshape first the inputs
#         a = np.reshape(previous, (1, 24))
# #         print(a)
#         # Start prediction
#         prediction = LSTM.predict(a, verbose=False)
#         #Get the predicted value
#         newPrediction = np.reshape(prediction, (1))[0]
#         # remove the first element of the array
#         previous.pop(0)
#         # add the newly predicted value. This will make the input still equal to 24
#         previous.append(newPrediction)
#         # Add to the dataframe with the timestamp index
#         index = pd.to_datetime(lastTimeStamp, format="%Y-%m-%d %H:%M:%S") + dt.timedelta(hours=i)
#         df.loc[index] = { "average_sentiment_knn": newPrediction }
    return df

After loading the LSTM, predict the values up to 3 months so that it will not keep on repeating the prediction when the input changes in the dashboard. Predicting through LSTM can take some time to finish especially with this large input

In [9]:
LSTM = load_model("LSTMMultiDay")
predicted = LSTMPredict(13)

[[[-0.01492138]
  [-0.02621379]
  [-0.0262714 ]
  [-0.01528061]
  [-0.01561729]
  [-0.00953718]
  [-0.00950094]
  [-0.01395639]
  [-0.01638801]
  [-0.03060169]
  [-0.00548252]
  [-0.00780549]
  [-0.01287183]
  [-0.01901274]
  [-0.00285836]
  [-0.02430554]
  [-0.02329988]
  [-0.02608801]
  [-0.02218089]
  [-0.00125124]
  [-0.01574138]
  [-0.01376315]
  [-0.00776736]
  [-0.01195863]
  [-0.00642188]
  [-0.02367285]
  [-0.01929325]
  [-0.01586922]
  [-0.00823572]
  [-0.02098959]
  [-0.01659764]
  [-0.02430977]
  [-0.00933904]
  [-0.01473934]
  [-0.02446686]
  [-0.00645299]
  [-0.00641279]
  [-0.02148087]
  [-0.02225621]
  [-0.00849287]
  [-0.01509588]
  [-0.0222136 ]
  [-0.03168428]
  [-0.0101654 ]
  [-0.00363446]
  [-0.01481156]
  [-0.01616287]
  [-0.02152961]
  [-0.0056217 ]
  [-0.01722971]
  [-0.01739523]
  [-0.01920573]
  [-0.0005139 ]
  [-0.00529056]
  [-0.02076551]
  [-0.01982645]
  [-0.00498506]
  [-0.01691674]
  [-0.00929471]
  [-0.0077114 ]
  [-0.00882609]
  [ 0.01564268]
  [-0.00

In [10]:
predicted

Unnamed: 0,average_sentiment_knn
2009-06-25 12:00:00,-0.023028
2009-06-25 13:00:00,0.001413
2009-06-25 15:00:00,0.038859
2009-06-25 18:00:00,-0.022969
2009-06-25 22:00:00,-0.018063
...,...
2031-10-23 22:00:00,0.112816
2031-10-30 18:00:00,0.112554
2031-11-06 15:00:00,0.112566
2031-11-13 13:00:00,0.112309


In [17]:
from matplotlib.backends.backend_agg import FigureCanvas
from matplotlib.figure import Figure

forecast = pn.widgets.Select(name="Forecast", options={"1 week": 168, "1 month": 730, "3 months": 2190})
startDate = dt.datetime(2009, 4, 7)
endDate= dt.datetime(2009, 6, 25) + dt.timedelta(hours=168)

date_range_slider = pn.widgets.DateRangeSlider(
    name="Date Range",
    start=startDate,
    end=endDate,
    value=(startDate, endDate),    
)

# forecast.param.watch(update_enddate, 'value')
@pn.depends(forecast.param.value, watch=True)
def update_enddate(forecast):
    endDataDate = dt.datetime(2009, 6, 25)
    endDate = endDataDate + dt.timedelta(hours=forecast)
    date_range_slider.end = endDate
    date_range_slider.value = (startDate, endDate)
    print(endDate)

@pn.depends(forecast.param.value, date_range_slider.param.value)
def update_forecast(forecast, dateRange):
    ridgePrediction = forecastRidge(forecast)
    
    fig = plot(ridgePrediction, dateRange)
    return fig


def forecastRidge(forecast):
    newForecast = forecast if forecast <= 1503 else 1503
    
    predictions = ridge3MModel.predict(steps=newForecast)
    return predictions

def plot(df, dateRange):
    startDate = dateRange[0].strftime("%Y-%m-%d")
    endDate = dateRange[1].strftime("%Y-%m-%d")
    endDataDate = dt.datetime(2009, 6, 25).strftime("%Y-%m-%d")
    fig = Figure()
    FigureCanvas(fig) # not needed in mpl >= 3.1
    ax = fig.add_subplot()
    hourlyDataDF.loc[startDate : endDataDate, "average_sentiment_knn"].plot(label="Actual Values", ax=ax)
    preds = df.loc[endDataDate : endDate, "average_sentiment_knn"]
    if(preds.empty == False):
        preds.plot(label="Ridge Reggression Predicted Values", ax=ax)
    
    ax.legend()
    
    # For LSTM where the predicted values have been done in the previous steps.
    LSTMPreds = predicted.loc[endDataDate : endDate, "average_sentiment_knn"]
    if(LSTMPreds.empty == False):
        LSTMPreds.plot(label="LSTM Predicted Values", ax=ax)

    fig.suptitle("Predicted Sentiment")
    return fig

In [18]:
layout = pn.Column(
    "<h1>Sentiment Prediction Dashboard</h1>",
    pn.Row(forecast, date_range_slider),
    update_forecast
)

In [20]:
layout