In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import sklearn.metrics as metrics
import sklearn.preprocessing as preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import dash
from dash import html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import dash_html_components as html
from jupyter_dash import JupyterDash
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


In [2]:
# Load the sentiment scores; the first CSV column is used as the row index
data = pd.read_csv(
    filepath_or_buffer='musk_tweets_sent.csv',
    index_col=0,
)

In [3]:
# Preview the first five rows as loaded from disk
data.head(n=5)

Unnamed: 0_level_0,avg_sentiment_score
date,Unnamed: 1_level_1
01/01/2018,0.267459
08/01/2018,0.206667
15/01/2018,0.102976
22/01/2018,0.107843
29/01/2018,0.053262


In [4]:
# Parse the day/month/year strings in the index into pandas timestamps
data.index = pd.to_datetime(
    arg=data.index,
    format='%d/%m/%Y',
)

In [5]:
# Preview the first five rows again to confirm the index was parsed
data.head(n=5)

Unnamed: 0_level_0,avg_sentiment_score
date,Unnamed: 1_level_1
2018-01-01,0.267459
2018-01-08,0.206667
2018-01-15,0.102976
2018-01-22,0.107843
2018-01-29,0.053262


In [6]:
# Group the rows into weekly bins ('W') and keep the mean score of each bin
data = data.resample(rule='W').mean()

In [8]:
# Conform the index to an explicit weekly ('W') frequency
data = data.asfreq(freq='W')

In [11]:
# Dash application: one tab to browse the series, one per forecasting model.
app = JupyterDash(__name__)

app.layout = html.Div(children=[
    html.H1('Forecasting Sentiment', style={'color': 'white'}),
    dcc.Tabs(children=[

        # --- Tab 1: the raw series, truncated at the date picked on the slider ---
        dcc.Tab(label='Time Series', children=[
            dcc.Graph(id='time-series-plot'),
            dcc.Slider(
                id='date-slider',
                min=0,
                max=len(data.index.unique()) - 1,
                value=len(data.index.unique()) - 1,
                # One mark per distinct date, keyed by its position in the index;
                # labels are rotated vertically via CSS.
                marks={
                    position: {
                        'label': stamp.strftime('%Y-%m-%d'),
                        'style': {
                            'writing-mode': 'vertical-lr',
                            'text-orientation': 'mixed',
                            'white-space': 'nowrap',
                        },
                    }
                    for position, stamp in enumerate(data.index.unique())
                },
                # step=None restricts the slider to the marked positions
                step=None,
            ),
        ]),

        # --- Tab 2: ARIMA controls and result plot ---
        dcc.Tab(label='ARIMA Model', children=[
            html.Div(children=[
                html.H3('ARIMA Model', style={'color': 'white'}),
                html.Label('Training data %:', style={'color': 'white'}),
                dcc.Input(id='train-percentage', value=80, type='number'),
                html.Label('Order (p,d,q):', style={'color': 'white'}),
                dcc.Input(id='arima-p', value=1, type='number'),
                dcc.Input(id='arima-d', value=1, type='number'),
                dcc.Input(id='arima-q', value=1, type='number'),
                html.Button('Train Model', id='train-button', n_clicks=0),
                dcc.Graph(id='arima-plot'),
            ]),
        ]),

        # --- Tab 3: LSTM controls and result plot ---
        dcc.Tab(label='LSTM Model', children=[
            html.Div(children=[
                html.H3("LSTM Model", style={'color': 'white'}),
                html.Label("Training data percentage (0-100):", style={'color': 'white'}),
                dcc.Input(id='train-percentage-lstm', type='number', value=70, min=0, max=100, step=1),
                html.Label("Number of time steps:", style={'color': 'white'}),
                dcc.Input(id='time-steps', type='number', value=5, min=1, step=1),
                html.Label("Number of LSTM units:", style={'color': 'white'}),
                dcc.Input(id='lstm-units', type='number', value=5, min=1, step=1),
                html.Label("Number of epochs:", style={'color': 'white'}),
                dcc.Input(id='epochs', type='number', value=10, min=1, step=1),
                html.Button("Train Model", id='train-button-lstm', n_clicks=0),
                dcc.Graph(id='lstm-plot'),
            ]),
        ]),

    ]),
])

@app.callback(
    Output('time-series-plot', 'figure'),
    Input('date-slider', 'value'))
def update_timeseries(selected_date_index):
    """Redraw the sentiment line chart up to the date chosen on the slider.

    Args:
        selected_date_index: Position of the chosen date within
            ``data.index.unique()``, as emitted by the ``date-slider``.

    Returns:
        A Plotly Express line figure of ``avg_sentiment_score`` for every row
        whose index is on or before the chosen date.
    """
    cutoff = data.index.unique()[selected_date_index]
    # Keep only the rows at or before the cutoff date
    visible = data[data.index <= cutoff]
    return px.line(visible, x=visible.index, y='avg_sentiment_score')

@app.callback(
    Output('arima-plot', 'figure'),
    [Input('train-button', 'n_clicks')],
    [dash.dependencies.State('train-percentage', 'value'),
     dash.dependencies.State('arima-p', 'value'),
     dash.dependencies.State('arima-d', 'value'),
     dash.dependencies.State('arima-q', 'value')]
)
def train_arima(n_clicks, train_percentage, p, d, q):
    """Fit an ARIMA(p, d, q) model on the leading part of `data` and plot it.

    The series is split chronologically: the first ``train_percentage`` percent
    of rows are used for fitting, the remainder for out-of-sample evaluation.

    Args:
        n_clicks: Click count of the 'Train Model' button; nothing is trained
            until it has been clicked at least once.
        train_percentage: Share of rows (0-100, exclusive) used for training.
        p, d, q: ARIMA order passed to ``sm.tsa.arima.ARIMA``.

    Returns:
        A ``go.Figure`` with the actual series, the in-sample fitted values and
        the out-of-sample forecast, titled with the test-set MSE. An empty
        figure is returned before the first click, and a figure carrying only
        an explanatory title is returned when the inputs are unusable.
    """
    if not n_clicks:
        return go.Figure()

    def notice(message):
        # Empty figure whose title explains why no model was trained
        placeholder = go.Figure()
        placeholder.update_layout(title=message)
        return placeholder

    # A cleared numeric input reaches the callback as None
    if any(value is None for value in (train_percentage, p, d, q)):
        return notice('Enter the training percentage and the (p, d, q) order, then train again.')
    if not 0 < train_percentage < 100:
        return notice('Training percentage must be strictly between 0 and 100.')

    series = data['avg_sentiment_score']
    train_size = int(len(series) * train_percentage / 100)
    train_series = series.iloc[:train_size]
    test_series = series.iloc[train_size:]
    if train_series.empty or test_series.empty:
        return notice('The chosen split leaves the training or the test set empty.')

    model_fit = sm.tsa.arima.ARIMA(train_series, order=(p, d, q)).fit()

    # Forecast exactly one value per test row, starting right after the last
    # training observation. Starting the prediction AT the last training row
    # would yield one extra point and shift the plotted forecast by one step.
    forecast = np.asarray(model_fit.forecast(steps=len(test_series)))

    arima_mse = round(mean_squared_error(test_series.values, forecast), ndigits=3)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data.index, y=series, mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=train_series.index, y=model_fit.fittedvalues, mode='lines', name='Fit (Training)'))
    fig.add_trace(go.Scatter(x=test_series.index, y=forecast, mode='lines', name='Forecast (Testing)'))
    fig.update_layout(title=f'Actual vs. Predicted (ARIMA Model). MSE (Test): {arima_mse}', xaxis_title='Date', yaxis_title='Sentiment',)

    return fig


@app.callback(
    Output('lstm-plot', 'figure'),
    [Input('train-button-lstm', 'n_clicks')],
    [dash.dependencies.State('train-percentage-lstm', 'value'),
     dash.dependencies.State('lstm-units', 'value'),
     dash.dependencies.State('epochs', 'value'),
     dash.dependencies.State('time-steps', 'value')]
)
def train_lstm(n_clicks, train_percentage, lstm_units, lstm_epochs, time_steps=None):
    """Train a two-layer LSTM on sliding windows of `data` and plot the result.

    The series is split chronologically by ``train_percentage``. Each sample is
    a window of ``time_steps`` consecutive scaled values and its target is the
    value that follows the window. Windows for the test set are built from
    test rows only, so the first ``time_steps`` test rows get no prediction.

    Args:
        n_clicks: Click count of the 'Train Model' button; nothing is trained
            until it has been clicked at least once.
        train_percentage: Share of rows (0-100) used for training.
        lstm_units: Number of units in each LSTM layer.
        lstm_epochs: Number of training epochs.
        time_steps: Window length taken from the 'Number of time steps' input.
            When omitted, ``lstm_units`` is used as the window length, which
            matches the earlier behaviour of this callback.

    Returns:
        A ``go.Figure`` with actual and predicted values for both splits,
        titled with the test-set MSE. An empty figure is returned before the
        first click, and a figure carrying only an explanatory title is
        returned when the inputs are unusable.
    """
    if not n_clicks:
        return go.Figure()

    def notice(message):
        # Empty figure whose title explains why no model was trained
        placeholder = go.Figure()
        placeholder.update_layout(title=message)
        return placeholder

    # A cleared numeric input reaches the callback as None
    if any(value is None for value in (train_percentage, lstm_units, lstm_epochs)):
        return notice('Fill in every LSTM setting, then train again.')
    if not 0 <= train_percentage <= 100:
        return notice('Training percentage must be between 0 and 100.')

    window = int(lstm_units if time_steps is None else time_steps)
    if window < 1:
        return notice('Number of time steps must be at least 1.')

    train_size = int(len(data) * train_percentage / 100)
    train_data = data.iloc[:train_size]
    test_data = data.iloc[train_size:]
    # Each split needs at least one full window plus one target value
    if len(train_data) <= window or len(test_data) <= window:
        return notice('Not enough rows in the training or test split for the chosen number of time steps.')

    def make_windows(values):
        # values: 1-D array of scaled scores -> (samples, window, 1) inputs and 1-D targets
        inputs = np.array([values[end - window:end] for end in range(window, len(values))])
        return inputs.reshape(-1, window, 1), values[window:]

    # Fit the scaler on the training rows only, then reuse it for the test rows
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_train = scaler.fit_transform(train_data)[:, 0]
    scaled_test = scaler.transform(test_data)[:, 0]

    X_train, y_train = make_windows(scaled_train)
    X_test, _ = make_windows(scaled_test)

    # Create the LSTM model
    model = Sequential()
    model.add(LSTM(units=lstm_units, return_sequences=True, input_shape=(window, 1)))
    model.add(LSTM(units=lstm_units))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=lstm_epochs, batch_size=32)

    # Predict on both splits and map the outputs back to the original scale
    train_predictions = scaler.inverse_transform(model.predict(X_train)).flatten()
    test_predictions = scaler.inverse_transform(model.predict(X_test)).flatten()

    # Positional slice: skip the rows consumed by the first test window
    actual_test = test_data['avg_sentiment_score'].iloc[window:]
    lstm_mse = round(mean_squared_error(actual_test, test_predictions), ndigits=3)

    # Plot the results
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_data.index, y=train_data['avg_sentiment_score'], name='Actual (train)'))
    fig.add_trace(go.Scatter(x=train_data.index[window:], y=train_predictions, name='Predicted (train)'))
    fig.add_trace(go.Scatter(x=test_data.index, y=test_data['avg_sentiment_score'], name='Actual (test)'))
    fig.add_trace(go.Scatter(x=test_data.index[window:], y=test_predictions, name='Predicted (test)'))
    fig.update_layout(title=f'Actual vs. Predicted (LSTM Model). MSE (Test): {lstm_mse}', xaxis_title='Date', yaxis_title='Sentiment')
    return fig




# Start the Dash app; mode='inline' asks JupyterDash to show it in the cell output
app.run_server(mode='inline')

