In [60]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import sklearn.metrics as metrics
import sklearn.preprocessing as preprocessing
import statsmodels.api as sm
import dash
from dash import html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import dash_html_components as html
from jupyter_dash import JupyterDash
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

In [14]:
# Load the weekly sentiment CSV; the first CSV column is used as the row index.
data = pd.read_csv('weekly_sentiment_ukraine.csv', index_col=0)

In [15]:
# Preview the first rows of the frame.
data.head()

Unnamed: 0,id,year,week,avg_sentiment
0,0,2022,37,0.057567
1,1,2022,36,0.073651
2,2,2022,44,0.062976
3,3,2022,35,0.072307
4,4,2022,39,0.05535


In [16]:
# Approximate each row's date as January 1st of `year` plus `week` * 7 days.
year_start = pd.to_datetime(data['year'].astype(str), format='%Y')
week_offset = pd.to_timedelta(data['week'] * 7, unit='D')

# Swap the id/year/week columns for a sorted date index, then discard the
# chronologically last row.
# NOTE(review): the reason for dropping the last row is not stated here — confirm.
data = (
    data
    .assign(date=year_start + week_offset)
    .drop(columns=['id', 'year', 'week'])
    .set_index('date')
    .sort_index()
    .iloc[:-1]
)

data.head()

Unnamed: 0_level_0,avg_sentiment
date,Unnamed: 1_level_1
2022-08-20,0.055036
2022-08-27,0.074953
2022-09-03,0.072307
2022-09-10,0.073651
2022-09-17,0.057567


In [18]:
# Aggregate into weekly bins ('W' frequency) by averaging the rows in each bin.
data = data.resample('W').mean()

In [59]:
# Row count of the current frame.
len(data)

37

In [20]:
# Conform the index to an explicit weekly ('W') frequency.
data = data.asfreq('W')

In [8]:
# Inspect the index range/frequency, dtypes and non-null counts
# (this cell does not modify `data`).
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 37 entries, 2022-08-21 to 2023-04-30
Freq: W-SUN
Data columns (total 1 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   avg_sentiment  17 non-null     float64
dtypes: float64(1)
memory usage: 592.0 bytes


In [9]:
# List the column names of the current frame.
data.columns

Index(['avg_sentiment'], dtype='object')

In [89]:
app = JupyterDash(__name__)

# Pieces shared across the layout, computed once up front.
_white_text = {'color': 'white'}
_slider_dates = data.index.unique()
_last_position = len(_slider_dates) - 1
_vertical_mark = {'writing-mode': 'vertical-lr', 'text-orientation': 'mixed', 'white-space': 'nowrap'}
# One slider mark per distinct date, keyed by its position in the index.
_slider_marks = {
    position: {'label': date.strftime('%Y-%m-%d'), 'style': _vertical_mark}
    for position, date in enumerate(_slider_dates)
}

# Tab 1: the raw series, truncated at the date chosen on the slider.
_time_series_tab = dcc.Tab(
    [
        dcc.Graph(id='time-series-plot'),
        dcc.Slider(
            id='date-slider',
            min=0,
            max=_last_position,
            value=_last_position,
            marks=_slider_marks,
            step=None,  # the handle can only rest on a mark
        ),
    ],
    label='Time Series',
)

# Tab 2: ARIMA controls (train split and the p, d, q order) plus its plot.
_arima_tab = dcc.Tab(
    [
        html.Div([
            html.H3('ARIMA Model', style=_white_text),
            html.Label('Training data %:', style=_white_text),
            dcc.Input(id='train-percentage', value=80, type='number'),
            html.Label('Order (p,d,q):', style=_white_text),
            dcc.Input(id='arima-p', value=1, type='number'),
            dcc.Input(id='arima-d', value=1, type='number'),
            dcc.Input(id='arima-q', value=1, type='number'),
            html.Button('Train Model', id='train-button', n_clicks=0),
            dcc.Graph(id='arima-plot'),
        ])
    ],
    label='ARIMA Model',
)

# Tab 3: LSTM controls (train split, time steps, units, epochs) plus its plot.
_lstm_tab = dcc.Tab(
    [
        html.Div([
            html.H3("LSTM Model", style=_white_text),
            html.Label("Training data percentage (0-100):", style=_white_text),
            dcc.Input(id='train-percentage-lstm', type='number', value=70, min=0, max=100, step=1),
            html.Label("Number of time steps:", style=_white_text),
            dcc.Input(id='time-steps', type='number', value=5, min=1, step=1),
            html.Label("Number of LSTM units:", style=_white_text),
            dcc.Input(id='lstm-units', type='number', value=5, min=1, step=1),
            html.Label("Number of epochs:", style=_white_text),
            dcc.Input(id='epochs', type='number', value=10, min=1, step=1),
            html.Button("Train Model", id='train-button-lstm', n_clicks=0),
            dcc.Graph(id='lstm-plot'),
        ])
    ],
    label='LSTM Model',
)

app.layout = html.Div([
    html.H1('Forecasting Sentiment', style=_white_text),
    dcc.Tabs([_time_series_tab, _arima_tab, _lstm_tab]),
])

@app.callback(
    Output('time-series-plot', 'figure'),
    Input('date-slider', 'value'))
def update_timeseries(selected_date_index):
    """Redraw the sentiment line up to the date selected on the slider.

    Args:
        selected_date_index: Position of the chosen date within the distinct
            values of ``data.index``.

    Returns:
        A Plotly Express line figure of ``avg_sentiment`` for every row dated
        on or before the chosen date.
    """
    cutoff = data.index.unique()[selected_date_index]
    visible = data[data.index <= cutoff]
    return px.line(visible, x=visible.index, y='avg_sentiment')

@app.callback(
    Output('arima-plot', 'figure'),
    [Input('train-button', 'n_clicks')],
    [dash.dependencies.State('train-percentage', 'value'),
     dash.dependencies.State('arima-p', 'value'),
     dash.dependencies.State('arima-d', 'value'),
     dash.dependencies.State('arima-q', 'value')]
)
def train_arima(n_clicks, train_percentage, p, d, q):
    """Fit an ARIMA(p, d, q) model on the leading part of the series and plot it.

    The first ``train_percentage`` percent of ``data`` is used for fitting; the
    remainder is forecast out-of-sample and compared with the actual values.

    Args:
        n_clicks: Click count of the "Train Model" button; 0 means not yet clicked.
        train_percentage: Share of rows (in percent) used for training.
        p, d, q: ARIMA order components.

    Returns:
        A ``go.Figure`` with the actual series, the in-sample fit and the
        test-period forecast. An empty figure is returned before the first
        click, and a figure carrying only an explanatory title is returned when
        the inputs are incomplete or the split leaves no train or test rows.
    """
    if n_clicks == 0:
        return go.Figure()

    # A cleared numeric input arrives as None; report it instead of crashing.
    if None in (train_percentage, p, d, q):
        return go.Figure().update_layout(title='ARIMA Model: please fill in every parameter.')

    series = data['avg_sentiment']
    train_size = int(len(series) * train_percentage / 100)
    if not 0 < train_size < len(series):
        return go.Figure().update_layout(
            title='ARIMA Model: the training percentage must leave both training and test data.')

    train_series = series.iloc[:train_size]
    test_series = series.iloc[train_size:]

    model_fit = sm.tsa.arima.ARIMA(train_series, order=(int(p), int(d), int(q))).fit()

    # Forecast exactly one value per test row, starting right after the last
    # training observation, so forecast and test dates line up positionally.
    predicted = np.asarray(model_fit.forecast(steps=len(test_series)), dtype=float)
    actual = test_series.to_numpy(dtype=float)

    # The resampled series can contain NaN weeks, which sklearn's MSE rejects;
    # score only the positions where both values are present.
    comparable = ~(np.isnan(actual) | np.isnan(predicted))
    if comparable.any():
        arima_mse = metrics.mean_squared_error(actual[comparable], predicted[comparable])
    else:
        arima_mse = float('nan')

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=series.index, y=series, mode='lines', name='Actual'))
    fig.add_trace(go.Scatter(x=train_series.index, y=model_fit.fittedvalues, mode='lines', name='Fit (Training)'))
    fig.add_trace(go.Scatter(x=test_series.index, y=predicted, mode='lines', name='Forecast (Testing)'))
    # Three significant digits: a fixed 3-decimal rounding collapses small errors to 0.0.
    fig.update_layout(title=f'Actual vs. Predicted (ARIMA Model). MSE (Test): {arima_mse:.3g}', xaxis_title='Date', yaxis_title='Sentiment',)

    return fig


@app.callback(
    Output('lstm-plot', 'figure'),
    [Input('train-button-lstm', 'n_clicks')],
    [dash.dependencies.State('train-percentage-lstm', 'value'),
     dash.dependencies.State('lstm-units', 'value'),
     dash.dependencies.State('epochs', 'value'),
     dash.dependencies.State('time-steps', 'value')]
)
def train_lstm(n_clicks, train_percentage, lstm_units, lstm_epochs, time_steps=None):
    """Train a two-layer LSTM on sliding windows of the series and plot it.

    The first ``train_percentage`` percent of ``data`` is scaled to [-1, 1] and
    cut into windows of ``time_steps`` consecutive values, each used to predict
    the value that follows it. Test windows are seeded with the tail of the
    training data so that every test row receives a prediction.

    Args:
        n_clicks: Click count of the "Train Model" button; 0 means not yet clicked.
        train_percentage: Share of rows (in percent) used for training.
        lstm_units: Number of units in each LSTM layer.
        lstm_epochs: Number of training epochs.
        time_steps: Window length. When missing, ``lstm_units`` is used as the
            window length, which is what the callback did before this input
            was wired in.

    Returns:
        A ``go.Figure`` with actual and predicted values for both splits. An
        empty figure is returned before the first click, and a figure carrying
        only an explanatory title is returned when the inputs are incomplete or
        the split is too small for the chosen window.
    """
    if n_clicks == 0:
        return go.Figure()

    # A cleared numeric input arrives as None; report it instead of crashing.
    if None in (train_percentage, lstm_units, lstm_epochs):
        return go.Figure().update_layout(title='LSTM Model: please fill in every parameter.')

    lstm_units = int(lstm_units)
    window = int(time_steps) if time_steps else lstm_units

    series = data[['avg_sentiment']]
    train_size = int(len(series) * train_percentage / 100)
    if not window < train_size < len(series):
        return go.Figure().update_layout(
            title='LSTM Model: the training split must be longer than the window and leave test data.')

    train_data = series.iloc[:train_size]
    test_data = series.iloc[train_size:]

    def _windows(values):
        """Return (inputs shaped (n, window, 1), targets) for a 1-D array."""
        inputs = np.array([values[end - window:end] for end in range(window, len(values))])
        return inputs.reshape(-1, window, 1), values[window:]

    # Fit the scaler on the training rows only, then reuse it for the test rows.
    # NOTE(review): NaN weeks in `data` pass through the scaler and will yield
    # NaN losses/predictions — consider imputing upstream; confirm intent.
    scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    scaled_train = scaler.fit_transform(train_data)[:, 0]
    scaled_test = scaler.transform(test_data)[:, 0]

    X_train, y_train = _windows(scaled_train)
    # Prefix the test rows with the last `window` training values: window i then
    # ends just before test row i, giving one prediction per test row.
    X_test, _ = _windows(np.concatenate([scaled_train[-window:], scaled_test]))

    # Create the LSTM model
    model = Sequential()
    model.add(LSTM(units=lstm_units, return_sequences=True, input_shape=(window, 1)))
    model.add(LSTM(units=lstm_units))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=int(lstm_epochs), batch_size=32)

    # Predict on both splits and map back to the original scale.
    train_predictions = scaler.inverse_transform(model.predict(X_train)).flatten()
    test_predictions = scaler.inverse_transform(model.predict(X_test)).flatten()

    # Score only the positions where both actual and predicted values exist.
    actual_test = test_data['avg_sentiment'].to_numpy(dtype=float)
    comparable = ~(np.isnan(actual_test) | np.isnan(test_predictions))
    if comparable.any():
        lstm_mse = metrics.mean_squared_error(actual_test[comparable], test_predictions[comparable])
    else:
        lstm_mse = float('nan')

    # Plot the results
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_data.index, y=train_data['avg_sentiment'], name='Actual (train)'))
    fig.add_trace(go.Scatter(x=train_data.index[window:], y=train_predictions, name='Predicted (train)'))
    fig.add_trace(go.Scatter(x=test_data.index, y=test_data['avg_sentiment'], name='Actual (test)'))
    fig.add_trace(go.Scatter(x=test_data.index, y=test_predictions, name='Predicted (test)'))
    # Three significant digits: a fixed 3-decimal rounding collapses small errors to 0.0.
    fig.update_layout(title=f'Actual vs. Predicted (LSTM Model). MSE (Test): {lstm_mse:.3g}', xaxis_title='Date', yaxis_title='Sentiment')
    return fig




# Start the Dash app with mode='inline' so it renders in the notebook output.
app.run_server(mode='inline')

