# Title: Interactive Dashboard

## Plots Contained:
- **Line Chart**
- **Pie Chart**
- **Word Cloud**
- **Sentiment Summary Table**

### This notebook contains two dashboards:
- **Dashboard 1** is focused on daily data for **November 2024**, allowing for detailed insights into sentiment trends per day.
- **Dashboard 2** is focused on data aggregated into **3-day intervals**, providing a broader overview over slightly longer time periods so that the general trend of each sentiment is easier to see.


### Dashboard 1: November 2024 Data - Daily Interval
- **Objective**: This dashboard displays data for **November 2024**, with each data point representing one day.
- **Plots**:
  - **Line Chart**: Displays sentiment trends on a **daily** basis, showing the count of each sentiment (negative, neutral, positive) per day.
  - **Pie Chart**: Shows the proportion of each sentiment for the entire month of **November 2024**.
  - **Word Cloud**: Displays the most frequently used words from the posts in **November 2024**.
  - **Sentiment Summary Table**: Provides a summary of the count of each sentiment in **November 2024**.

In [2]:
from dash import Dash, dcc, html, Input, Output, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import base64
from io import BytesIO
from wordcloud import WordCloud, STOPWORDS
import subprocess
from PIL import Image

# Initialize the Dash app and load the Bootstrap stylesheet exposed by
# dash_bootstrap_components so the dbc layout components are styled.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Function to load and clean the latest data
def load_data():
    """Read the November 2024 CSV and derive the columns the dashboard plots.

    Rows whose 'Date' cannot be parsed are discarded, 'Date' is reduced to a
    plain ``datetime.date``, and 'Year', 'Month' and 'sentiment_group'
    columns are added.

    Returns:
        pandas.DataFrame: the cleaned frame.

    Raises:
        KeyError: if the CSV has no 'Date' column.
    """
    frame = pd.read_csv('filtered_data_november_2024_streaming.csv')
    #frame = pd.read_csv('streaming_sentiment_analysis.csv')

    # Guard clause: every later step needs the 'Date' column.
    if 'Date' not in frame.columns:
        raise KeyError("The 'Date' column is missing in the dataset.")

    # Unparseable dates are coerced to NaT so they can be dropped in one step.
    frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
    frame = frame.dropna(subset=['Date'])

    # Strip the time-of-day, then pull the calendar parts off the date objects.
    frame['Date'] = frame['Date'].dt.date
    frame['Year'] = frame['Date'].apply(lambda day: day.year)
    frame['Month'] = frame['Date'].apply(lambda day: day.month)

    # Bucket the sentiment score into three labelled groups split at -0.1 and 0.1.
    score_edges = [-float('inf'), -0.1, 0.1, float('inf')]
    group_names = ['negative', 'neutral', 'positive']
    frame['sentiment_group'] = pd.cut(
        frame['SelfTextSentimentScore'],
        bins=score_edges,
        labels=group_names,
    )

    return frame


# Load the data once at start-up. The refresh callback further down calls
# load_data() itself on every tick, so it does not read this module-level frame.
df = load_data()

# Function to generate word cloud image
def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as a PNG data URI.

    Args:
        text: The corpus to visualise, as a single string.

    Returns:
        str: A ``data:image/png;base64,...`` string suitable for the ``src``
        of an ``html.Img``. When *text* contains no drawable words, a blank
        white 400x400 image is returned instead of raising, so the calling
        callback can still update its other outputs.
    """
    cloud = WordCloud(width=400, height=400, background_color='white',
                      stopwords=STOPWORDS, max_words=50)
    try:
        cloud.generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left to draw (empty
        # text, or only stopwords). Fall back to a blank canvas of the same size.
        picture = Image.new('RGB', (400, 400), 'white')
    else:
        picture = cloud.to_image()

    buffer = BytesIO()
    picture.save(buffer, format='PNG')
    return f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode()}"

# Layout for the app
# Page heading.
_title_row = dbc.Row([
    dbc.Col(html.H1("Climate Change Sentiment Analysis Dashboard Each Day (Nov 2024)"), className="mb-2")
])

# Line chart (wide column) next to the pie chart (narrow column).
_charts_row = dbc.Row([
    dbc.Col(dcc.Graph(id='line-graph'), width=8),
    dbc.Col(dcc.Graph(id='pie-chart'), width=4),
])

# Word-cloud image wrapped in a bordered card.
_wordcloud_card = dbc.Card(
    dbc.CardBody([
        html.Img(id='word-cloud', style={'width': '400px', 'height': '400px', 'display': 'block', 'margin': 'auto'})
    ]),
    style={"width": "450px", "margin": "auto", "border": "1px solid #ccc", "padding": "10px"}
)

# Sentiment counts table wrapped in a matching card.
_summary_card = dbc.Card(
    dbc.CardBody([
        html.H5("Sentiment Summary", className="card-title"),
        dash_table.DataTable(
            id='sentiment-table',
            columns=[
                {"name": "Sentiment", "id": "sentiment"},
                {"name": "Count", "id": "count"}
            ],
            style_table={'overflowX': 'auto'},
            style_cell={'textAlign': 'center'},
            style_header={'fontWeight': 'bold'}
        )
    ]),
    style={"width": "450px", "margin": "auto", "border": "1px solid #ccc", "padding": "10px"}
)

_details_row = dbc.Row([
    dbc.Col([_wordcloud_card], width=8),
    dbc.Col([_summary_card], width=4)
])

# Timer component whose n_intervals drives the refresh callback
# (interval=60000, i.e. one tick per minute).
_refresh_row = dbc.Row([
    dbc.Col(dcc.Interval(id='interval-component', interval=60000, n_intervals=0))
])

# Layout for the app: the four rows stacked in a fluid container.
app.layout = dbc.Container(
    [_title_row, _charts_row, _details_row, _refresh_row],
    fluid=True
)

# Combined Callback for both Graphs, Word Cloud, Table and Styles
@app.callback(
    [
        Output('line-graph', 'figure'),
        Output('pie-chart', 'figure'),
        Output('word-cloud', 'src'),
        Output('sentiment-table', 'data')
    ],
    [Input('interval-component', 'n_intervals')]
)
def update_graphs_and_styles(n_intervals):
    """Rebuild every dashboard output from freshly loaded data.

    Args:
        n_intervals: Tick counter of 'interval-component'. It is only the
            trigger for the refresh; its value is not used.

    Returns:
        tuple: ``(line_fig, pie_fig, wordcloud_img, sentiment_table_data)`` in
        the order of the ``Output`` list above: the daily line figure, the
        pie figure, the word-cloud data URI, and the table rows.
    """
    df = load_data()

    sentiment_labels = ['negative', 'neutral', 'positive']
    # One colour map shared by both figures so the colours always agree.
    sentiment_colors = {
        'negative': 'hotpink',
        'neutral': 'steelblue',
        'positive': 'mediumseagreen'
    }

    # Line graph: one row per day, one column per sentiment group.
    grouped = (df.groupby(['Date', 'sentiment_group'], observed=False)
               .size()
               .unstack(fill_value=0)
               .reset_index())

    line_fig = px.line(
        grouped,
        x='Date',
        y=sentiment_labels,  # Plot separate lines for each sentiment
        labels={'value': 'Count of Sentiments', 'Date': 'Date'},
        title="Sentiment Groups by Each Day (Nov 2024)",
        markers=True,
        color_discrete_map=sentiment_colors
    )

    # Pie chart: share of each sentiment group over the whole data set.
    sentiment_counts = df['sentiment_group'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment_group', 'count']

    pie_fig = px.pie(
        sentiment_counts,
        names='sentiment_group',
        values='count',
        title="Sentiment Group Distribution (Nov 2024)",
        color='sentiment_group',
        color_discrete_map=sentiment_colors
    )

    # Word cloud: astype(str) keeps the join from raising TypeError if the
    # column holds non-string values.
    text = ' '.join(df['SelfText'].dropna().astype(str))
    wordcloud_img = generate_wordcloud(text)

    # Sentiment counts table: look each group up once and emit plain ints;
    # a group that is absent from the counts is reported as 0.
    count_by_group = dict(zip(sentiment_counts['sentiment_group'], sentiment_counts['count']))
    sentiment_table_data = [
        {"sentiment": label.capitalize(), "count": int(count_by_group.get(label, 0))}
        for label in sentiment_labels
    ]
    sentiment_table_data.append(
        {"sentiment": "Total", "count": int(sentiment_counts['count'].sum())}
    )

    return line_fig, pie_fig, wordcloud_img, sentiment_table_data

if __name__ == '__main__':
    import os
    import threading
    import webbrowser

    _dashboard_url = 'http://127.0.0.1:8080/'
    _chrome_path = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

    def _open_dashboard():
        """Open the dashboard in Chrome, or in the default browser if Chrome is not at the macOS path."""
        try:
            subprocess.Popen([_chrome_path, _dashboard_url])
        except OSError:
            webbrowser.open(_dashboard_url)

    # app.run_server() blocks until the server stops, so a browser launch
    # placed after it would only happen on shutdown. Schedule it beforehand,
    # with a short delay so the server has time to start listening.
    # With debug=True the Werkzeug reloader re-runs this script in a child
    # process that has WERKZEUG_RUN_MAIN='true'; skip the launch there so only
    # one browser window is opened.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        threading.Timer(1.0, _open_dashboard).start()

    app.run_server(debug=True, port=8080)


### Dashboard 2: November 2024 Data - 3-Day Interval
- **Objective**: This dashboard displays data for **November 2024**, but aggregates data into **3-day intervals**.
- **Plots**:
  - **Line Chart**: Displays sentiment trends on a **3-day** basis, showing the count of each sentiment (negative, neutral, positive) for every 3-day interval.
  - **Pie Chart**: Shows the proportion of each sentiment for the entire month of **November 2024**.
  - **Word Cloud**: Displays the most frequently used words from the posts in **November 2024**.
  - **Sentiment Summary Table**: Provides a summary of the count of each sentiment in **November 2024**.

In [None]:
from dash import Dash, dcc, html, Input, Output, dash_table
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import base64
from io import BytesIO
from wordcloud import WordCloud, STOPWORDS
import subprocess
from PIL import Image

# Initialize the Dash app and load the Bootstrap stylesheet exposed by
# dash_bootstrap_components so the dbc layout components are styled.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Function to load and clean the latest data
def load_data():
    """Read the November 2024 CSV and derive the columns the dashboard plots.

    Rows whose 'Date' cannot be parsed are discarded, 'Date' is reduced to a
    plain ``datetime.date``, and 'Year', 'Month' and 'sentiment_group'
    columns are added.

    Returns:
        pandas.DataFrame: the cleaned frame.

    Raises:
        KeyError: if the CSV has no 'Date' column.
    """
    frame = pd.read_csv('filtered_data_november_2024_streaming.csv')
    #frame = pd.read_csv('streaming_sentiment_analysis.csv')

    # Guard clause: every later step needs the 'Date' column.
    if 'Date' not in frame.columns:
        raise KeyError("The 'Date' column is missing in the dataset.")

    # Unparseable dates are coerced to NaT so they can be dropped in one step.
    frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
    frame = frame.dropna(subset=['Date'])

    # Strip the time-of-day, then pull the calendar parts off the date objects.
    frame['Date'] = frame['Date'].dt.date
    frame['Year'] = frame['Date'].apply(lambda day: day.year)
    frame['Month'] = frame['Date'].apply(lambda day: day.month)

    # Bucket the sentiment score into three labelled groups split at -0.1 and 0.1.
    score_edges = [-float('inf'), -0.1, 0.1, float('inf')]
    group_names = ['negative', 'neutral', 'positive']
    frame['sentiment_group'] = pd.cut(
        frame['SelfTextSentimentScore'],
        bins=score_edges,
        labels=group_names,
    )

    return frame


# Load the data once at start-up. The refresh callback further down calls
# load_data() itself on every tick, so it does not read this module-level frame.
df = load_data()

# Function to generate word cloud image
def generate_wordcloud(text):
    """Render *text* as a word cloud and return it as a PNG data URI.

    Args:
        text: The corpus to visualise, as a single string.

    Returns:
        str: A ``data:image/png;base64,...`` string suitable for the ``src``
        of an ``html.Img``. When *text* contains no drawable words, a blank
        white 400x400 image is returned instead of raising, so the calling
        callback can still update its other outputs.
    """
    cloud = WordCloud(width=400, height=400, background_color='white',
                      stopwords=STOPWORDS, max_words=50)
    try:
        cloud.generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left to draw (empty
        # text, or only stopwords). Fall back to a blank canvas of the same size.
        picture = Image.new('RGB', (400, 400), 'white')
    else:
        picture = cloud.to_image()

    buffer = BytesIO()
    picture.save(buffer, format='PNG')
    return f"data:image/png;base64,{base64.b64encode(buffer.getvalue()).decode()}"

# Layout for the app
# Page heading.
_title_row = dbc.Row([
    dbc.Col(html.H1("Climate Change Sentiment Analysis Dashboard 3-Day Intervals (Nov 2024)"), className="mb-2")
])

# Line chart (wide column) next to the pie chart (narrow column).
_charts_row = dbc.Row([
    dbc.Col(dcc.Graph(id='line-graph'), width=8),
    dbc.Col(dcc.Graph(id='pie-chart'), width=4),
])

# Word-cloud image wrapped in a bordered card.
_wordcloud_card = dbc.Card(
    dbc.CardBody([
        html.Img(id='word-cloud', style={'width': '400px', 'height': '400px', 'display': 'block', 'margin': 'auto'})
    ]),
    style={"width": "450px", "margin": "auto", "border": "1px solid #ccc", "padding": "10px"}
)

# Sentiment counts table wrapped in a matching card.
_summary_card = dbc.Card(
    dbc.CardBody([
        html.H5("Sentiment Summary", className="card-title"),
        dash_table.DataTable(
            id='sentiment-table',
            columns=[
                {"name": "Sentiment", "id": "sentiment"},
                {"name": "Count", "id": "count"}
            ],
            style_table={'overflowX': 'auto'},
            style_cell={'textAlign': 'center'},
            style_header={'fontWeight': 'bold'}
        )
    ]),
    style={"width": "450px", "margin": "auto", "border": "1px solid #ccc", "padding": "10px"}
)

_details_row = dbc.Row([
    dbc.Col([_wordcloud_card], width=8),
    dbc.Col([_summary_card], width=4)
])

# Timer component whose n_intervals drives the refresh callback
# (interval=60000, i.e. one tick per minute).
_refresh_row = dbc.Row([
    dbc.Col(dcc.Interval(id='interval-component', interval=60000, n_intervals=0))
])

# Layout for the app: the four rows stacked in a fluid container.
app.layout = dbc.Container(
    [_title_row, _charts_row, _details_row, _refresh_row],
    fluid=True
)

# Combined Callback for both Graphs, Word Cloud, Table and Styles
@app.callback(
    [
        Output('line-graph', 'figure'),
        Output('pie-chart', 'figure'),
        Output('word-cloud', 'src'),
        Output('sentiment-table', 'data')
    ],
    [Input('interval-component', 'n_intervals')]
)
def update_graphs_and_styles(n_intervals):
    """Rebuild every dashboard output from freshly loaded data.

    The line chart aggregates the daily sentiment counts into 3-day bins;
    the pie chart, word cloud and table cover the whole data set.

    Args:
        n_intervals: Tick counter of 'interval-component'. It is only the
            trigger for the refresh; its value is not used.

    Returns:
        tuple: ``(line_fig, pie_fig, wordcloud_img, sentiment_table_data)`` in
        the order of the ``Output`` list above: the 3-day line figure, the
        pie figure, the word-cloud data URI, and the table rows.
    """
    df = load_data()

    sentiment_labels = ['negative', 'neutral', 'positive']
    # One colour map shared by both figures so the colours always agree.
    sentiment_colors = {
        'negative': 'hotpink',
        'neutral': 'steelblue',
        'positive': 'mediumseagreen'
    }

    # Group by 'Date' and 'sentiment_group', and count the occurrences
    grouped = (df.groupby(['Date', 'sentiment_group'], observed=False)
               .size()
               .unstack(fill_value=0)
               .reset_index())

    # load_data() leaves plain date objects in 'Date'; resample needs a
    # datetime index.
    grouped['Date'] = pd.to_datetime(grouped['Date'])
    grouped.set_index('Date', inplace=True)

    # Sum the daily counts within each 3-day period
    grouped_resampled = grouped.resample('3D').sum().reset_index()

    # Shift each label by two days so the point sits on the last day of its
    # 3-day period rather than the first.
    grouped_resampled['Date'] = grouped_resampled['Date'] + pd.Timedelta(days=2)

    line_fig = px.line(
        grouped_resampled,
        x='Date',
        y=sentiment_labels,  # Plot separate lines for each sentiment group
        labels={'value': 'Count of Sentiments', 'Date': 'Date'},
        title="Sentiment Groups Over 3-Day Intervals (Nov 2024)",
        markers=True,
        color_discrete_map=sentiment_colors
    )

    # Pie chart: share of each sentiment group over the whole data set.
    sentiment_counts = df['sentiment_group'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment_group', 'count']

    pie_fig = px.pie(
        sentiment_counts,
        names='sentiment_group',
        values='count',
        title="Sentiment Group Distribution (Nov 2024)",
        color='sentiment_group',
        color_discrete_map=sentiment_colors
    )

    # Word cloud: astype(str) keeps the join from raising TypeError if the
    # column holds non-string values.
    text = ' '.join(df['SelfText'].dropna().astype(str))
    wordcloud_img = generate_wordcloud(text)

    # Sentiment counts table: look each group up once and emit plain ints;
    # a group that is absent from the counts is reported as 0.
    count_by_group = dict(zip(sentiment_counts['sentiment_group'], sentiment_counts['count']))
    sentiment_table_data = [
        {"sentiment": label.capitalize(), "count": int(count_by_group.get(label, 0))}
        for label in sentiment_labels
    ]
    sentiment_table_data.append(
        {"sentiment": "Total", "count": int(sentiment_counts['count'].sum())}
    )

    return line_fig, pie_fig, wordcloud_img, sentiment_table_data

if __name__ == '__main__':
    import os
    import threading
    import webbrowser

    _dashboard_url = 'http://127.0.0.1:8001/'
    _chrome_path = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

    def _open_dashboard():
        """Open the dashboard in Chrome, or in the default browser if Chrome is not at the macOS path."""
        try:
            subprocess.Popen([_chrome_path, _dashboard_url])
        except OSError:
            webbrowser.open(_dashboard_url)

    # app.run_server() blocks until the server stops, so a browser launch
    # placed after it would only happen on shutdown. Schedule it beforehand,
    # with a short delay so the server has time to start listening.
    # With debug=True the Werkzeug reloader re-runs this script in a child
    # process that has WERKZEUG_RUN_MAIN='true'; skip the launch there so only
    # one browser window is opened.
    if os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        threading.Timer(1.0, _open_dashboard).start()

    app.run_server(debug=True, port=8001)