# Install Necessary Packages

In [1]:
# Install all required packages
!pip install ipywidgets requests pandas matplotlib plotly wordcloud





# Load Packages

In [2]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from collections import defaultdict
import requests
import json
import datetime
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateLocator
import pandas as pd
from wordcloud import WordCloud

# Backend APIs


In [None]:
# Backend endpoints. The host below is a placeholder and needs to be replaced
# with the address of the deployed backend service.
_BACKEND_API_ROOT = "http://backend-service.backend.svc.cluster.local:5000/api"  # Need to be replaced
API_collect = _BACKEND_API_ROOT + "/collect"
API_search = _BACKEND_API_ROOT + "/run"

# Collect Data by Selected Platform

In [4]:
# Input text box
keyword = widgets.Text(
    value='weather',
    description='Keyword:',
    placeholder='Area of interest'
)
# The search section further down rebinds the global name `keyword` to a
# different text box. Keep a dedicated reference so the collect form keeps
# showing and submitting its own input.
collect_keyword = keyword

# Multi-selection for social media platforms:
platforms = widgets.SelectMultiple(
    options=['mastodon', 'reddit', 'bluesky'],
    value=['mastodon', 'reddit', 'bluesky'],
    description='Social Media:'
)

# Execute button
run_btn_collect = widgets.Button(description='Run!')


def on_run_clicked_collect(btn):
    """Submit the collect form to the backend and report the outcome.

    Redraws the form, POSTs ``{"keyword": ..., "platform": [...]}`` as JSON to
    ``API_collect`` and prints "Successed" or "Failed <error>".

    Args:
        btn: The clicked button widget; it is re-displayed below the inputs.
    """
    clear_output(wait=True)
    display(collect_keyword, platforms, btn)

    payload = {
        "keyword": collect_keyword.value,
        "platform": list(platforms.value)
    }
    print("🔘 Button was clicked!")
    try:
        resq = requests.post(API_collect, json=payload)
        # HTTP 4xx/5xx responses do not raise on their own; without this call
        # an error response would still be reported as a success.
        resq.raise_for_status()
        print("Successed")
    except requests.exceptions.RequestException as e:
        print("Failed", e)


# Show the form and hook up the button
display(collect_keyword, platforms, run_btn_collect)
run_btn_collect.on_click(on_run_clicked_collect)

Text(value='weather', description='Keyword:', placeholder='Area of interest')

SelectMultiple(description='Social Media:', index=(0, 1, 2), options=('mastodon', 'reddit', 'bluesky'), value=…

Button(description='Run!', style=ButtonStyle())

# Function for Chart Plot

In [5]:
# Default results for test: twelve consecutive days (2025-01-01 .. 2025-01-12)
# whose labels cycle A, B, C and whose values cycle 10, 40, 30.
default_result = {
    "labels": ["A", "B", "C"] * 4,
    "times": [f"2025-01-{day:02d}" for day in range(1, 13)],
    "values": [10, 40, 30] * 4,
}

In [6]:
# Accumulate value for same label
def acc_same_label(labels, values):
    """Sum the values that belong to the same label.

    ``labels`` and ``values`` are paired positionally (pairing stops at the
    shorter of the two). Each total starts from ``0.0``, so totals of numeric
    inputs are floats.

    Args:
        labels: Iterable of hashable labels; repeats are merged.
        values: Iterable of numbers, one per label.

    Returns:
        A ``(labels, totals)`` pair of dict views over the accumulated
        mapping, ordered by first appearance of each label.
    """
    totals = {}
    for name, amount in zip(labels, values):
        totals[name] = totals.get(name, 0.0) + amount
    return totals.keys(), totals.values()

## Line Chart

In [7]:
# Line chart
def plot_line(result=default_result, title="Line chart", keyword="AI"):
    """Draw ``result['values']`` against ``result['times']`` as a line chart.

    Args:
        result: Object with a ``.get`` method (dict or DataFrame) providing
            ``'times'`` (date-like values) and ``'values'``.
        title: Title placed above the axes.
        keyword: Accepted but not used by this function.

    Prints "Not valid" and returns without plotting when either series is
    empty or missing.
    """
    raw_times = result.get('times', [])
    series = result.get('values', [])
    if not (len(raw_times) and len(series)):
        print("Not valid")
        return

    timestamps = pd.to_datetime(raw_times)
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(timestamps, series)
    # Cap the number of date ticks so the labels stay readable.
    ax.xaxis.set_major_locator(AutoDateLocator(maxticks=6))
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title(title)
    fig.autofmt_xdate(rotation=45, ha='right')
    plt.show()

In [8]:
# Specific Line chart for topic1
def plot_line_topic1(result=default_result, title="Hourly Posting Trend", keyword="AI"):
    """Plot the "Weekday" and "Weekend" series of ``result`` on one figure.

    Args:
        result: Object indexable by ``"Weekday"`` and ``"Weekend"`` whose
            items expose ``.plot()`` (the display handler passes a DataFrame).
        title: Accepted but not used; the title is built from ``keyword``.
        keyword: Tag name shown upper-cased in the figure title.
    """
    plt.figure(figsize=(10, 5))
    # (column, marker) pairs, drawn in this order on the current axes.
    for column, marker in (("Weekday", "o"), ("Weekend", "s")):
        result[column].plot(label=column, marker=marker, linewidth=2)

    heading = f"Hourly Posting Trend for Tag '{keyword.upper()}' (Weekday vs Weekend)"
    plt.title(heading)
    plt.xlabel("Hour of Day")
    plt.ylabel("Average Posts")
    plt.xticks(range(0, 24))
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

## Bar Chart

In [9]:
# Bar chart
def plot_bar(result=default_result, title="Bar Chart", keyword="AI"):
    """Draw a bar chart of the per-label totals found in ``result``.

    Args:
        result: Object with a ``.get`` method providing ``'labels'`` and
            ``'values'``; values of repeated labels are summed.
        title: Title placed above the chart.
        keyword: Accepted but not used by this function.

    Prints "Not valid" and returns without plotting when either list is
    empty or missing.
    """
    raw_labels = result.get('labels', [])
    raw_values = result.get('values', [])
    if not (len(raw_labels) and len(raw_values)):
        print("Not valid")
        return

    categories, totals = acc_same_label(raw_labels, raw_values)
    plt.figure()
    plt.bar(categories, totals)
    plt.xticks(rotation=45, ha="right")
    plt.xlabel("Category")
    plt.ylabel("Value")
    plt.title(title)
    plt.show()

## Pie Chart

In [10]:
# Pie chart
def plot_pie(result=default_result, title="Pie chart", keyword="AI"):
    """Draw a pie chart of the per-label totals found in ``result``.

    Args:
        result: Object with a ``.get`` method providing ``'labels'`` and
            ``'values'``; values of repeated labels are summed.
        title: Title placed above the chart.
        keyword: Accepted but not used by this function.

    Prints "Not valid" and returns without plotting when either list is
    empty or missing.
    """
    raw_labels = result.get('labels', [])
    raw_values = result.get('values', [])
    if not (len(raw_labels) and len(raw_values)):
        print("Not valid")
        return

    slices, totals = acc_same_label(raw_labels, raw_values)
    plt.figure()
    # Each wedge is annotated with its share, one decimal place.
    plt.pie(totals, labels=slices, autopct='%1.1f%%')
    plt.title(title)
    plt.show()

## Word Cloud

In [11]:
# Cloud chart
def plot_cloud(result=default_result, title="Word Cloud", keyword="AI"):
    """Render a word cloud in which each label is weighted by its summed value.

    Args:
        result: Object with a ``.get`` method providing ``'labels'`` and
            ``'values'``; values of repeated labels are summed.
        title: Title placed above the image.
        keyword: Accepted but not used by this function.

    Prints "Not valid" and returns without plotting when either list is
    empty or missing.
    """
    raw_labels = result.get('labels', [])
    raw_values = result.get('values', [])
    if not (len(raw_labels) and len(raw_values)):
        print("Not valid")
        return

    words, weights = acc_same_label(raw_labels, raw_values)
    frequencies = {word: weight for word, weight in zip(words, weights)}

    cloud = WordCloud(width=800, height=400, background_color='white')
    cloud.generate_from_frequencies(frequencies)

    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.title(title)
    plt.show()

## Chart Dispatch

In [12]:
# chart dispatch: chart-type name (as offered in the chart selector) -> plot function
chart_dispatch = dict([
    ("line", plot_line),
    ("bar", plot_bar),
    ("pie", plot_pie),
    ("word cloud", plot_cloud),
    ("topic1_line", plot_line_topic1),
])

# Topic Research and Result Display

## Keyword Text Box

In [13]:
# Keyword text box of the search form (rebinds the global name `keyword`)
keyword = widgets.Text(description='Keyword', value='ai')

## Topic Single Selection

In [14]:
# Single-selection list of research topics; topic1 is selected initially.
topics = widgets.Select(
    description='Topics',
    options=[
        'topic1: User post count',
        'topic2: Cross-analysis of AI and other topics',
        'topic3: AI topic popularity analysis',
        'topic4: Sentiment analysis across time',
    ],
    value='topic1: User post count',
    layout=widgets.Layout(height='120px', width='700px'),
)

# Respond to topic selection changing
def on_topic_change(change):
    """Apply the preset controls for the newly selected topic and redraw the form.

    For a known topic this selects the 'mastodon_ai_cleaned' database, sets
    the multi-graph checkbox, and replaces the chart selector's options with
    the topic's chart types (all pre-selected). The form is redrawn in every
    case.

    Args:
        change: Change notification from the ``topics`` widget; only
            ``change['new']`` (the selected topic string) is read.
    """
    # topic -> (multi-graph checkbox state, chart types offered and selected)
    presets = {
        'topic1: User post count': (False, ['topic1_line']),
        'topic2: Cross-analysis of AI and other topics': (True, ['bar', 'word cloud']),
        'topic3: AI topic popularity analysis': (False, ['line']),
        'topic4: Sentiment analysis across time': (True, ['line', 'pie']),
    }
    preset = presets.get(change['new'])
    if preset is not None:
        multi_graph, charts = preset
        databases.value = ['mastodon_ai_cleaned']
        checkbox.value = multi_graph
        # Options must be replaced before the selection that refers to them.
        chart_types.options = list(charts)
        chart_types.value = list(charts)
    clear_output(wait=True)
    custom_display()

topics.observe(on_topic_change, names='value')

## Database Multi-Selection

In [15]:
# Multi-selection of the backend databases to query; the cleaned Mastodon AI
# database is selected initially.
databases = widgets.SelectMultiple(
    description='Database',
    options=[
        'mastodon_ai',
        'mastodon_public',
        'mastodon_weather',
        'mastodon_ai_cleaned',
        'reddit_public',
        'reddit_public_cleaned',
    ],
    value=['mastodon_ai_cleaned'],
    layout=widgets.Layout(height='120px', width='700px'),
)

## Chart Type Multi-Selection

In [16]:
# Multi-selection of chart types; starts with the single chart used by topic1.
chart_types = widgets.SelectMultiple(
    description='charts',
    options=['topic1_line'],
    value=['topic1_line'],
    layout=widgets.Layout(height='120px', width='350px'),
)

## Multi-Chart Display Checkbox

In [17]:
# Checkbox for allowing multi-graph display (unchecked and enabled initially)
checkbox = widgets.Checkbox(
    description='Multi-graph display',
    value=False,
    indent=False,
    disabled=False,
    layout=widgets.Layout(height='120px', width='350px'),
)

# Chart selector and checkbox side by side
hbox_chart = widgets.HBox(children=[chart_types, checkbox])

## Start Time & End Time Date Picker

In [18]:
# Time range: both pickers hold datetime.date values
start_time = widgets.DatePicker(
    value=datetime.date(year=2025, month=1, day=1),
    description='Start Date',
    layout=widgets.Layout(height='30px', width='700px'),
)

end_time = widgets.DatePicker(
    value=datetime.date(year=2025, month=5, day=10),
    description='End Date',
    layout=widgets.Layout(height='30px', width='700px'),
)

## Search & Display Buttons

In [19]:
# Execute buttons: "Search!" queries the backend, "Display!" draws the charts
run_btn_search = widgets.Button(
    description='Search!',
    layout=widgets.Layout(height='30px', width='350px'),
)
run_btn_display = widgets.Button(
    description='Display!',
    layout=widgets.Layout(height='30px', width='350px'),
)
# Both buttons on one row
hbox_button = widgets.HBox(children=[run_btn_search, run_btn_display])

## Custom UI Display

In [20]:
def custom_display():
    """Clear the cell output and redraw the search form.

    The keyword text box is shown (directly below the topic list) only for
    topic1 and topic4; the other topics get the form without it.
    """
    keyword_topics = (
        'topic1: User post count',
        'topic4: Sentiment analysis across time',
    )
    controls = [topics, databases, hbox_chart, start_time, end_time, hbox_button]
    if topics.value in keyword_topics:
        controls.insert(1, keyword)

    # One deferred clear is enough; the display call below replaces the output.
    clear_output(wait=True)
    display(*controls)

## Clip Data by Time

In [21]:
def clipped_result(result, start=None, end=None):
    """Return the rows of ``result`` whose date lies inside a date range.

    Args:
        result: Data accepted by ``pd.DataFrame`` that contains a ``'times'``
            column of date-like values.
        start: Inclusive lower bound as a ``datetime.date``. When ``None``
            the value of the ``start_time`` date picker is used.
        end: Inclusive upper bound as a ``datetime.date``. When ``None``
            the value of the ``end_time`` date picker is used.

    Returns:
        A DataFrame with ``'times'`` converted to ``datetime.date`` objects
        and restricted to ``start <= times <= end``. A bound that is still
        ``None`` after consulting the picker (a cleared picker) is treated as
        open instead of raising on the comparison.
    """
    lower = start_time.value if start is None else start
    upper = end_time.value if end is None else end

    df = pd.DataFrame(result)
    df['times'] = pd.to_datetime(df['times']).dt.date

    in_range = pd.Series(True, index=df.index)
    if lower is not None:
        in_range &= df['times'] >= lower
    if upper is not None:
        in_range &= df['times'] <= upper
    return df.loc[in_range]

## Search & Plot Logic

In [22]:
# Latest raw backend response; starts out as the built-in sample data
result = default_result
# Per-topic results (topic1..topic4); all four initially refer to the same
# sample dict until a search stores a real result
result1 = default_result
result2 = default_result
result3 = default_result
result4 = default_result

def on_run_clicked_search(btn):
    """Run the selected topic's query on the backend and store its result.

    Redraws the form, POSTs the topic, databases and date range (plus the
    keyword for topic1 and topic4) as JSON to ``API_search``, and stores the
    ``'result'`` field of the response in the global that belongs to the
    topic (``result1`` .. ``result4``). The raw response body is kept in the
    global ``result``.

    On a request error or a malformed response, ``result`` is reset to
    ``default_result``, "Failed ..." is printed, and the per-topic results
    are left unchanged.

    Args:
        btn: The clicked button widget (unused).
    """
    global result, result1, result2, result3, result4
    clear_output(wait=True)
    custom_display()

    topic = topics.value
    payload = {
        "topic": topic,
        "database": list(databases.value),
        "start_time": str(start_time.value),
        "end_time": str(end_time.value)
    }
    if topic in ('topic1: User post count', 'topic4: Sentiment analysis across time'):
        payload["keyword"] = keyword.value
    print("🔘 Button was clicked!")
    try:
        print("runing")
        resq = requests.post(API_search, json=payload)
        resq.raise_for_status()
        result = resq.json()
        print("Successed")
        print(result)
        topic_result = result['result']
    except requests.exceptions.RequestException as e:
        result = default_result # Once we have the actual result from backend, can delete this one.
        print("Failed", e)
        return
    except (ValueError, KeyError, TypeError) as e:
        # Body was not JSON (older requests raise a plain ValueError), was not
        # a JSON object, or lacked the 'result' field.
        result = default_result
        print("Failed", f"unexpected response format: {e!r}")
        return

    if topic == 'topic1: User post count':
        result1 = topic_result
    elif topic == 'topic2: Cross-analysis of AI and other topics':
        result2 = topic_result
    elif topic == 'topic3: AI topic popularity analysis':
        result3 = topic_result
    elif topic == 'topic4: Sentiment analysis across time':
        result4 = topic_result

def _missing_fields(payload, fields):
    """Return True, after printing a hint, when ``payload`` is not a dict holding every name in ``fields``."""
    if isinstance(payload, dict) and all(name in payload for name in fields):
        return False
    print(f"No usable result for this topic yet (expected fields: {', '.join(fields)}). Click 'Search!' first.")
    return True


def _display_topic1(plot):
    """Print the top-user table from ``result1`` and plot its hourly data."""
    if _missing_fields(result1, ("top_users", "hourly")):
        return
    from io import StringIO

    print(f"{'Username':<25} {'Nick name':<25} {'Number of posts':<25} {'Top3 tag':<50}")
    print("-" * 100)
    for u in result1["top_users"]:
        # str() keeps the width format spec valid if top_tags is a list
        # rather than a string (the payload type is not visible here).
        print(f"{u['username']:<25} {u['nickname']:<25} posts={u['post_count']:<10} tags={str(u['top_tags']):<50}")
    hourly_json = result1['hourly']
    if isinstance(hourly_json, str):
        # Passing a literal JSON string to read_json is deprecated in recent
        # pandas; a file-like wrapper is accepted by old and new versions.
        hourly_json = StringIO(hourly_json)
    df = pd.read_json(hourly_json, orient='split')
    plot(df, keyword=keyword.value)


def _display_topic2(chart, plot):
    """Draw the bar chart (top tags) or the word cloud (all tags) from ``result2``."""
    if _missing_fields(result2, ("labels", "values")):
        return
    # Built before the chart checks so that selecting only "word cloud"
    # still has the data frame available.
    df = pd.DataFrame(result2)
    if chart == "bar":
        top_n = 20
        title = f"Top {top_n} Most Frequent AI-Related Tags"
        plot(df.head(top_n), title, keyword=keyword.value)
    if chart == "word cloud":
        print(f"Total extracted {sum(result2['values']):,} tags, {len(result2['labels']):,} unique tags")
        title = "Word Cloud of AI-Related Tags"
        plot(df, title, keyword=keyword.value)


def _display_topic3(plot):
    """Plot the date-clipped line data from ``result3`` and list its peaks."""
    if _missing_fields(result3, ("line", "peaks")):
        return
    title = "Daily Posts with Tag 'AI'"
    df_clipped = clipped_result(result3["line"])
    plot(df_clipped, title, keyword=keyword.value)
    print("=== Top 8 Peak Dates ===")
    for peak in result3["peaks"]:
        print(f"{peak['date']} — {peak['count']}")


def _display_topic4(chart, plot):
    """Draw the pie chart or the date-clipped line chart from ``result4``."""
    if chart == "pie":
        if _missing_fields(result4, ("pie",)):
            return
        title = f"Sentiment Distribution for {keyword.value}-related Posts"
        plot(result4["pie"], title, keyword=keyword.value)
    if chart == "line":
        if _missing_fields(result4, ("line",)):
            return
        title = "Daily Average Sentiment (Compound Score)"
        df_clipped = clipped_result(result4["line"])
        plot(df_clipped, title, keyword=keyword.value)


def on_run_clicked_display(btn):
    """Redraw the form and render every selected chart for the current topic.

    For each entry selected in ``chart_types`` the matching plot function is
    looked up in ``chart_dispatch`` and fed from the stored result of the
    selected topic (``result1`` .. ``result4``). If a stored result lacks the
    fields a chart needs (for example before any successful search), a hint
    is printed instead of raising.

    Args:
        btn: The clicked button widget (unused).
    """
    clear_output(wait=True)
    custom_display()
    topic = topics.value
    for chart in chart_types.value:
        plot = chart_dispatch.get(chart)
        if plot is None:
            print(f"Unsupported chart type: {chart}")
            continue
        if topic == 'topic1: User post count':
            _display_topic1(plot)
        elif topic == 'topic2: Cross-analysis of AI and other topics':
            _display_topic2(chart, plot)
        elif topic == 'topic3: AI topic popularity analysis':
            _display_topic3(plot)
        elif topic == 'topic4: Sentiment analysis across time':
            _display_topic4(chart, plot)
        
# Hook the buttons up to their handlers, then render the search form
run_btn_search.on_click(on_run_clicked_search)
run_btn_display.on_click(on_run_clicked_display)
custom_display()

Select(description='Topics', layout=Layout(height='120px', width='700px'), options=('topic1: User post count',…

Text(value='ai', description='Keyword')

SelectMultiple(description='Database', index=(3,), layout=Layout(height='120px', width='700px'), options=('mas…

HBox(children=(SelectMultiple(description='charts', index=(0,), layout=Layout(height='120px', width='350px'), …

DatePicker(value=datetime.date(2025, 1, 1), description='Start Date', layout=Layout(height='30px', width='700p…

DatePicker(value=datetime.date(2025, 5, 10), description='End Date', layout=Layout(height='30px', width='700px…

HBox(children=(Button(description='Search!', layout=Layout(height='30px', width='350px'), style=ButtonStyle())…

🔘 Button was clicked!
runing
Failed HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /api/run (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001CE24E825E0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
