In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [62]:
# Read the three daily communication logs (Friday, Saturday, Sunday) from CSV
friday_df, saturday_df, sunday_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'hw02/Communication Data/comm-data-Fri.csv',
        'hw02/Communication Data/comm-data-Sat.csv',
        'hw02/Communication Data/comm-data-Sun.csv',
    )
)

In [63]:
# Parse the 'Timestamp' column of every daily frame into datetime values
# (the column is overwritten in place on each frame)
for day_df in (friday_df, saturday_df, sunday_df):
    day_df['Timestamp'] = pd.to_datetime(day_df['Timestamp'])
friday_df

Unnamed: 0,Timestamp,from,to,location
0,2014-06-06 08:03:19,439105,1053224,Kiddie Land
1,2014-06-06 08:03:19,439105,1696241,Kiddie Land
2,2014-06-06 08:03:19,439105,580064,Kiddie Land
3,2014-06-06 08:03:19,439105,1464748,Kiddie Land
4,2014-06-06 08:03:47,1836139,1593258,Entry Corridor
...,...,...,...,...
948734,2014-06-06 23:25:54,1778371,1527872,Entry Corridor
948735,2014-06-06 23:25:54,1778371,572089,Entry Corridor
948736,2014-06-06 23:25:54,1778371,802526,Entry Corridor
948737,2014-06-06 23:25:54,1778371,280592,Entry Corridor


In [77]:
# Snap every timestamp to its nearest 5-minute mark so messages can be
# counted per interval (e.g. 08:03:19 becomes 08:05:00)
for day_df in (friday_df, saturday_df, sunday_df):
    day_df['Timestamp'] = day_df['Timestamp'].dt.round('5min')
friday_df

Unnamed: 0,Timestamp,from,to,location
0,2014-06-06 08:05:00,439105,1053224,Kiddie Land
1,2014-06-06 08:05:00,439105,1696241,Kiddie Land
2,2014-06-06 08:05:00,439105,580064,Kiddie Land
3,2014-06-06 08:05:00,439105,1464748,Kiddie Land
4,2014-06-06 08:05:00,1836139,1593258,Entry Corridor
...,...,...,...,...
948734,2014-06-06 23:25:00,1778371,1527872,Entry Corridor
948735,2014-06-06 23:25:00,1778371,572089,Entry Corridor
948736,2014-06-06 23:25:00,1778371,802526,Entry Corridor
948737,2014-06-06 23:25:00,1778371,280592,Entry Corridor


In [84]:
def _count_per_bin(day_df):
    """Return one row per (Timestamp, location) pair with its row count in 'count'."""
    bin_sizes = day_df.groupby(['Timestamp', 'location']).size()
    return bin_sizes.reset_index(name='count')


# Number of messages per 5-minute timestamp and location, one frame per day
friday_grouped_df = _count_per_bin(friday_df)
saturday_grouped_df = _count_per_bin(saturday_df)
sunday_grouped_df = _count_per_bin(sunday_df)
friday_grouped_df

Unnamed: 0,Timestamp,location,count
0,2014-06-06 08:05:00,Entry Corridor,47
1,2014-06-06 08:05:00,Kiddie Land,22
2,2014-06-06 08:05:00,Tundra Land,4
3,2014-06-06 08:10:00,Entry Corridor,265
4,2014-06-06 08:10:00,Kiddie Land,77
...,...,...,...
907,2014-06-06 23:15:00,Wet Land,1
908,2014-06-06 23:20:00,Entry Corridor,10
909,2014-06-06 23:20:00,Tundra Land,4
910,2014-06-06 23:20:00,Wet Land,36


In [85]:
# One fixed colour per location so every chart uses the same palette
color_map = dict([
    ('Wet Land', 'mediumseagreen'),
    ('Coaster Alley', 'red'),
    ('Tundra Land', 'dodgerblue'),
    ('Entry Corridor', 'mediumpurple'),
    ('Kiddie Land', 'gold'),
])

# Ordering of the locations (per the author: follows the actual location sequence)
location_order = [
    'Coaster Alley',
    'Wet Land',
    'Kiddie Land',
    'Entry Corridor',
    'Tundra Land',
]

In [86]:
import plotly_express as px
import plotly.graph_objects as go
import networkx as nx
from dash import Dash, dcc, html, dash_table
from dash.dependencies import Input, Output

In [111]:
def create_network(df_filtered, seed=42):
    """Draw a directed sender -> receiver network from a table of message counts.

    Parameters
    ----------
    df_filtered : pandas.DataFrame
        One row per directed pair, with columns ``from``, ``to`` and ``count``.
        ``count`` becomes the edge weight. An empty frame yields a figure with
        no edges or nodes.
    seed : int, optional
        Seed handed to the spring layout so the same input always produces the
        same node positions (an unseeded layout reshuffles on every call).

    Returns
    -------
    plotly.graph_objects.Figure
        One line trace per edge (width scaled by its count), a single marker
        trace holding all nodes, and per-edge annotations for the arrow head
        and the message-count label.
    """
    G = nx.DiGraph()

    # Add edges (from-to pairs with counts as edge weights) in one call
    # instead of iterating over the frame row by row.
    G.add_weighted_edges_from(
        zip(df_filtered['from'], df_filtered['to'], df_filtered['count'])
    )

    # Node positions, keyed by node id; seeded for a reproducible layout
    pos = nx.spring_layout(G, seed=seed)
    edge_trace = []
    annotations = []

    # Create edge traces plus arrow and message-count annotations
    for source, target, attrs in G.edges(data=True):
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        weight = attrs['weight']  # Get message count (edge weight)

        # Add edge trace (line); each edge needs its own trace because the
        # line width is proportional to the message count.
        edge_trace.append(go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            line=dict(width=weight/5, color='gray'),
            hoverinfo='none',
            mode='lines'
        ))

        # Add an arrow for each edge, pointing from sender to receiver
        annotations.append(
            dict(
                ax=x0, ay=y0, axref='x', ayref='y',
                x=x1, y=y1, xref='x', yref='y',
                showarrow=True,
                arrowhead=3,  # Arrow style
                arrowsize=1.5,  # Size of the arrow
                arrowwidth=2,  # Width of the arrow line
                arrowcolor='gray'
            )
        )

        # Add the message count as a text label. It sits slightly past the
        # midpoint towards the receiver so that the labels of a reciprocal
        # pair (A -> B and B -> A) do not land on top of each other.
        annotations.append(
            dict(
                x=x0 + 0.6 * (x1 - x0),
                y=y0 + 0.6 * (y1 - y0),
                xref='x', yref='y',
                text=str(weight),
                showarrow=False,
                font=dict(size=10, color='black'),
                bgcolor='white'
            )
        )

    # Create a single trace holding every node (same marker style for all)
    nodes = list(G.nodes())
    node_trace = [go.Scatter(
        x=[pos[node][0] for node in nodes],
        y=[pos[node][1] for node in nodes],
        text=nodes,
        mode='markers+text',
        hoverinfo='text',
        marker=dict(size=20, color='cornflowerblue'),
        textposition="bottom center"
    )]

    # Combine traces
    fig = go.Figure(data=edge_trace + node_trace,
                    layout=go.Layout(
                        title='Top 20 From-To Network with Arrows and Message Counts',
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=0, l=0, r=0, t=40),
                        annotations=annotations,  # Arrows and message-count labels
                        xaxis=dict(showgrid=False, zeroline=False),
                        yaxis=dict(showgrid=False, zeroline=False)
                    ))
    return fig


In [112]:
# Create the Dash application object
app = Dash(__name__)

# Page layout: the line chart first, followed by the network graph
app.layout = html.Div(children=[
    dcc.Graph(id='line-chart'),
    dcc.Graph(id='network-graph'),
])

# Callback to render the line chart
@app.callback(
    Output('line-chart', 'figure'),
    Input('line-chart', 'clickData')
)
def update_line_chart(clickData):
    """Build the Sunday line chart of message counts per location over time.

    ``clickData`` is not used when building the figure; it is only the
    callback's registered input.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per location. Every point carries its location in
        ``customdata`` so that ``update_network_graph`` can read the clicked
        location directly from the click payload.
    """
    fig = px.line(
        sunday_grouped_df,
        x='Timestamp',
        y='count',
        color='location',
        color_discrete_map=color_map,
        # Pin the trace/legend order instead of relying on the order in which
        # locations first appear in the data.
        category_orders={'location': location_order},
        # Attach the location to every point for the click handler below.
        custom_data=['location'],
        # The plotted frame is the Sunday data, so the title must say Sunday.
        title='Trend of Message Sent on Sunday',
        labels={'count': 'Message Count', 'Timestamp': 'Time'}
    )
    return fig

# Callback to update the network graph when a point is clicked on the line chart
@app.callback(
    Output('network-graph', 'figure'),
    Input('line-chart', 'clickData')
)
def update_network_graph(clickData):
    """Show the top 20 sender -> receiver pairs for the clicked time and location.

    Parameters
    ----------
    clickData : dict or None
        Click payload of the line chart. ``None`` until the user clicks a point.

    Returns
    -------
    plotly.graph_objects.Figure
        An empty figure when nothing has been clicked yet, otherwise the
        network produced by ``create_network``.
    """
    if not clickData or not clickData.get('points'):
        return go.Figure()

    clicked_point = clickData['points'][0]

    # The x value arrives as a string; parse it so the comparison against the
    # datetime 'Timestamp' column is explicit.
    clicked_time = pd.to_datetime(clicked_point['x'])

    # Read the location attached to the point by update_line_chart. Indexing
    # location_order with curveNumber is unreliable: the trace index only
    # matches that list if every location is present and drawn in that order.
    clicked_location = clicked_point['customdata'][0]

    # Filter data for the selected time and location
    is_selected = (
        (sunday_df['Timestamp'] == clicked_time)
        & (sunday_df['location'] == clicked_location)
    )
    filtered_df = sunday_df[is_selected]

    # Group by 'from' and 'to' pairs to get message counts
    grouped_from_to = filtered_df.groupby(['from', 'to']).size().reset_index(name='count')

    # Sort by 'count' and take the top 20 "from-to" pairs
    top_20_from_to = grouped_from_to.nlargest(20, 'count')

    # Create the network graph with the top 20 pairs
    return create_network(top_20_from_to)

# Run the Dash app
if __name__ == '__main__':
    # `run_server` has been deprecated in favour of `run` and is no longer
    # available in Dash 3; prefer `run` and only fall back to `run_server`
    # on older Dash releases that do not provide it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)