In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [13]:
# Read the three daily communication logs (Friday, Saturday, Sunday) in one pass.
friday_df, saturday_df, sunday_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'hw02-data/Communication Data/comm-data-Fri.csv',
        'hw02-data/Communication Data/comm-data-Sat.csv',
        'hw02-data/Communication Data/comm-data-Sun.csv',
    )
)

In [14]:
# Parse the 'Timestamp' column of each day's frame into pandas datetimes
friday_df['Timestamp'] = friday_df['Timestamp'].pipe(pd.to_datetime)
saturday_df['Timestamp'] = saturday_df['Timestamp'].pipe(pd.to_datetime)
sunday_df['Timestamp'] = sunday_df['Timestamp'].pipe(pd.to_datetime)
friday_df

Unnamed: 0,Timestamp,from,to,location
0,2014-06-06 08:03:19,439105,1053224,Kiddie Land
1,2014-06-06 08:03:19,439105,1696241,Kiddie Land
2,2014-06-06 08:03:19,439105,580064,Kiddie Land
3,2014-06-06 08:03:19,439105,1464748,Kiddie Land
4,2014-06-06 08:03:47,1836139,1593258,Entry Corridor
...,...,...,...,...
948734,2014-06-06 23:25:54,1778371,1527872,Entry Corridor
948735,2014-06-06 23:25:54,1778371,572089,Entry Corridor
948736,2014-06-06 23:25:54,1778371,802526,Entry Corridor
948737,2014-06-06 23:25:54,1778371,280592,Entry Corridor


In [15]:
# Number of messages per (timestamp, location) pair, one frame per day
friday_grouped_df = (
    friday_df
    .groupby(['Timestamp', 'location'])
    .size()
    .rename('count')
    .reset_index()
)
saturday_grouped_df = (
    saturday_df
    .groupby(['Timestamp', 'location'])
    .size()
    .rename('count')
    .reset_index()
)
sunday_grouped_df = (
    sunday_df
    .groupby(['Timestamp', 'location'])
    .size()
    .rename('count')
    .reset_index()
)
friday_grouped_df

Unnamed: 0,Timestamp,location,count
0,2014-06-06 08:03:19,Kiddie Land,4
1,2014-06-06 08:03:47,Entry Corridor,1
2,2014-06-06 08:04:06,Kiddie Land,4
3,2014-06-06 08:04:22,Kiddie Land,4
4,2014-06-06 08:04:33,Kiddie Land,4
...,...,...,...
108800,2014-06-06 23:22:28,Entry Corridor,1
108801,2014-06-06 23:22:59,Entry Corridor,1
108802,2014-06-06 23:23:22,Entry Corridor,1
108803,2014-06-06 23:25:05,Entry Corridor,1


In [16]:
# Build copies of each day's frame whose timestamps are rounded to the nearest
# 5 minutes; the original frames are left untouched.
friday_5min_df = friday_df.assign(Timestamp=friday_df['Timestamp'].dt.round('5min'))
saturday_5min_df = saturday_df.assign(Timestamp=saturday_df['Timestamp'].dt.round('5min'))
sunday_5min_df = sunday_df.assign(Timestamp=sunday_df['Timestamp'].dt.round('5min'))
friday_5min_df

Unnamed: 0,Timestamp,from,to,location
0,2014-06-06 08:05:00,439105,1053224,Kiddie Land
1,2014-06-06 08:05:00,439105,1696241,Kiddie Land
2,2014-06-06 08:05:00,439105,580064,Kiddie Land
3,2014-06-06 08:05:00,439105,1464748,Kiddie Land
4,2014-06-06 08:05:00,1836139,1593258,Entry Corridor
...,...,...,...,...
948734,2014-06-06 23:25:00,1778371,1527872,Entry Corridor
948735,2014-06-06 23:25:00,1778371,572089,Entry Corridor
948736,2014-06-06 23:25:00,1778371,802526,Entry Corridor
948737,2014-06-06 23:25:00,1778371,280592,Entry Corridor


In [17]:
# Number of messages per (5-minute rounded timestamp, location) pair, one frame per day
friday_grouped_5min_df = (
    friday_5min_df
    .groupby(['Timestamp', 'location'])
    .size()
    .reset_index(name='count')
)
saturday_grouped_5min_df = (
    saturday_5min_df
    .groupby(['Timestamp', 'location'])
    .size()
    .reset_index(name='count')
)
sunday_grouped_5min_df = (
    sunday_5min_df
    .groupby(['Timestamp', 'location'])
    .size()
    .reset_index(name='count')
)
friday_grouped_5min_df

Unnamed: 0,Timestamp,location,count
0,2014-06-06 08:05:00,Entry Corridor,47
1,2014-06-06 08:05:00,Kiddie Land,22
2,2014-06-06 08:05:00,Tundra Land,4
3,2014-06-06 08:10:00,Entry Corridor,265
4,2014-06-06 08:10:00,Kiddie Land,77
...,...,...,...
907,2014-06-06 23:15:00,Wet Land,1
908,2014-06-06 23:20:00,Entry Corridor,10
909,2014-06-06 23:20:00,Tundra Land,4
910,2014-06-06 23:20:00,Wet Land,36


In [18]:
# One fixed color per location so every chart uses the same palette
color_mapping = dict([
    ('Tundra Land', 'dodgerblue'),
    ('Entry Corridor', 'mediumpurple'),
    ('Kiddie Land', 'gold'),
    ('Wet Land', 'mediumseagreen'),
    ('Coaster Alley', 'red'),
])

# Locations listed in their actual location sequence
location_order = [
    'Coaster Alley',
    'Wet Land',
    'Kiddie Land',
    'Entry Corridor',
    'Tundra Land',
]

In [19]:
import plotly_express as px
import plotly.graph_objects as go
import networkx as nx
from dash import Dash, dcc, html, dash_table
from dash.dependencies import Input, Output
from dash import dash_table

In [20]:
def create_network(df_filtered, seed=42, title='Top 20 Message From-To Pairs'):
    """Build a Plotly figure of a directed sender -> receiver message network.

    Parameters
    ----------
    df_filtered : pandas.DataFrame
        One row per directed pair, with columns ``from``, ``to`` and ``count``.
        ``count`` becomes the edge weight; if a pair appears more than once the
        last row wins.
    seed : int or None, optional
        Seed forwarded to ``nx.spring_layout`` so the same input always yields
        the same node positions. Pass ``None`` for a fresh random layout on
        every call.
    title : str, optional
        Title shown above the network plot.

    Returns
    -------
    plotly.graph_objects.Figure
        One line trace per edge (line width = weight / 5), one marker+text
        trace holding all nodes, and one arrow annotation per edge.
    """
    G = nx.DiGraph()

    # Add edges in bulk (from-to pairs with counts as edge weights)
    G.add_weighted_edges_from(
        zip(df_filtered['from'], df_filtered['to'], df_filtered['count'])
    )

    # Seeded layout: without a seed the positions change on every call,
    # so re-selecting the same data would reshuffle the picture.
    pos = nx.spring_layout(G, seed=seed)

    edge_traces = []
    arrows = []

    # One trace per edge (each edge needs its own line width) plus an arrow
    # annotation pointing from sender to receiver.
    for source, target, weight in G.edges(data='weight'):
        x0, y0 = pos[source]
        x1, y1 = pos[target]

        edge_traces.append(go.Scatter(
            x=[x0, x1, None],
            y=[y0, y1, None],
            line=dict(width=weight/5, color='gray'),
            hoverinfo='none',
            mode='lines'
        ))

        arrows.append(
            dict(
                ax=x0, ay=y0, axref='x', ayref='y',
                x=x1, y=y1, xref='x', yref='y',
                showarrow=True,
                arrowhead=3,
                arrowsize=1.5,
                arrowwidth=2,
                arrowcolor='gray'
            )
        )

    # All nodes share the same styling, so a single trace is enough.
    node_ids = list(G.nodes())
    node_traces = []
    if node_ids:
        node_traces.append(go.Scatter(
            x=[pos[node][0] for node in node_ids],
            y=[pos[node][1] for node in node_ids],
            text=node_ids,
            mode='markers+text',
            hoverinfo='text',
            marker=dict(size=20, color='cornflowerblue'),
            textposition="bottom center"
        ))

    # Combine traces (edges first so the node markers are drawn on top)
    fig = go.Figure(data=edge_traces + node_traces,
                    layout=go.Layout(
                        title=title,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=0, l=0, r=0, t=40),
                        annotations=arrows,
                        xaxis=dict(showgrid=False, zeroline=False),
                        yaxis=dict(showgrid=False, zeroline=False)
                    ))
    return fig


In [21]:
def visualize_df(df, grouped_df, date):
    """Serve an interactive Dash view of one day's message traffic.

    The page shows a line chart of message counts per location over time.
    Clicking a point on that chart fills in (a) a network graph of the 20
    busiest from-to pairs and (b) a table of all from-to pairs, both for the
    clicked timestamp and location.

    Parameters
    ----------
    df : pandas.DataFrame
        Message-level data with columns ``Timestamp``, ``from``, ``to`` and
        ``location``. Its timestamps must use the same granularity as
        ``grouped_df`` because clicked times are matched by equality.
    grouped_df : pandas.DataFrame
        Aggregated data with columns ``Timestamp``, ``location`` and ``count``
        that drives the line chart.
    date : str
        Human-readable day label appended to the chart title.

    Notes
    -----
    The server is only started when ``__name__ == '__main__'`` in the module
    that defines this function.
    """
    default_header_style = {
        'backgroundColor': 'paleturquoise',
        'fontWeight': 'bold'
    }

    # The trend chart depends only on the arguments, so build it once instead
    # of regenerating it on every click (which would also reset zoom/pan).
    # custom_data attaches the location to every point so the click handler
    # does not have to guess it from the trace index.
    line_fig = px.line(
        grouped_df,
        x='Timestamp',
        y='count',
        color='location',
        color_discrete_map=color_mapping,
        category_orders={'location': location_order},  # Ensure the correct order
        custom_data=['location'],
        title='Trend of Message Sent on '+date,
        labels={'count': 'Message Count', 'Timestamp': 'Time'}
    )

    # Initialize Dash app
    app = Dash(__name__)

    # Layout of the app (style keys are camelCase, as Dash expects)
    app.layout = html.Div([
        dcc.Graph(id='line-chart', figure=line_fig),

        html.Div([
            html.Div([
                dcc.Graph(id='network-graph', style={'height': '400px', 'width': '100%'})
            ], style={'flex': '1', 'marginRight': '50px'}),

            html.Div([
                html.H4("All From-To Message Pairs in Descending Order", style={'textAlign': 'left', 'fontFamily': 'verdana, arial, sans-serif'}),
                dash_table.DataTable(
                    id='message-table',
                    columns=[
                        {'name': 'From', 'id': 'from'},
                        {'name': 'To', 'id': 'to'},
                        {'name': 'Message Count', 'id': 'count'}
                    ],
                    data=[],  # Initially empty
                    style_table={'height': '400px', 'overflowY': 'auto', 'width': '60%'},
                    style_cell={'textAlign': 'center', 'padding': '5px'},
                    style_header=default_header_style
                )
            ], style={'flex': '1'})
        ], style={'display': 'flex', 'width': '100%', 'backgroundColor': 'white'})
    ], style={'backgroundColor': 'white'})

    # Callback to update the network graph and the message table when a point is clicked on the line chart
    @app.callback(
        [Output('network-graph', 'figure'), Output('message-table', 'data'), Output('message-table', 'style_header')],
        Input('line-chart', 'clickData')
    )
    def update_network_graph_and_table(clickData):
        # Nothing clicked yet: empty figure, empty table, default header
        if clickData is None:
            return go.Figure(), [], default_header_style

        # Get location & time of click. The location comes from the point's
        # customdata rather than location_order[curveNumber], which would be
        # off whenever a location in location_order has no trace in the chart.
        clicked_point = clickData['points'][0]
        clicked_location = clicked_point['customdata'][0]
        # Plotly reports x as a string; parse it so the comparison below is
        # datetime-to-datetime.
        clicked_time = pd.to_datetime(clicked_point['x'])

        # Filter data for the selected time and location
        filtered_df = df[(df['Timestamp'] == clicked_time) & (df['location'] == clicked_location)]

        # Group by from-to pairs to get message counts
        grouped_from_to = filtered_df.groupby(['from', 'to']).size().reset_index(name='count').sort_values(by='count', ascending=False)

        # Update the table
        table_data = grouped_from_to.to_dict('records')

        # Update the header color based on the clicked location
        header_style = {
            'backgroundColor': color_mapping.get(clicked_location, 'paleturquoise'),
            'fontWeight': 'bold'
        }

        # Create the network graph with the top 20 pairs
        top_20_from_to = grouped_from_to.nlargest(20, 'count')
        network_fig = create_network(top_20_from_to)

        return network_fig, table_data, header_style

    if __name__ == '__main__':
        # Prefer app.run (run_server is deprecated in newer Dash releases);
        # fall back to run_server for Dash versions that predate app.run.
        run_app = getattr(app, 'run', None) or app.run_server
        run_app(debug=True)

Uncomment one of the calls below to visualize a single day's communication data, using either the original timestamps or the 5-minute grouped version.

In [22]:
# Original-timestamp views (disabled):
# visualize_df(friday_df,friday_grouped_df,"Friday Jun 6th")
# visualize_df(saturday_df,saturday_grouped_df, "Saturday Jun 7th")
# visualize_df(sunday_df,sunday_grouped_df, "Sunday Jun 8th")

# 5-minute grouped views (Sunday is the active selection):
# visualize_df(friday_5min_df,friday_grouped_5min_df,"Friday Jun 6th")
# visualize_df(saturday_5min_df,saturday_grouped_5min_df, "Saturday Jun 7th")
visualize_df(
    df=sunday_5min_df,
    grouped_df=sunday_grouped_5min_df,
    date="Sunday Jun 8th",
)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

