In [64]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import geopandas as gpd
import plotly.express as px
import seaborn as sns
import plotly.graph_objects as go

In [65]:
# Load the raw damage reports from CSV.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-type inference within a column.
reports_csv_path = "hw05-data/Damage Reports/mc1-reports-data.csv"
damage_report = pd.read_csv(reports_csv_path, low_memory=False)
damage_report

Unnamed: 0,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
0,2020-04-08 17:50:00,10.0,6.0,10.0,3.0,8.0,,1
1,2020-04-09 13:50:00,2.0,10.0,0.0,8.0,4.0,0.0,1
2,2020-04-09 00:20:00,7.0,10.0,10.0,9.0,10.0,0.0,1
3,2020-04-08 17:25:00,1.0,1.0,2.0,10.0,7.0,,1
4,2020-04-08 02:50:00,9.0,7.0,1.0,6.0,9.0,,1
...,...,...,...,...,...,...,...,...
83065,2020-04-10 02:30:00,9.0,10.0,10.0,,7.0,2.0,8
83066,2020-04-10 02:30:00,8.0,10.0,10.0,,7.0,1.0,8
83067,2020-04-09 16:45:00,10.0,9.0,10.0,,8.0,1.0,8
83068,2020-04-09 16:55:00,8.0,8.0,9.0,,7.0,0.0,8


In [66]:
# Parse the 'time' strings into pandas datetimes so the frame can later be
# sorted, indexed and resampled chronologically.
parsed_report_times = pd.to_datetime(damage_report['time'])
damage_report['time'] = parsed_report_times

In [67]:
# Order the reports chronologically and promote 'time' to the index; built as
# a single chain so the source frame `damage_report` is left untouched.
damage_report_sorted = (
    damage_report
    .sort_values(by='time')
    .set_index('time')
)
damage_report_sorted

Unnamed: 0_level_0,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-04-06,2.0,4.0,1.0,,4.0,,17
2020-04-06,2.0,8.0,5.0,,3.0,0.0,2
2020-04-06,9.0,7.0,5.0,,4.0,,8
2020-04-06,10.0,2.0,9.0,,10.0,,4
2020-04-06,6.0,2.0,9.0,,4.0,1.0,15
...,...,...,...,...,...,...,...
2020-04-11,7.0,7.0,8.0,10.0,10.0,,6
2020-04-11,5.0,2.0,2.0,,8.0,,2
2020-04-11,5.0,1.0,3.0,,2.0,,15
2020-04-11,4.0,9.0,7.0,,7.0,,13


In [68]:
# Bucket the time-indexed reports into 10-minute bins and average every column
# within each bin (mean() skips NaN values), then move 'time' back to a column.
# NOTE(review): 'location' is averaged together with the ratings here; since it
# is used as a grouping key elsewhere in this notebook, its mean is presumably
# not meaningful -- confirm before relying on it.
ten_minute_bins = damage_report_sorted.resample('10min')
damage_report_sorted_resampled = ten_minute_bins.mean().reset_index()
damage_report_sorted_resampled

Unnamed: 0,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
0,2020-04-06 00:00:00,6.350000,4.700000,5.350000,7.000000,5.450000,0.333333,9.250000
1,2020-04-06 00:10:00,4.611111,5.722222,5.111111,6.142857,5.777778,0.600000,7.666667
2,2020-04-06 00:20:00,3.833333,4.083333,5.750000,5.166667,4.416667,,9.583333
3,2020-04-06 00:30:00,5.700000,5.800000,4.900000,5.750000,3.900000,1.000000,7.800000
4,2020-04-06 00:40:00,6.714286,4.928571,4.357143,4.125000,4.285714,0.666667,5.571429
...,...,...,...,...,...,...,...,...
716,2020-04-10 23:20:00,4.772727,4.772727,4.590909,4.583333,6.409091,0.500000,10.636364
717,2020-04-10 23:30:00,5.555556,5.000000,3.555556,5.555556,4.888889,0.500000,8.944444
718,2020-04-10 23:40:00,6.000000,5.400000,4.800000,1.800000,5.800000,0.500000,7.900000
719,2020-04-10 23:50:00,5.692308,4.923077,5.230769,6.333333,5.076923,1.000000,9.846154


In [69]:
# Average each rating per (time, location) pair.
# The built-in group mean already excludes NaN values, so it replaces the
# per-column `lambda x: x.mean(skipna=True)` aggregations: same result, but
# computed by pandas' vectorised implementation instead of calling a Python
# function once per group and per column.
rating_columns = [
    'sewer_and_water',
    'power',
    'roads_and_bridges',
    'medical',
    'buildings',
    'shake_intensity',
]
damage_report_sorted_aggregated = (
    damage_report_sorted
    .groupby(['time', 'location'])[rating_columns]  # 'time' is the index level, 'location' a column
    .mean()
    .reset_index()
)
damage_report_sorted_aggregated

Unnamed: 0,time,location,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity
0,2020-04-06,2,2.0,8.0,5.0,,3.0,0.0
1,2020-04-06,3,6.0,3.0,10.0,10.0,9.0,
2,2020-04-06,4,9.0,1.5,9.5,,6.5,1.0
3,2020-04-06,8,9.0,7.0,5.0,,4.0,
4,2020-04-06,15,6.0,2.0,9.0,,4.0,1.0
...,...,...,...,...,...,...,...,...
10284,2020-04-11,2,6.0,1.5,5.0,,4.5,0.0
10285,2020-04-11,6,7.0,7.0,8.0,10.0,10.0,
10286,2020-04-11,13,4.0,9.0,7.0,,7.0,
10287,2020-04-11,15,5.0,1.0,3.0,,2.0,


In [73]:
# Plot the 10-minute averaged ratings over time, one line per category.

# Reshape wide -> long: one row per (time, location, category) so plotly can
# colour the lines by category.
damage_report_sorted_resampled_long = damage_report_sorted_resampled.melt(
    id_vars=['time', 'location'],
    value_vars=['sewer_and_water', 'power', 'roads_and_bridges', 'medical', 'buildings', 'shake_intensity'],
    var_name='Infrastructure',
    value_name='Value',
)

# Line chart of the averaged values.
fig = px.line(
    damage_report_sorted_resampled_long,
    x='time',
    y='Value',
    color='Infrastructure',
    title="Infrastructure Changes (Averaged) Over Time",
    labels={'time': 'Time', 'Value': 'Infrastructure Value'},
)

# Initial x-axis window: the first five days starting at the earliest bin.
trend_window_start = damage_report_sorted_resampled['time'].min()
trend_window_end = trend_window_start + pd.DateOffset(days=5)
fig.update_xaxes(range=[trend_window_start, trend_window_end])

# Add a range slider under the date axis (drag-to-zoom stays available).
fig.update_layout(xaxis={'rangeslider': {'visible': True}, 'type': "date"})

fig.show()

In [None]:
# Draw visualization of the infrastructure damage of individual location trend over time -- NOT used
#
# One line per rating category and location; a dropdown switches which
# location's lines are drawn (all other traces are set to 'legendonly').

# Rating columns to plot. Defined once so that trace creation and the dropdown
# visibility masks are guaranteed to agree on the number and order of traces.
infrastructure_columns = ['sewer_and_water', 'power', 'roads_and_bridges', 'medical', 'buildings', 'shake_intensity']

# Reshape wide -> long: one row per (time, location, category).
damage_report_sorted_aggregated_long = pd.melt(
    damage_report_sorted_aggregated,
    id_vars=['time', 'location'],
    value_vars=infrastructure_columns,
    var_name='Infrastructure',
    value_name='Value',
)

locations = damage_report_sorted_aggregated_long['location'].unique()

# Create the figure with traces for each location; only the first location's
# traces are drawn initially, the rest start as 'legendonly'.
fig = go.Figure()

# trace_locations[k] is the location that trace k of `fig` belongs to. The
# dropdown masks are derived from this list instead of from index arithmetic
# on a variable left over from the loop below.
trace_locations = []

for loc in locations:
    filtered_data = damage_report_sorted_aggregated_long[damage_report_sorted_aggregated_long['location'] == loc]
    visible_status = True if loc == locations[0] else 'legendonly'  # Show only the first location by default
    for infrastructure in infrastructure_columns:
        infrastructure_data = filtered_data[filtered_data['Infrastructure'] == infrastructure]
        fig.add_trace(go.Scatter(
            x=infrastructure_data['time'],
            y=infrastructure_data['Value'],
            mode='lines',
            name=f"{infrastructure} - Location {loc}",
            visible=visible_status
        ))
        trace_locations.append(loc)

# Create dropdown buttons to toggle visibility: selecting a location draws its
# traces and sets every other trace to 'legendonly'.
dropdown_buttons = []
for loc in locations:
    visibility = [True if trace_loc == loc else 'legendonly' for trace_loc in trace_locations]
    dropdown_buttons.append(dict(
        method="update",
        label=f"Location {loc}",
        args=[{"visible": visibility}, {"title": f"Infrastructure Changes Over Time for Individual Locations {loc}"}]
    ))

# Update layout with the dropdown
fig.update_layout(
    updatemenus=[
        {
            "buttons": dropdown_buttons,
            "direction": "down",
            "showactive": True,
        }
    ],
    xaxis_title="Time",
    yaxis_title="Infrastructure Value",
    title="Infrastructure Changes Over Time"
)

# Enable drag-to-zoom and add a range slider
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

fig.show()