In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import geopandas as gpd
import plotly.express as px
import seaborn as sns

In [2]:
# Load the raw damage reports. low_memory=False has pandas parse the file in
# a single pass rather than in chunks, avoiding mixed-dtype inference.
reports_csv_path = "hw05-data/Damage Reports/mc1-reports-data.csv"
damage_report = pd.read_csv(reports_csv_path, low_memory=False)
damage_report

Unnamed: 0,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
0,2020-04-08 17:50:00,10.0,6.0,10.0,3.0,8.0,,1
1,2020-04-09 13:50:00,2.0,10.0,0.0,8.0,4.0,0.0,1
2,2020-04-09 00:20:00,7.0,10.0,10.0,9.0,10.0,0.0,1
3,2020-04-08 17:25:00,1.0,1.0,2.0,10.0,7.0,,1
4,2020-04-08 02:50:00,9.0,7.0,1.0,6.0,9.0,,1
...,...,...,...,...,...,...,...,...
83065,2020-04-10 02:30:00,9.0,10.0,10.0,,7.0,2.0,8
83066,2020-04-10 02:30:00,8.0,10.0,10.0,,7.0,1.0,8
83067,2020-04-09 16:45:00,10.0,9.0,10.0,,8.0,1.0,8
83068,2020-04-09 16:55:00,8.0,8.0,9.0,,7.0,0.0,8


In [5]:
# Parse the 'time' strings read from the CSV into pandas datetimes so the
# reports can be ordered and bucketed by time.
damage_report['time'] = damage_report['time'].pipe(pd.to_datetime)

In [10]:
# Chronologically ordered copy of the reports (sort_values returns a new
# frame; damage_report itself is not modified).
damage_report_sorted = damage_report.sort_values('time', ascending=True)
damage_report_sorted

Unnamed: 0,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
9945,2020-04-06,2.0,4.0,1.0,,4.0,,17
529,2020-04-06,2.0,8.0,5.0,,3.0,0.0,2
4925,2020-04-06,9.0,7.0,5.0,,4.0,,8
2460,2020-04-06,10.0,2.0,9.0,,10.0,,4
8554,2020-04-06,6.0,2.0,9.0,,4.0,1.0,15
...,...,...,...,...,...,...,...,...
3761,2020-04-11,7.0,7.0,8.0,10.0,10.0,,6
1310,2020-04-11,5.0,2.0,2.0,,8.0,,2
8582,2020-04-11,5.0,1.0,3.0,,2.0,,15
7343,2020-04-11,4.0,9.0,7.0,,7.0,,13


In [11]:
# Index the reports by timestamp so they can be bucketed by time.
# The guard keeps this cell re-runnable: once 'time' has become the index it
# is no longer a column, and an unconditional set_index('time') would raise
# KeyError on a second run.
if 'time' in damage_report_sorted.columns:
    damage_report_sorted = damage_report_sorted.set_index('time')

# Average every column within each 5-minute bucket, then turn 'time' back
# into a regular column so it can be used as the plot's x-axis.
# NOTE(review): 'location' is averaged together with the damage ratings; if
# it is an area identifier its mean is not meaningful -- confirm before
# relying on that column.
damage_report_sorted_resampled = (
    damage_report_sorted
    .resample('5min')
    .mean()
    .reset_index()
)
damage_report_sorted_resampled

Unnamed: 0,time,sewer_and_water,power,roads_and_bridges,medical,buildings,shake_intensity,location
0,2020-04-06 00:00:00,6.500000,4.250000,6.625000,10.000000,5.625000,0.500000,8.875000
1,2020-04-06 00:05:00,6.250000,5.000000,4.500000,5.500000,5.333333,0.000000,9.500000
2,2020-04-06 00:10:00,3.000000,6.272727,4.363636,7.200000,5.363636,0.600000,7.727273
3,2020-04-06 00:15:00,7.142857,4.857143,6.285714,3.500000,6.428571,,7.571429
4,2020-04-06 00:20:00,4.250000,5.000000,7.000000,7.333333,4.500000,,10.000000
...,...,...,...,...,...,...,...,...
1436,2020-04-10 23:40:00,7.750000,6.250000,5.500000,2.333333,6.500000,1.000000,5.500000
1437,2020-04-10 23:45:00,4.833333,4.833333,4.333333,1.000000,5.333333,0.333333,9.500000
1438,2020-04-10 23:50:00,3.857143,5.285714,3.285714,5.600000,5.714286,1.000000,9.142857
1439,2020-04-10 23:55:00,7.833333,4.500000,7.500000,10.000000,4.333333,,10.666667


In [12]:
# Visualize how the 5-minute averaged ratings evolve over time.

# Reshape wide -> long: one row per (time, location, measure), so plotly can
# draw a separate line for each measure.
measure_columns = ['sewer_and_water', 'power', 'roads_and_bridges', 'medical', 'buildings', 'shake_intensity']
damage_report_sorted_clean_long = damage_report_sorted_resampled.melt(
    id_vars=['time', 'location'],
    value_vars=measure_columns,
    var_name='Infrastructure',
    value_name='Value',
)

# One line per measure, colored by measure name.
fig = px.line(
    damage_report_sorted_clean_long,
    x='time',
    y='Value',
    color='Infrastructure',
    title="Infrastructure Changes Over Time",
    labels={'time': 'Time', 'Value': 'Infrastructure Value'},
)

# Initial x-axis window: six days starting at the earliest timestamp.
first_timestamp = damage_report_sorted_resampled['time'].min()
fig.update_xaxes(range=[first_timestamp, first_timestamp + pd.DateOffset(days=6)])

# Show a range slider under the chart and treat the x-axis as dates.
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

fig.show()