## Create code that processes model output into nice visualizations:

1) The files with daily deaths, hospital, deaths counts across all population per districts with format output_workplaceBubblesSophie_ need to be averaged for daily trajectories
2) The files with daily deaths, deaths, and in occupation by occupation (occ4) with format output_workplaceBubblesSophie_1_Economic_Status_Covid need to be averaged for daily trajectories
3) The files with daily demographics of deaths and deaths by sex and gender

In [162]:
# generic packages
import os
from os.path import isfile, join
import glob
import subprocess
import re

#dataframe packages
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# plot packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab as plt
import seaborn as sns
sns.set(style="darkgrid")

#maps
import geopandas as gpd
import plotly.express as px
import json


In [163]:

# input_path = "/Users/sophieayling/Library/CloudStorage/GoogleDrive-sophie2ayling@gmail.com/My Drive/PhD/06_Data and Modelling/thesis_data/model_output/daily_case_death_counts/"
# output_path = "/Users/sophieayling/Library/CloudStorage/GoogleDrive-sophie2ayling@gmail.com/My Drive/PhD/06_Data and Modelling/thesis_data/model_output/daily_case_death_counts/plots/"

dist_input_path= "/Users/sophieayling/Library/CloudStorage/GoogleDrive-sophie2ayling@gmail.com/My Drive/PhD/06_Data and Modelling/thesis_data/model_output/b_district_daily_counts/"
dist_output_path= "/Users/sophieayling/Library/CloudStorage/GoogleDrive-sophie2ayling@gmail.com/My Drive/PhD/06_Data and Modelling/thesis_data/model_output/b_district_daily_counts/plots/"

In [164]:
# decide which version I am creating graphics for 

# Define the folder path and file prefix
folder_path = dist_input_path


file_prefix = 'output_workplaceBubblesSophie_'
id_prefix='bubblesNorm'

file_prefix = 'output_perfectMixingSophie_'
id_prefix="perfMix"

file_prefix ='output_schoolToHomeSophie_'
id_prefix = "schoolToHome"

file_prefix ='output_schoolToComSophie_'
id_prefix = "schoolToCom"

file_prefix ='output_comWorkToHomeSophie_'
id_prefix = "comWorkersToHome"

file_prefix = 'output_workToHomeSophie_'
id_prefix = 'workToHome'

file_prefix = 'output_allToHomeSophie_'
id_prefix = 'allToHome'

# # # ## mobility scenarios set 
file_prefix = 'output_BubblesLd_'
id_prefix = 'bubblesLd'

# file_prefix = 'output_BubblesLd_'
# id_prefix = 'bubblesLdv2'

file_prefix = 'output_BubblesLd1a_'
id_prefix = 'bubblesLd_1a'

file_prefix = 'output_BubblesLd1b_'
id_prefix = 'bubblesLd_1b'

file_prefix = 'output_BubblesLd2a_'
id_prefix = 'bubblesLd_2a'

file_prefix = 'output_BubblesLd2b_'
id_prefix = 'bubblesLd_2b'

file_prefix = 'output_BubblesLd3a_'
id_prefix = 'bubblesLd_3a'

file_prefix = 'output_BubblesLd3b_'
id_prefix = 'bubblesLd_3b'

## 3. Deaths per district (and other metrics) - maps

In [165]:

# Use glob to find all files with the specified prefix
file_pattern = f"{folder_path}/{file_prefix}*.txt"
file_list = glob.glob(file_pattern)


# Initialize an empty list to store individual DataFrames
df_list = []

In [166]:


# Loop through the list of files and read each one into a DataFrame
for file in file_list:
    df = pd.read_csv(file, delimiter='\t')# Adjust delimiter as per your file format
    # Extract the run number from the filename
    run_number = os.path.basename(file).split('_')[2]
    df['run']=int(run_number)
    df_list.append(df)

# Concatenate all DataFrames in the list into a single DataFrame
final_df = pd.concat(df_list, ignore_index=True)
# Convert the 'run_number' column to numeric (int64)
final_df['run'] = pd.to_numeric(final_df['run'])
# Display the resulting DataFrame
final_df.head(10)
#final_df.metric.value_counts()

Unnamed: 0,day,metric,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,...,d_52,d_53,d_54,d_55,d_56,d_57,d_58,d_59,d_60,run
0,0,total_cases,7,6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
1,0,total_asympt_cases,1,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
2,0,total_mild_cases,4,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
3,0,total_severe_cases,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
4,0,total_critical_cases,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
5,0,total_recovered,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
6,0,cumulative_cases,7,6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
7,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
8,0,new_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7
9,1,total_cases,5,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,7


In [167]:
# keep one with the total deaths by district
cum_deaths_df = final_df[final_df['metric']=='cumulative_deaths']
# create variable across district columns 

cum_deaths_df=cum_deaths_df.sort_values(by=['day', 'run'])
cum_deaths_df.head()

Unnamed: 0,day,metric,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,...,d_52,d_53,d_54,d_55,d_56,d_57,d_58,d_59,d_60,run
6307,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5407,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
907,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
2707,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3607,0,cumulative_deaths,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5


In [168]:
# now reshape the data across
df_melted = pd.melt(cum_deaths_df, id_vars=['day', 'run'], value_vars=[f'd_{i}' for i in range(1, 61)], var_name='district', value_name='cum_deaths')

# create version which is without the d_
df_melted['dist_no']= df_melted['district'].str.replace('d_', '')

df_melted.head()


# do one also for cumulative deaths 
df_melted = pd.melt(cum_deaths_df, id_vars=['day', 'run'], value_vars=[f'd_{i}' for i in range(1, 61)], var_name='district', value_name='cum_deaths')

# create version which is without the d_
df_melted['dist_no']= df_melted['district'].str.replace('d_', '')

df_melted.head()


Unnamed: 0,day,run,district,cum_deaths,dist_no
0,0,1,d_1,0,1
1,0,2,d_1,0,1
2,0,3,d_1,0,1
3,0,4,d_1,0,1
4,0,5,d_1,0,1


In [169]:

#group the data by daily district deaths (aggregating runs) but keeping districts independent
r_data = df_melted.groupby(['day','dist_no'])['cum_deaths'].mean().reset_index()
# Round the 'value' column to 1 decimal place
r_data['cum_deaths'] = r_data['cum_deaths'].round(1)

r_data['dist_no']=pd.to_numeric(r_data['dist_no'])
r_data=r_data.sort_values(by=['day', 'dist_no'])

r_data.to_csv (dist_output_path+f'{id_prefix}agg_death_counts_dist.csv')
r_data.sort_values(by='dist_no')
r_data.head()

Unnamed: 0,day,dist_no,cum_deaths
0,0,1,0.0
11,0,2,0.0
22,0,3,0.0
33,0,4,0.0
44,0,5,0.0


In [170]:
deaths_per_dist = r_data.groupby('dist_no', as_index=False)['cum_deaths'].max()

r_data_t5_deaths = deaths_per_dist.nlargest(5,'cum_deaths')
# r_data_t5_death = r_data.nlargest(5,'metric_died_count')


r_data_t5_deaths.to_csv(dist_output_path+f'_top5_deaths_dists_{id_prefix}.csv')
r_data_t5_deaths.head()

Unnamed: 0,dist_no,cum_deaths
1,2,120.8
0,1,52.4
5,6,44.8
6,7,43.4
26,27,39.9


In [171]:
stop

NameError: name 'stop' is not defined

In [None]:
# now graph

# ensure I have enough colours in the palette 
palette = sns.color_palette("husl", 60)

# Plotting the data
plt.figure(figsize=(10, 6))
sns.lineplot(data=r_data, x="day", y="cum_deaths", hue="dist_no", palette=palette) #, style="", , err_style="band"


# Adding titles and labels
plt.title(f'{id_prefix} Cumulative Deaths Over Time by District', size=18)
plt.xlabel('Day')
plt.ylabel('Total Number of New Deaths')
plt.ylim(0,150)
plt.legend(title='District', loc= 'upper left', bbox_to_anchor=(1,1), ncol=3)
plt.xlim(0,90)
plt.grid(True)

# export the plot 
plt.savefig(dist_output_path+f'{id_prefix}deaths_over_time_district.png', dpi=300)


In [None]:
# now plot - districts with the highest death numbers overall

map_input_path= "/Users/sophieayling/Library/CloudStorage/GoogleDrive-sophie2ayling@gmail.com/My Drive/PhD/06_Data and Modelling/thesis_data/shapefiles/60_districts/"
dist_shape="ZWE_adm2.shp"

dists=gpd.read_file(map_input_path+dist_shape)

dists.plot()
plt.show()
dists.head()
dists['dist_no']= dists['ID_2']

In [None]:
#group the data by daily district deaths (aggregating runs) but keeping districts independent - as I'm now doing cumulative, only want to keep the last obs 
r_data_tot = r_data.groupby('dist_no')['cum_deaths'].last().reset_index()
# Round the 'value' column to 1 decimal place
r_data_tot['cum_deaths'] = r_data_tot['cum_deaths'].round(1)
r_data_tot.to_csv(dist_output_path+f'{id_prefix}cum_deaths_dist.csv')
r_data_tot.head()

In [None]:
merged_gdf=dists.merge(r_data_tot, on= 'dist_no')
# Rename the column 'NAME_2' to 'name'
merged_gdf.rename(columns={'NAME_2': 'name'}, inplace=True)

# Define color map
cmap = 'magma_r' # if you add _r at the end it inverts the color map

# other options include viridis, plasma, inferno, magma, cividis

# Create plot
fig, ax = plt.subplots(1, 1, figsize=(10, 8))

# Plot the GeoDataFrame with the specified column and color map
merged_gdf.plot(column='cum_deaths', cmap=cmap, linewidth=0.8, ax=ax, edgecolor='0.8', legend=True, vmin=0, vmax=150)

# Add title
plt.title(f'{id_prefix}Total deaths by District', fontsize=18)

# export
plt.savefig(dist_output_path+f'{id_prefix}dist_tot_deaths_map.png', dpi=300)


In [None]:
# use plotly to make an interactive map instead

import plotly.express as px


# Convert the GeoDataFrame back to a geographic CRS (EPSG:4326) for Plotly
gdf= merged_gdf.to_crs(epsg=4326)

# covert geodataframe to geojson
geojson_data= json.loads(gdf.to_json())

# Coordinates for Zimbabwe's approximate center
zimbabwe_center = {"lat": -19.015438, "lon": 29.154857}
    
# Create the choropleth map using Plotly
fig = px.choropleth_mapbox(
    gdf,
    geojson=geojson_data,
    locations='dist_no',  # Column in merged_gdf to match with the GeoJSON
    featureidkey="properties.dist_no",  # Property in GeoJSON to match locations - if you don't do this it gets super confused, this command is essential
    color='cum_deaths',  # Column to use for color
    color_continuous_scale="magma_r",
    center=zimbabwe_center,
    mapbox_style="carto-positron",
    zoom=5,
    title='Total deaths by District', 
    hover_data={'dist_no': True, 'name': True, 'cum_deaths': True}  # Add district name and other relevant data to hover info

)

# Update the layout to set the color scale and other properties
fig.update_layout(
    coloraxis_colorbar=dict(
        title="Total deaths",
        ticks="outside"
    ),
    margin={"r":0,"t":0,"l":0,"b":0}
)

# Show the map
fig.show()





In [None]:
gdf.head(60)