<a href="https://colab.research.google.com/github/pandemic-tracking/viz-gen/blob/main/ptc_pivottable_hexmaps_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install geopandas

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
import pytz

import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

from pathlib import Path

pd.set_option("display.precision", 4)

# Read the clock once so the Eastern and UTC stamps describe the same instant.
# datetime.utcnow() is deprecated (Python 3.12+) and returns a naive value, so an
# aware UTC datetime is used instead; the formatted strings are unchanged.
now_utc = datetime.now(pytz.utc)
now_est = now_utc.astimezone(pytz.timezone("US/Eastern"))

now_est_time = now_est.strftime("%Y-%m-%d, %H:%M:%S ET")
now_est_date = now_est.strftime("%Y-%m-%d")
now_est_timestamp = now_est.strftime("%Y%m%d_%H%M%S")
# Used as the suffix of every exported asset name further down.
now_utc_timestamp = now_utc.strftime("%Y%m%d_%H%M%S")
print(now_est_time, now_est_date, now_est_timestamp, now_utc_timestamp)

In [None]:
# adapting from https://towardsdatascience.com/consistently-beautiful-visualizations-with-altair-themes-c7f9f889602

def ptc_theme():
    """Return the Altair/Vega-Lite theme configuration used by this notebook.

    Returns:
        dict: A ``{"config": {...}}`` mapping with defaults for chart titles,
        text marks, headers, the x and y axes, legends and the view, in the
        shape expected by ``alt.themes.register``.
    """
    axisColor = "#808080"
    gridColor = "#DEDDDD"
    font = 'Arial'
    labelFont = 'Arial'

    return {
        "config": {
            "title": {
                "fontSize": 20,
                "font": font,
                "anchor": "start", # equivalent of left-aligned.
                # Vega-Lite's title config calls this property "color";
                # the former "fontColor" key is not a title property.
                "color": "#000000",
                "fontWeight": "bold",
            },
            "text": {
                "font": font,
                # NOTE(review): "labelFont" does not look like a text-mark
                # property — confirm whether it has any effect.
                "labelFont": labelFont,
            },
            "header": {
                "font": font,
                "labelFont": labelFont,
                "titleFont": font,
            },
            "axisX": {
                "domain": False,
                "domainColor": axisColor,
                "labelColor": axisColor,
                "domainWidth": 1,
                "grid": False,
                "labelFont": labelFont,
                "labelFontSize": 12,
                "labelAngle": 0,
                "tickColor": axisColor,
                "tickSize": 5, # default, including it just to show you can change it
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10, # guessing, not specified in styleguide
                "title": "X Axis Title (units)",
            },
            "axisY": {
                "domain": False,
                "grid": True,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFont": labelFont,
                "labelColor": axisColor,
                "labelFontSize": 12,
                "labelAngle": 0,
                "labelAnchor": "end",
                "labelAlign": "right",
                "ticks": False, # even if you don't have a "domain" you need to turn these off.
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10, # guessing, not specified in styleguide
                "title": "Y Axis Title (units)",
                # titles are by default vertical left of axis so we need to hack this
                "titleAngle": 0, # horizontal
                "titleY": -10, # move it up
                "titleX": 18, # move it to the right so it aligns with the labels
            },
            #   "range": {
            #       "category": main_palette,
            #       "diverging": sequential_palette,
            #   },
            "legend": {
                "labelFont": labelFont,
                "labelFontSize": 12,
                "symbolType": "square", # just 'cause
                "symbolSize": 100, # default
                "titleFont": font,
                "titleFontSize": 12,
                "title": "", # set it to no-title by default
                "orient": "right", # so it's right next to the y-axis
                "offset": 0, # literally right next to the y-axis.
            },
            "view": {
                "stroke": "transparent", # altair uses gridlines to box the area where the data is visualized. This takes that off.
            },
        }
    }

# Register the theme function under a name, then make it the active theme so
# every chart built below picks up its config.
alt.themes.register("my_custom_theme", ptc_theme)
alt.themes.enable("my_custom_theme")

In [None]:
import geopandas as gpd

# Load the US hexmap and reproject it to EPSG:3857. The hexagon centroids are
# stored in that projected CRS as 'lon'/'lat' and position the state labels later.
hexmap_url = 'https://raw.githubusercontent.com/pandemic-tracking/viz-pages/main/docs/us_hexmap_4326.geojson'
gdf = gpd.read_file(hexmap_url).to_crs('epsg:3857')
hex_centroids = gdf.geometry.centroid
gdf['lon'] = hex_centroids.x
gdf['lat'] = hex_centroids.y
gdf.head(2)

In [None]:
# Quick visual check of the reprojected hexagon geometries.
gdf.plot()

In [None]:
# Snapshot of US states' breakthrough-infection (BI) reporting, read directly from the GitHub-hosted CSV.
data_df = pd.read_csv('https://github.com/pandemic-tracking/bi/raw/main/US%20states%20breakthrough%20reporting%20-%20Snapshot.csv')
# Hard-coded date that ends up in the tri-hexmap title.
# NOTE(review): presumably has to be updated by hand for each new snapshot — confirm.
collection_date = '2022-02-03'

In [None]:
# Display the raw snapshot for inspection.
data_df

In [None]:
# List every column name; metrics_cols below is a hand-picked selection of these.
list(data_df.columns)

In [None]:
# Columns of data_df treated as reporting metrics. Each one is later reduced to a
# 0/1 "is a value present?" flag, so only presence/absence of data matters here.
metrics_cols = [
 'BI cases',
 'BI cases as a percent of fully vaccinated',
 'BI hosp',
 'BI hosp as a percent of fully vaccinated',
 'BI COVID related hosp',
 'BI COVID related hosp as a percent of fully vaccinated',
 'BI deaths',
 'BI deaths as a percent of fully vaccinated',
 'BI COVID related deaths',
 'BI COVID related deaths as a percent of fully vaccinated',
 'not BI cases',
 'not BI cases as a percent of not vaccinated',
 'not BI hosp',
 'not BI hosp as a percent of not vaccinated',
 'not BI COVID related hosp',
 'not BI deaths',
 'not BI deaths as percent of not vaccinated',
 'not BI COVID related deaths',
 'not BI COVID related deaths as percent of not vaccinated',
 'Total cases (since fully vax was option)',
 'Total hosp (since fully vax was option)',
 'Total COVID related hosp (since fully vax was option)',
 'Total deaths (since fully vax was option)',
 'Total COVID related deaths (since fully vax was option)',
 'Total Individuals fully vaccinated',
 'Total Individuals not fully vaccinated',
]

In [None]:
# Keep only the state abbreviation and the metric columns; 'Abbr' is renamed to
# 'State', the key used when merging onto the hexmap.
df = data_df[['Abbr']+metrics_cols].rename({'Abbr':'State'}, axis=1)

In [None]:
# Inspect the trimmed frame before its values are turned into flags.
df

In [None]:
# Replace every metric value with 1 when the state has a value (non-null) and 0 otherwise.
# NOTE(review): not idempotent — re-running this cell turns every flag into 1, since 0 is also non-null.
df[metrics_cols] = df[metrics_cols].notnull().astype(int)

In [None]:
# Append a summary row labelled 'Count' holding, per metric, the sum of the 0/1 flags.
# NOTE(review): assumes the default 0..n-1 index so that df.shape[0] is a new label;
# re-running the cell appends another row that also sums the previous 'Count' row.
df.loc[df.shape[0]]= ['Count'] + df[metrics_cols].sum(axis=0).to_list()

In [None]:
# Inspect the flags together with the appended 'Count' row.
df

In [None]:
# Attach the reporting flags to each hexagon. The left join keeps every hexagon,
# leaving NaN flags where no row of df has a matching 'State'.
# NOTE(review): presumably no hexagon has Code 'Count', so the summary row is not carried over — confirm.
gdf = pd.merge(gdf, df, how='left', left_on='Code', right_on='State')

In [None]:
# Drop geometry bookkeeping columns that none of the charts use.
# Reassigning (rather than inplace=True) with errors='ignore' keeps this cell
# safe to re-run after the columns have already been removed.
gdf = gdf.drop(columns=['OBJECTID','SHAPE_Leng','SHAPE_Area'], errors='ignore')

In [None]:
# A state counts as reporting hospitalizations if it reports either BI hosps OR BI hosps
# due to COVID; the same rule applies to deaths. Column names are made display-ready.
# Rows with NaN flags compare unequal to 1 and therefore get 0.

gdf['Reporting Breakthrough Hospitalizations'] = np.where((gdf['BI hosp'] == 1) | (gdf['BI COVID related hosp']==1),1,0)
gdf['Reporting Breakthrough Deaths'] = np.where((gdf['BI deaths'] == 1) | (gdf['BI COVID related deaths']==1),1,0)

# 'BI cases' needs no combining, so it is only renamed to match the two columns above.
gdf.rename({'BI cases':'Reporting Breakthrough Cases'}, axis=1, inplace=True)

In [None]:
# Hexagons without a matching row in df carry NaN after the left join; treat them as 0.
# Note that this fills NaN in every column, including the merged 'State' column.
gdf = gdf.fillna(0)

In [None]:
# Spot-check each combined "Reporting ..." flag against the two source flags it was built from.
gdf[['Code',
     'BI hosp','BI COVID related hosp','Reporting Breakthrough Hospitalizations',
     'BI deaths','BI COVID related deaths','Reporting Breakthrough Deaths'
     ]]

In [None]:
# Reshape to long form: one row per (hexagon, metric), with the metric name in
# 'variable' and its flag in 'value' (melt's default output column names).
gdf_melt = gdf.melt(id_vars=['Code','Name','FIPS','geometry','lon','lat','State'])

In [None]:
# Inspect the long-form frame that feeds the maps.
gdf_melt

In [None]:
# Distinct metric names in order of first appearance; they populate the dropdown below.
cat_options = list(gdf_melt['variable'].unique())
cat_options

In [None]:
# A dropdown filter: the chosen metric name is matched against the 'variable' field.
cat_dropdown = alt.binding_select(options=cat_options)
cat_select = alt.selection_single(fields=['variable'], bind=cat_dropdown, name="Metric", init={'variable':cat_options[0]})

# Grey hexagons shared by every layer. The identity projection draws the
# already-projected coordinates as they are; reflectY flips the y direction.
base_map = alt.Chart(data=gdf_melt).mark_geoshape(stroke='black', strokeWidth=0.5).encode(
    color=alt.value('grey'),
    tooltip=["Code:O",'variable:O','value:Q'],
    ).project(
      type='identity', reflectY=True
).properties(width=500, height=280)

# Named value_map rather than `map` so the Python builtin is not shadowed.
# domain=[1,0] pins value 1 to blue and value 0 to light grey, which agrees with
# the label colours below (white on 1, dark on 0) and with make_hexmap.
value_map = base_map.encode(
    color=alt.Color("value:Q", scale=alt.Scale(range=['blue','#dbdbdb'], domain=[1,0]), legend=None),
)

# State codes placed at the hexagon centroids.
map_text = base_map.mark_text(size=12).encode(
    latitude='lat',
    longitude='lon',
    text='Code',
    color=alt.Color("value:Q", scale=alt.Scale(range=['#ffffff','#2b2b2b'], domain=[1,0]), legend=None),
    
)

# Heading showing the selected metric name, offset with dx/dy.
# NOTE(review): presumably filtered to a single state (Code == 'CA') so the text
# is drawn once instead of once per hexagon — confirm.
display_text = base_map.mark_text(size=16, fontWeight=600, dx=-10, dy=-130).encode(
    text='variable:O',
    color=alt.value('black')
).transform_filter(alt.datum.Code=='CA')

# Layers share one colour scale unless told otherwise; the fill and label scales
# use different ranges, so they are resolved independently (as make_hexmap does).
hexmap = (base_map+value_map+map_text+display_text).transform_filter(cat_select
                                            ).add_selection(cat_select
                                            ).resolve_scale(color='independent'
                                            ).configure_view(strokeWidth=0)
hexmap

In [None]:
# Per-metric sum of the 0/1 flags across hexagons; make_hexmap prefixes each map heading with it.
metric_count_dict = gdf[['Reporting Breakthrough Cases','Reporting Breakthrough Hospitalizations','Reporting Breakthrough Deaths']].sum().astype('int').to_dict()
metric_count_dict

In [None]:
# Human-readable form of collection_date, e.g. "February 3 2022".
# The day and year come from .day/.year instead of strftime's "%-d", which is a
# platform-specific extension and is rejected on some platforms (e.g. Windows).
collection_dt = datetime.strptime(collection_date,"%Y-%m-%d")
display_date = f"{collection_dt:%B} {collection_dt.day} {collection_dt.year}"
collection_date, display_date

In [None]:
# Fill colours for the three static maps (cases, hospitalizations, deaths) and the shared colour for value 0.
case_color, hosp_color, death_color, zero_color = '#3E85C6', '#0D5395', '#063763', '#dbdbdb'

def make_hexmap(metric, pos_color, neg_color):
  """Build the static layered hexmap for one reporting metric.

  Args:
    metric: Metric name; must be a key of ``metric_count_dict`` and is matched
      against the ``variable`` field of the chart data.
    pos_color: Colour at value 1 of the fill scale; also used for the heading text.
    neg_color: Colour at value 0 of the fill scale.

  Returns:
    The layered chart (heading, grey base, coloured fill, state labels) filtered
    to ``metric``, with colour scales resolved independently per layer.
  """
  heading = str(metric_count_dict[metric])+' '+metric
  heading_layer = display_text.encode(
    color=alt.value(pos_color),
    text=alt.value(heading)
  )

  fill_scale = alt.Scale(range=[pos_color, neg_color], domain=[1,0])
  fill_layer = base_map.encode(color=alt.Color("value:Q", scale=fill_scale, legend=None))

  layered = heading_layer + base_map + fill_layer + map_text
  metric_only = layered.transform_filter(alt.datum.variable==metric)
  return metric_only.resolve_scale(color='independent')

# One static map per reporting metric, each with its own positive colour and the shared zero colour.
case_map =  make_hexmap('Reporting Breakthrough Cases', case_color, zero_color)
hosp_map =  make_hexmap('Reporting Breakthrough Hospitalizations', hosp_color, zero_color)
death_map = make_hexmap('Reporting Breakthrough Deaths', death_color, zero_color)

# Layout: cases and hospitalizations side by side, deaths centred underneath.
# The title carries display_date and two empty subtitle lines.
tri_hexmaps = alt.vconcat(
    case_map|hosp_map,
    death_map,
    center=True,
).properties(title=alt.TitleParams(f'States Reporting Cumulative Breakthrough Cases, Hospitalizations, And Deaths As Of {display_date}', subtitle=['',''])
).configure_axis(
  labelFontSize=12,
  titleFontSize=16,
  labelColor='grey',
  # gridColor='#ccc',
  gridWidth=1,
  offset=0,
).configure_header(
  titleFontSize=16,
  labelFontSize=16, 
  titleFont='Arial'
).configure_title(
  fontSize=16,
).configure_legend(
  titleFontSize=12,
  labelFontSize=12
).configure_view(strokeWidth=0)

tri_hexmaps

# Scorecard Grade Bar

In [None]:
# Download the published scorecard sheet as CSV into scores.csv.
# The URL is quoted so the shell cannot treat the '?' in it as a glob character.
!wget "https://docs.google.com/spreadsheets/d/e/2PACX-1vR2wl6WldIvP4JiSAqCtX6GAmdVBwMIOpMAickyf3lrv8Fcbc2O2ZGoeyBehS4hXbWxtzqiuHfN4PrT/pub?output=csv" -O scores.csv

In [None]:
# Load the downloaded scorecard; 'Last Updated' is parsed as datetimes.
scores_df = pd.read_csv('scores.csv', parse_dates=['Last Updated'])
# String form of the update date, parsed again in the next cell to build the chart subtitle.
scores_df['date_str'] = scores_df['Last Updated'].astype('str')
# The colour category starts as a copy of the letter grade; rows with Score 0 are relabelled further down.
scores_df['Grade_color'] = scores_df['Grade']

In [None]:
print(scores_df['date_str'].unique())
# The first distinct update date is used for the subtitle.
# NOTE(review): presumably the sheet carries a single date — confirm.
scorecard_date = scores_df['date_str'].unique()[0]
# Day and year come from .day/.year instead of strftime's "%-d", which is a
# platform-specific extension and is rejected on some platforms (e.g. Windows).
scorecard_dt = datetime.strptime(scorecard_date,"%Y-%m-%d")
scorecard_display_date = f"{scorecard_dt:%B} {scorecard_dt.day} {scorecard_dt.year}"
print(scorecard_date, scorecard_display_date)

In [None]:
# divergent palette
# grade_colors = {
#     'A': '#488082',
#     'B': '#8AD9D4',
#     'C': '#FFE0D0',
#     'D': '#FF8545',
#     'F': '#BA4100',
#     'F (No data)': '#999891',
    
# }

# sequential palette: one teal shade per letter grade, lightest for A through
# darkest for F, plus a grey for the separate 'F (No data)' category.
# Insertion order matters: the keys and values are used as scale domain and range.
grade_colors = dict([
    ('A', '#71D1C8'),
    ('B', '#5DACA7'),
    ('C', '#488682'),
    ('D', '#345F5D'),
    ('F', '#1F3938'),
    ('F (No data)', '#999891'),
])

list(grade_colors), list(grade_colors.values())

In [None]:
# Rows with a Score of 0 get their own colour category; the 'Grade' column itself is left untouched.
scores_df.loc[scores_df['Score']==0, 'Grade_color'] = 'F (No data)'
scores_df

In [None]:
# Number of rows per colour category, ordered by category label.
scores_df['Grade_color'].value_counts().sort_index()

In [None]:
# Shared encodings: one x position per letter grade (F on the left, A on the right)
# and a stacked count of states on y.
base_bars = alt.Chart(scores_df).encode(
    x=alt.X('Grade:O', sort=['F','D','C','B','A']),
    y=alt.Y('count(State)', stack='zero'),
)

# Bars coloured by Grade_color. Encoding 'State' on stroke splits each bar into one
# segment per state; the single-colour range makes every segment outline white.
# NOTE(review): 'none' in sort=['none','F'] does not match any grade_colors key — confirm what it is for.
grade_bars = base_bars.mark_bar(width=150).encode(
    color=alt.Color('Grade_color', legend=None,  sort=['none','F'], scale=alt.Scale(domain=list(grade_colors.keys()), range=list(grade_colors.values()))),
    stroke=alt.Stroke('State', legend=None, scale=alt.Scale(range=['#fff'])),
    strokeWidth=alt.value(2),
)

# White state labels derived from grade_bars; the stroke is re-encoded on
# Grade_color with a stroke width of 0.
grade_text = grade_bars.mark_text(size=18, dy=18, fontWeight=800).encode(
    text='State',
    color=alt.value('white'),
    stroke=alt.Stroke('Grade_color', sort=['none','F'], legend=None, scale=alt.Scale(domain=list(grade_colors.keys()), range=list(grade_colors.values()))),
    strokeWidth=alt.value(0),
)

# Black per-grade state count, shifted upwards with dy.
grade_count = base_bars.mark_text(size=18, dy=-18, fontWeight=800).encode(
    text='count(State)',
    color=alt.value('black'),
)

# The stroke scales differ between layers (State vs Grade_color), so they are resolved independently.
grade_barviz = (grade_bars+grade_text+grade_count).resolve_scale(stroke='independent').properties(width=1000, height=550,
                                                              title={
                                                                "text": ['United States Vaccine Breakthrough Data Reporting Grades',], 
                                                                "subtitle": [f'As of {scorecard_display_date}',''],
                                                                "subtitleFontSize": 14
                                                              }
                                                              ).configure_axisY(grid=False, domain=False, labels=False).configure_axisX(ticks=False, labelFontSize=20, labelFontWeight=800, labelPadding=15)
grade_barviz

# Scorecard hexmap

In [None]:
# Join the grades onto the long-form hexmap rows; the left join keeps hexagons without a grade.
merged_scores = pd.merge(gdf_melt, scores_df, how='left', left_on='Code', right_on='State')
# gdf_melt has one row per (hexagon, metric); keeping a single metric ('BI hosp')
# leaves one row per hexagon.
one_row_per_hex = merged_scores[merged_scores['variable']=='BI hosp']
# sort_values returns a new frame, so nothing is sorted in place on a frame that
# came out of chained selection (which can trigger SettingWithCopyWarning).
scores_hexmap_df = one_row_per_hex[['Code','Name','geometry','lon','lat','Grade','Grade_color']].sort_values('Code')
scores_hexmap_df

In [None]:
# Grey hexagons with a Code/Grade tooltip, drawn with the identity projection (y reflected).
grade_base_map = alt.Chart(data=scores_hexmap_df).mark_geoshape(stroke='black', strokeWidth=0.5).encode(
    color=alt.value('grey'),
    tooltip=["Code:O",'Grade'],
    ).project(
      type='identity', reflectY=True
).properties(width=900, height=550)

# Hexagons filled by colour category using the grade_colors palette; the legend is
# positioned manually (orient='none') at x=900.
grade_map = grade_base_map.encode(
    color=alt.Color("Grade_color:N", legend=alt.Legend(orient='none', legendX=900, labelFontSize=16), 
                    scale=alt.Scale(domain=list(grade_colors.keys()), range=list(grade_colors.values()))),
)

# White state codes placed at the hexagon centroids.
grade_map_text = grade_base_map.mark_text(size=20, fontWeight=800).encode(
    latitude='lat',
    longitude='lon',
    text='Code',
    color=alt.value('white'),
)

# Filled map plus labels, titled with the scorecard date.
grade_hexmap = (grade_map+grade_map_text).properties(
                title={
                  "text": ['United States Vaccine Breakthrough Data Reporting Grades',], 
                  "subtitle": [f'As of {scorecard_display_date}',''],
                  "subtitleFontSize": 14
                })
grade_hexmap

# Combo hexmap and barchart viz

In [None]:
# Narrower variant of the grade bar chart, built from scores_hexmap_df (keyed by
# 'Code' instead of 'State') so it can sit next to the hexmap.
combo_base_bars = alt.Chart(scores_hexmap_df).encode(
    x=alt.X('Grade:O', sort=['F','D','C','B','A']),
    y=alt.Y('count(Code)', stack='zero'),
)

# Bars coloured by Grade_color; encoding 'Code' on stroke splits each bar into one
# white-outlined segment per hexagon.
combo_grade_bars = combo_base_bars.mark_bar(width=60).encode(
    color=alt.Color('Grade_color', legend=None,  sort=['none','F'], scale=alt.Scale(domain=list(grade_colors.keys()), range=list(grade_colors.values()))),
    stroke=alt.Stroke('Code', legend=None, scale=alt.Scale(range=['#fff'])),
    strokeWidth=alt.value(2),
)

# White state codes; note this layer derives from combo_base_bars, whereas the
# full-width chart derives its text layer from the bar layer.
combo_grade_text = combo_base_bars.mark_text(size=18, dy=18, fontWeight=800).encode(
    text='Code',
    color=alt.value('white'),
    stroke=alt.Stroke('Grade_color', sort=['none','F'], legend=None, scale=alt.Scale(domain=list(grade_colors.keys()), range=list(grade_colors.values()))),
    strokeWidth=alt.value(0),
)

# Black per-grade count, shifted upwards with dy.
combo_grade_count = combo_base_bars.mark_text(size=18, dy=-18, fontWeight=800).encode(
    text='count(Code)',
    color=alt.value('black'),
)

# No title or axis config here: both are applied on the combined chart in the next cell.
combo_grade_barviz = (combo_grade_bars+combo_grade_text+combo_grade_count).resolve_scale(stroke='independent').properties(width=400, height=550,
                                                              # title={
                                                              #   "text": ['United States Vaccine Breakthrough Data Reporting Grades',], 
                                                              #   "subtitle": [f'As of {display_date}',''],
                                                              #   "subtitleFontSize": 14
                                                              # }
                                                              )
combo_grade_barviz

In [None]:
# Hexmap (fill + labels) on the left and the grade bar chart on the right, under
# one shared title. `+` binds tighter than `|`, so the two map layers are combined
# first; the grouping is spelled out here.
combo_title = {
    "text": ['United States Vaccine Breakthrough Data Reporting Grades',],
    "subtitle": [f'As of {scorecard_display_date}',''],
    "subtitleFontSize": 16
}
combo_map_layers = grade_map + grade_map_text
combo_hexmap_barviz = (
    (combo_map_layers | combo_grade_barviz)
    .properties(title=combo_title)
    .configure_axisY(grid=False, domain=False, labels=False)
    .configure_axisX(ticks=False, labelFontSize=20, labelFontWeight=800, labelPadding=15)
)
combo_hexmap_barviz

# Export CSV of grades for the Flourish update

In [None]:
# Only the state, its letter grade and its colour category are exported for Flourish.
flourish_scores_df = scores_df[['State','Grade','Grade_color']]
flourish_scores_df

In [None]:
# Written to the working directory; with to_csv's defaults the row index is included as the first column.
flourish_scores_df.to_csv('flourish_scorecard_update.csv')

# Export GeoJSON file for Flourish (only needed the first time the Flourish hexmap template is created; the code below is commented out)

In [None]:
# scores_hexmap_df_4326 = scores_hexmap_df.to_crs('epsg:4326')
# scores_hexmap_df_4326.loc[:, 'lon'] = scores_hexmap_df_4326.geometry.centroid.x
# scores_hexmap_df_4326.loc[:, 'lat'] = scores_hexmap_df_4326.geometry.centroid.y

# scores_hexmap_df_4326.to_file(f'hexmap-grade-{collection_date}.geojson', driver='GeoJSON')

# Save to Drive

In [None]:
# this is for saving altair charts to png and svg, based on https://colab.research.google.com/github/altair-viz/altair_saver/blob/master/AltairSaver.ipynb#scrollTo=ZiTDBCAM_Ni8
!pip install -q altair_saver
!npm install --silent vega-lite vega-cli canvas

In [None]:
from pathlib import Path
from altair_saver import save

# Local staging directory: files written here are uploaded to Google Drive at the end of the notebook.
SAVE_PATH = Path('assets')
# exist_ok=True makes re-running the cell harmless when the directory already exists.
SAVE_PATH.mkdir(exist_ok=True)

# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# The Colab auth helper signs the user in; the resulting application-default
# credentials are then handed to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Module-level Drive client used by assets_to_gdrive and the notebook snapshot cell.
gdrive = GoogleDrive(gauth)

In [None]:
def assets_to_gdrive(folder_name, localdir_path = SAVE_PATH, parentdir_id='17Kx2uZbQv1r5U1M9x_OXS4lpMU5c6Ym8'):
  """Upload every file in a local directory to a named Google Drive folder.

  The folder is looked up by title among the non-trashed sub-folders of
  ``parentdir_id`` and is created there when no folder has that title.

  Args:
    folder_name: Title of the destination Drive folder.
    localdir_path: Local directory whose files are uploaded (path or string).
    parentdir_id: Drive id of the parent folder that holds the destination folder.

  Returns:
    None. Progress is reported with ``print``.
  """
  local_dir = Path(localdir_path)

  # search gdrive for snapshot folder and save assets there if it already exists.
  folder_id = ''
  folder_query = f"'{parentdir_id}' in parents and mimeType = 'application/vnd.google-apps.folder' and trashed=false"
  for candidate in gdrive.ListFile({'q': folder_query}).GetList():
      # No early exit: if several folders share the title, the last one listed is used.
      if candidate['title'] == folder_name:
        folder_id = candidate['id']
        print(f'Found pre-existing gdrive folder named "{folder_name}" at',folder_id)
  # if not, create new folder
  if folder_id == '':
    folder = gdrive.CreateFile(metadata={'title': folder_name,
                                      'parents':[{'id': parentdir_id}],
                                      "mimeType": "application/vnd.google-apps.folder"
                                      })
    folder.Upload()
    folder_id = folder.get('id')
    print(f'Created new gdrive folder named "{folder_name}" at',folder_id)

  # upload all regular files within the local directory to the snapshot folder,
  # in name order so the upload log is reproducible
  for asset_file in sorted(local_dir.iterdir()):
    if not asset_file.is_file():
      # A directory cannot be opened as upload content; skip it instead of crashing.
      print('Skipped non-file: ',asset_file.name)
      continue
    file1 = gdrive.CreateFile(metadata={'title':asset_file.name,
                                        'parents':[{'id': folder_id}],
                                        })
    # iterdir() already yields the full path; PyDrive documents a filename string here.
    file1.SetContentFile(str(asset_file))
    file1.Upload()
    print('Saved file: ',asset_file.name)


In [None]:
def save_vizassets(chart, save_path, filename, fmts=('html','png','svg',#'json','pdf'
                                                     )):
  """Save a chart once per requested format as ``<save_path>/<filename>.<fmt>``.

  Args:
    chart: Chart object handed to altair_saver's ``save``.
    save_path: Destination directory (path or string).
    filename: File name without extension.
    fmts: Iterable of format extensions. The default is an immutable tuple so
      the shared default cannot be modified between calls.
  """
  for fmt in fmts:
    save(chart, f'{save_path}/{filename}.{fmt}')

In [None]:
# put your stuff (i.e. dataframes, altair charts, input data files) to save here

In [None]:
# examples:

# cpr_df.to_csv(SAVE_PATH/'cpr_df.csv')
# source_concat.to_csv(SAVE_PATH/'source_concat.csv')
# save_vizassets(state_vax_scatter, SAVE_PATH, f'state_vax_scatter_{now_utc_timestamp}')

# Long-form hexmap data behind the metric maps.
gdf_melt.to_csv(SAVE_PATH/'gdf_melt.csv')

# Each chart is saved as "<prefix>_<UTC timestamp>" in every default format.
charts_to_save = [
    (hexmap, 'hexmap'),
    (tri_hexmaps, 'trihexmaps'),
    (grade_barviz, 'grade_barviz'),
    (grade_hexmap, 'grade_hexmap'),
    (combo_hexmap_barviz, 'combo_hexmap_barviz'),
]
for chart_obj, file_prefix in charts_to_save:
    save_vizassets(chart_obj, SAVE_PATH, f'{file_prefix}_{now_utc_timestamp}')

# Per-hexagon grades behind the grade maps.
scores_hexmap_df.to_csv(SAVE_PATH/'scores_hexmap_df.csv')
# scores_hexmap_df_4326.to_crs('epsg:4326').to_file(SAVE_PATH/f'hexmap-grade-{collection_date}.geojson', driver='GeoJSON')

In [None]:
# Copy the downloaded scorecard and the Flourish export into the staging directory.
# Both files were written relative to the working directory, so relative paths
# are used here instead of assuming that directory is /content.
!cp scores.csv $SAVE_PATH/
!cp flourish_scorecard_update.csv $SAVE_PATH/

In [None]:
# get the colab filename
from requests import get

# Ask the local sessions endpoint once and take both values from the same
# response, so the name and id always describe the same session entry.
colab_session = get('http://172.28.0.2:9000/api/sessions').json()[0]
nb_name = colab_session['name'].replace('.ipynb','')
nb_id = colab_session['notebook']['path'].replace('fileId=','')

print(SAVE_PATH, nb_name, now_utc_timestamp, nb_id)

# create a snapshot of this currently running notebook and save to SAVE_PATH
downloaded_nb = gdrive.CreateFile({'id':nb_id})   # replace the id with id of file you want to access
downloaded_nb.GetContentFile(SAVE_PATH/f'{nb_name}_{now_utc_timestamp}.ipynb')

In [None]:
# upload everything to gdrive
# The destination folder is named after the notebook plus the UTC timestamp of this run.
assets_to_gdrive(folder_name=f'{nb_name}_{now_utc_timestamp}')