<a href="https://colab.research.google.com/github/pandemic-tracking/viz-gen/blob/main/louisville_wws.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install geopandas

In [None]:
import geopandas as gpd

In [None]:
import pandas as pd
import numpy as np

from datetime import datetime, timedelta
import pytz

import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

from pathlib import Path

pd.set_option("display.precision", 4)

# Read the clock once, as a timezone-aware UTC instant, and derive every stamp
# from it so the Eastern and UTC values always describe the same moment.
# (datetime.utcnow() is deprecated since Python 3.12 and returns a naive value.)
now_utc = datetime.now(pytz.utc)
now_est = now_utc.astimezone(pytz.timezone("US/Eastern"))

now_est_time = now_est.strftime("%Y-%m-%d, %H:%M:%S ET")  # human-readable label
now_est_date = now_est.strftime("%Y-%m-%d")
now_est_timestamp = now_est.strftime("%Y%m%d_%H%M%S")  # filename-safe
now_utc_timestamp = now_utc.strftime("%Y%m%d_%H%M%S")  # used in saved asset / snapshot names
print(now_est_time, now_est_date, now_est_timestamp, now_utc_timestamp)

In [None]:
# adapting from https://towardsdatascience.com/consistently-beautiful-visualizations-with-altair-themes-c7f9f889602

def ptc_theme():
    """Return the Altair theme used by this notebook's charts.

    The result is a plain dict with a single top-level ``"config"`` key holding
    Vega-Lite configuration sections (title, text, header, axisX, axisY,
    legend, view). It is meant to be registered with ``alt.themes.register``.

    Returns:
        dict: ``{"config": {...}}`` with the styling defaults below.
    """
    axisColor = "#808080"
    gridColor = "#DEDDDD"
    font = 'Arial'
    labelFont = 'Arial'
    # Colors
    # main_palette = ["#1696d2", 
    #                 "#d2d2d2",
    #                 "#000000", 
    #                 "#fdbf11", 
    #                 "#ec008b", 
    #                 "#55b748", 
    #                 "#5c5859", 
    #                 "#db2b27", 
    #                ]
    # sequential_palette = ["#cfe8f3", 
    #                       "#a2d4ec", 
    #                       "#73bfe2", 
    #                       "#46abdb", 
    #                       "#1696d2", 
    #                       "#12719e", 
    #                      ]
    return {
        "config": {
            "title": {
                "fontSize": 20,
                "font": font,
                "anchor": "start",  # equivalent of left-aligned.
                # Vega-Lite's title config names this property "color";
                # the former "fontColor" key is not part of the schema.
                "color": "#000000",
                "fontWeight": "normal",
            },
            "text": {
                "font": font,
                # NOTE(review): "labelFont" does not appear to be a text-mark
                # config property in Vega-Lite — confirm whether it is needed.
                "labelFont": labelFont,
            },
            "header": {
                # NOTE(review): header config appears to expose only
                # labelFont/titleFont, not "font" — confirm.
                "font": font,
                "labelFont": labelFont,
                "titleFont": font,
            },
            "axisX": {
                "domain": False,
                "domainColor": axisColor,
                "labelColor": axisColor,
                "domainWidth": 1,
                "grid": False,
                "labelFont": labelFont,
                "labelFontSize": 12,
                "labelAngle": 0,
                "tickColor": axisColor,
                "tickSize": 5,  # default, including it just to show you can change it
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10,  # guessing, not specified in styleguide
                "title": "X Axis Title (units)",
            },
            "axisY": {
                "domain": False,
                "grid": True,
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFont": labelFont,
                "labelColor": axisColor,
                "labelFontSize": 12,
                "labelAngle": 0,
                "labelAnchor": "end",
                "labelAlign": "right",
                "ticks": False,  # even if you don't have a "domain" you need to turn these off.
                "titleFont": font,
                "titleFontSize": 12,
                "titlePadding": 10,  # guessing, not specified in styleguide
                "title": "Y Axis Title (units)",
                # titles are by default vertical left of axis so we need to hack this
                "titleAngle": 0,  # horizontal
                "titleY": -10,  # move it up
                "titleX": 18,  # move it to the right so it aligns with the labels
            },
            #   "range": {
            #       "category": main_palette,
            #       "diverging": sequential_palette,
            #   },
            "legend": {
                "labelFont": labelFont,
                "labelFontSize": 12,
                "symbolType": "circle",  # just 'cause
                "symbolSize": 100,  # default
                "titleFont": font,
                "titleFontSize": 12,
                "title": "",  # set it to no-title by default
                "orient": "right",  # so it's right next to the y-axis
                "offset": 0,  # literally right next to the y-axis.
            },
            "view": {
                "stroke": "transparent",  # altair uses gridlines to box the area where the data is visualized. This takes that off.
            },
        }
    }

# Register ptc_theme under a name and make it the active Altair theme for the
# charts built in the rest of the notebook.
alt.themes.register("my_custom_theme", ptc_theme)
alt.themes.enable("my_custom_theme")

# Load case data by ZIP code and for Jefferson County

In [None]:
# downloaded shapefile zip from https://data.lojic.org/datasets/LOJIC::weekly-covid-19-average-daily-incidence-rate-per-zip-code-in-jefferson-county-ky/about
# NOTE(review): parse_dates is not a documented geopandas.read_file parameter;
# extra keyword arguments are handed to the underlying file reader — confirm it
# has any effect. COLLECTION is converted with pd.to_datetime where date_ending
# is computed.
zip_gdf = gpd.read_file('/content/Weekly_COVID-19_Average_daily_incidence_rate_per_zip_code_in_Jefferson_County%2C_KY (1).zip', parse_dates=['COLLECTION'])

In [None]:
zip_gdf.plot()

In [None]:
zip_gdf[zip_gdf.ZIPCODE.isna()]

In [None]:
# Keep only the rows for the collection dated 2022-01-16 (presumably one row
# per ZIP code — confirm); this subset is the ZIP boundary layer of the map.
zip_shapes = zip_gdf[zip_gdf['COLLECTION']=='2022-01-16']
zip_shapes.plot()

In [None]:
# Merge every ZIP geometry into one shape; it is drawn as the county boundary
# outline on the map.
zips_merged = zip_gdf.dissolve()
zips_merged.plot()

In [None]:
# date_ending = COLLECTION + 6 days (presumably COLLECTION is the first day of
# a 7-day reporting window, making this its last day — confirm against source).
zip_gdf['date_ending'] = pd.to_datetime(zip_gdf['COLLECTION'])+timedelta(days=6)

In [None]:
# downloaded from https://data.lojic.org/datasets/covid-19-daily-case-count-in-jefferson-county-ky/explore
county_df = pd.read_csv('/content/COVID-19_Daily_Case_Count_in_Jefferson_County%2C_KY (1).csv', parse_dates=['reported'])

In [None]:
# Order rows chronologically so the rolling average computed on this frame
# runs over consecutive report dates.
county_df = county_df.sort_values('reported')

In [None]:
# Rolling mean over a 7-row window of the daily rate; a value is produced once
# at least 5 of the 7 rows in the window are non-missing.
# NOTE(review): the window counts rows, not calendar days — assumes one row per
# day with no gaps; confirm.
county_df['confirmed_per100k_7davg'] = county_df['Confirmed_RatePer100K'].rolling(7, min_periods=5).mean()

In [None]:
county_df.columns

In [None]:
county_df[['reported','Confirmed_RatePer100K','confirmed_per100k_7davg']].tail(50)

# Load WWS data from Louisville

In [None]:
# excel file from UL partner, imported into Google Sheets here: https://docs.google.com/spreadsheets/d/1cil_OTYN8GiKwj9FDioj60yvWpw_5k5VU5bTWwN-ojo/edit?usp=sharing
ww_df = pd.read_csv('/content/louisville-wws-data - adjusted SARS-CoV-2 N1 (2).csv')
# ww_df = pd.read_excel('/content/viral_concentration_and_case_data_2022-01-24_JZ (1).xlsx', sheet_name='adjusted SARS-CoV-2 N1')
ww_df

In [None]:
# Promote data row 0 to the column header, then drop the first three data rows:
# row 0 is the header just promoted, row 1 is the row the zone-number lookup is
# built from, and row 2 is presumably also metadata — confirm against the sheet.
ww_df.columns = ww_df.iloc[0]
# .copy() gives an independent frame, so the later in-place rename and column
# assignment do not operate on a slice of the raw frame (SettingWithCopyWarning).
ww_df = ww_df.iloc[3:].copy()

In [None]:
# The column headed 'catchment area name' is used as the sample date from here
# on: rename it to 'date' and parse its values as datetimes.
ww_df.rename({'catchment area name':'date'}, axis=1, inplace=True)
ww_df['date'] = pd.to_datetime(ww_df['date'])

In [None]:
ww_df

In [None]:
# Reshape wide -> long: one row per (date, sewershed) pair, with the reading in
# the default 'value' column.
ww_df_melt = ww_df.melt(id_vars='date', var_name='sewershed')

In [None]:
# Cast readings to float so they can be compared and plotted numerically.
# NOTE(review): astype raises if any cell is not parseable as a number.
ww_df_melt['value'] = ww_df_melt['value'].astype('float')

In [None]:
ww_df_melt[ww_df_melt['value']==ww_df_melt.value.max()]

In [None]:
ww_df_melt

In [None]:
# Sewershed (catchment area) polygons, read from a GeoJSON gist at a fixed
# revision hash.
sewershed_shapes = gpd.read_file('https://gist.githubusercontent.com/daveluo/2a3ea992e7a90d4c61bb518fc1715dcd/raw/bb1279f300b53be1aa430757265484ec7bbad896/Sewersheds_9_30.geojson')

In [None]:
sewershed_shapes.plot()

In [None]:
sewershed_shapes

In [None]:
# create dict for matching up the catchment area numbers and names
# The sheet is re-read because ww_df has already had its metadata rows dropped.
ww_df_nums = pd.read_csv('/content/louisville-wws-data - adjusted SARS-CoV-2 N1 (2).csv')
# ww_df_nums = pd.read_excel('/content/viral_concentration_and_case_data_2022-01-24_JZ (1).xlsx', sheet_name='adjusted SARS-CoV-2 N1')
# Data row 0 becomes the header (catchment names); data row 1 is then read as
# name -> number (presumably the zone numbers, as text — confirm).
ww_df_nums.columns = ww_df_nums.iloc[0]
ww_df_dict = ww_df_nums.iloc[1].to_dict()
# Invert to number -> name; if two names share a number the later one wins.
ww_df_dict_rev = {v:k for k,v in ww_df_dict.items()}
ww_df_dict_rev

In [None]:
sewershed_shapes[['Address','ZoneNUM']]

In [None]:
# Give zone '18' an empty name so the ZoneNUM lookup does not raise KeyError for
# it (presumably zone 18 is absent from the sheet — confirm).
ww_df_dict_rev['18']=''

In [None]:
# Address_2 = the sheet's catchment-area name for each polygon, looked up by
# the polygon's ZoneNUM converted to a string; an unknown ZoneNUM raises KeyError.
sewershed_shapes['Address_2'] = sewershed_shapes['ZoneNUM'].apply(lambda x: ww_df_dict_rev[str(x)])

In [None]:
sewershed_shapes[['ZoneNUM','Address','Address_2','geometry']]

In [None]:
# Back-fill each name column from the other: an Address that is a single space
# takes Address_2, then an empty-string Address_2 (the placeholder given to
# zone 18) takes the now-filled Address.
sewershed_shapes.Address = sewershed_shapes.Address.replace(' ',np.nan).combine_first(sewershed_shapes.Address_2)
sewershed_shapes.Address_2 = sewershed_shapes.Address_2.replace('',np.nan).combine_first(sewershed_shapes.Address)
sewershed_shapes[['ZoneNUM','Address','Address_2','geometry']]

In [None]:
sewershed_shapes.sort_values('ZoneNUM')

In [None]:
# Write the cleaned polygons (zone number, both name columns, geometry), sorted
# by zone number, to a GeoJSON file in the working directory.
sewershed_shapes.sort_values('ZoneNUM')[['ZoneNUM','Address','Address_2','geometry']].to_file('louisville_sewersheds.geojson', driver='GeoJSON')

In [None]:
# Attach the polygons' name columns to each reading by matching the sheet's
# sewershed label to Address_2. pd.merge defaults to an inner join, so readings
# whose label matches no Address_2 are dropped.
# NOTE(review): re-running this cell merges again onto the already-merged frame.
ww_df_melt = pd.merge(ww_df_melt, sewershed_shapes[['Address','Address_2']], left_on=['sewershed'], right_on=['Address_2'])

In [None]:
ww_df_melt

# Make Viz

In [None]:
viz_start_date = '2021-07-01'
min_visible_conc = 0.001  # lower bound of the wastewater chart's log-scale y domain

# Wastewater readings to plot: on/after the start date, excluding readings at or
# below the lowest visible concentration so the clamped log scale does not pin
# them to the bottom of the chart. A single boolean filter replaces overwriting
# whole rows (date and sewershed included) of a filtered slice with NaN, which
# also avoids pandas' chained-assignment warning. Rows whose value is already
# NaN are kept, as before.
in_window = ww_df_melt['date']>=viz_start_date
below_floor = ww_df_melt['value']<=min_visible_conc
ww_source_df = ww_df_melt[in_window & ~below_floor]

# Case-rate inputs restricted to the same start date.
zip_source_df = zip_gdf[(zip_gdf['date_ending']>=viz_start_date)][['ZIPCODE','date_ending','aveDaily_R']]
county_source_df = county_df[(county_df['reported']>=viz_start_date)]

In [None]:
# County-wide case rate: a thick line for the 7-day average and thin,
# more transparent bars for the raw daily rate, both against the report date.
county_viz_line = alt.Chart(county_source_df).mark_line(stroke='#0094ea', strokeWidth=4, opacity=0.7).encode(
    x=alt.X('reported:T', #scale=alt.Scale(domain=[ww_source_df.date.min(), ww_source_df.date.max()], clamp=False)
    ),
    y='confirmed_per100k_7davg'
)

county_viz_bar = alt.Chart(county_source_df).mark_bar(stroke='#0094ea', width=2, opacity=0.4).encode(
    x=alt.X('reported:T', #scale=alt.Scale(domain=[ww_source_df.date.min(), ww_source_df.date.max()], clamp=False)
    ),
    y='Confirmed_RatePer100K'
)

# Bars are listed first so the average line is layered on top of them.
county_viz = county_viz_bar+county_viz_line
county_viz

In [None]:
# Multi-selection keyed on ZIPCODE. The same selection object is also attached
# to the ZIP map, so clicking ZIP polygons there drives which lines are shown here.
zip_select = alt.selection(type='multi', fields=['ZIPCODE'], #bind=zip_dropdown, 
                                    name='Zip Code')

# One line (with point markers) per ZIP code of the weekly average daily rate;
# the legend is suppressed, and transform_filter restricts the lines to the
# current selection.
zip_viz = alt.Chart(zip_source_df).mark_line(point=True, opacity=0.7).encode(
    color=alt.Color('ZIPCODE', scale=alt.Scale(scheme='goldorange', reverse=False), legend=None),
    x=alt.X('date_ending:T', #scale=alt.Scale(domain=[ww_source_df.date.min(), ww_source_df.date.max()], clamp=True), 
            axis=alt.Axis(title=None)),
    y=alt.Y('aveDaily_R', axis=alt.Axis(title='Daily Cases per 100K (7d avg) per ZIP Code (orange) compared to Jefferson County-wide (blue)', titleAnchor='start', titleX=-50)),
    tooltip=['ZIPCODE','date_ending','aveDaily_R']
).properties(width=400, height=200).add_selection(zip_select).transform_filter(zip_select)

zip_viz

In [None]:
# ZIP code polygons as a clickable overlay: selected ZIPs are tinted yellow with
# a heavier outline, unselected ones stay faint light grey. Fill colour, opacity
# and stroke width are all conditioned on zip_select.
zip_base_map = alt.Chart(data=zip_shapes).mark_geoshape(stroke='black').encode(
    color=alt.condition(zip_select, alt.value('yellow'), alt.value('lightgrey')),
    opacity=alt.condition(zip_select, alt.value(0.3), alt.value(0.1)),
    strokeOpacity=alt.value(1),
    strokeWidth=alt.condition(zip_select, alt.value(2), alt.value(0.5)),
    tooltip=["ZIPCODE"],
    ).project(
      type='mercator', #reflectY=True
).properties(width=500, height=400).add_selection(zip_select)#.transform_filter(zip_select)

zip_base_map

In [None]:
# Unfilled outline of the dissolved ZIP geometry, drawn in the same blue as the
# county-wide case line.
county_boundary = alt.Chart(data=zips_merged).mark_geoshape(stroke='#0094ea', strokeWidth=3, opacity=0.5, fill=None).encode(
    ).project(
      type='mercator', #reflectY=True
).properties(width=500, height=400)
county_boundary

In [None]:
# Multi-selection on Address_2, driven by clicks on the colour legend;
# clear=False turns off the selection's clear interaction.
sewershed_select = alt.selection_multi(fields=['Address_2'], bind='legend', clear=False)

# Wastewater readings as filled points on a log-scale y axis. The x domain runs
# from the first plotted wastewater date to the last county report date. The
# y scale is clamped to [min_visible_conc, 1e5] and the marks are clipped to the
# plot area. Points are both filtered by the selection and given zero opacity
# when unselected.
sewershed_viz = alt.Chart(ww_source_df).mark_point(filled=True, clip=True).encode(
    x=alt.X('date:T', scale=alt.Scale(domain=[ww_source_df.date.min(), county_df.reported.max()], clamp=False), 
            axis=alt.Axis(title=None)),
    y=alt.Y('value:Q', axis=alt.Axis(title='Wastewater Virus Concentration by Area (PMMoV-adjusted SARS-CoV-2 N1 copies/mL) in log scale',
                                     titleAnchor='start', titleX=-50), scale=alt.Scale(type='log', domain=[min_visible_conc, 1e5], clamp=True)),
    color=alt.Color('Address_2', scale=alt.Scale(scheme='category20')),
    opacity = alt.condition(sewershed_select, alt.value(0.8), alt.value(0.)),
    tooltip=['date','Address_2','value']
).properties(width=400, height=200).add_selection(sewershed_select).transform_filter(sewershed_select)

sewershed_viz

In [None]:
# Plain grey catchment-area polygons; the coloured, selectable map below is
# derived from this base by overriding its encodings.
sewershed_base_map = alt.Chart(data=sewershed_shapes).mark_geoshape(stroke='black', strokeWidth=0.5).encode(
    color=alt.value('grey'),
    ).project(type='mercator').properties(width=500, height=500)

# Colour each catchment area by name (same category20 scheme as the wastewater
# scatter) and dim the areas that are not in the legend-driven selection.
sewershed_map = sewershed_base_map.encode(
    color=alt.Color("Address_2:N", legend=alt.Legend(title='Catchment Area', orient='left'), scale=alt.Scale(scheme='category20')),
    opacity = alt.condition(sewershed_select, alt.value(1), alt.value(0.2)),
).add_selection(sewershed_select).properties(
    title={"text": ["Wastewater Catchment Areas with ZIP Code Boundaries Overlaid in Yellow"],
           "subtitle": "Click on map to select ZIP code, click on legend to select catchment area (shift-click to select multiple areas)",
          "fontSize": 14,
          "fontWeight": "bold",
    },
    # title='Wastewater Catchment Areas with Zip Code Boundaries Overlaid in Yellow'

)

sewershed_map

In [None]:
# Final dashboard. Left: catchment map with the ZIP overlay and county outline
# layered on top. Right: ZIP + county case lines (with independent colour scales)
# stacked above the wastewater scatter. The legend offset is raised from the
# theme's 0 to 10 for the combined chart.
combo_viz = alt.hconcat(sewershed_map+zip_base_map+county_boundary, (zip_viz+county_viz_line).resolve_scale(color='independent')& sewershed_viz).properties(
    title={
      "text": ["Louisville, KY SARS-CoV-2 Wastewater Testing by Catchment Area and New Case Trends by ZIP code and in Jefferson County"], 
      "subtitle": ["Sources: Wastewater testing - University of Louisville, Cases - Louisville Metro Dept of Public Health and Wellness and LOJIC Open Geospatial Data",
                  ""],
      "subtitleFontSize": 14
    },

).configure_legend(offset=10)

In [None]:
combo_viz

In [None]:
combo_viz.save('louisville-wws.html')

# Save to Drive

In [None]:
# this is for saving altair charts to png and svg, based on https://colab.research.google.com/github/altair-viz/altair_saver/blob/master/AltairSaver.ipynb#scrollTo=ZiTDBCAM_Ni8
!pip install -q altair_saver
!npm install --silent vega-lite vega-cli canvas

In [None]:
# issue with gcloud dependency on httplib2: https://stackoverflow.com/questions/59815620/gcloud-upload-httplib2-redirectmissinglocation-redirected-but-the-response-is-m
# may need to restart runtime after this cell runs and resume from here
!pip install 'httplib2<0.16.0' --force-reinstall

In [None]:
from pathlib import Path
from altair_saver import save

# Local staging directory (relative to the working directory) for everything
# that gets uploaded to Drive; created if missing, reused if it already exists.
SAVE_PATH = Path('assets')
SAVE_PATH.mkdir(exist_ok=True)

# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
gdrive = GoogleDrive(gauth)

In [None]:
def assets_to_gdrive(folder_name, localdir_path = SAVE_PATH, parentdir_id='17Kx2uZbQv1r5U1M9x_OXS4lpMU5c6Ym8'):
  """Upload every file in a local directory to a named Google Drive folder.

  Looks for a non-trashed folder titled ``folder_name`` directly under
  ``parentdir_id`` and reuses it; if none exists, one is created. Each regular
  file found directly inside ``localdir_path`` is then uploaded into that
  folder under its own file name. Sub-directories are skipped.

  Uses the module-level PyDrive client ``gdrive``.

  Args:
    folder_name: Title of the destination Drive folder.
    localdir_path: Local directory whose files are uploaded (``pathlib.Path``
      or a path string). Defaults to ``SAVE_PATH``.
    parentdir_id: Drive id of the folder that contains ``folder_name``.
  """
  localdir_path = Path(localdir_path)

  # search gdrive for snapshot folder and save assets there if it already exists.
  folder_id = ''
  file_list = gdrive.ListFile({'q': f"'{parentdir_id}' in parents and mimeType = 'application/vnd.google-apps.folder' and trashed=false"}).GetList()
  for file1 in file_list:
    # if several folders share the title, the last one listed is used
    if file1['title'] == folder_name:
      folder_id = file1['id']
      print(f'Found pre-existing gdrive folder named "{folder_name}" at',folder_id)
  # if not, create new folder
  if folder_id == '':
    folder = gdrive.CreateFile(metadata={'title': folder_name,
                                         'parents':[{'id': parentdir_id}],
                                         "mimeType": "application/vnd.google-apps.folder"
                                         })
    folder.Upload()
    folder_id = folder.get('id')
    print(f'Created new gdrive folder named "{folder_name}" at',folder_id)

  # upload all files within localdir_path to snapshot folder
  for asset_file in localdir_path.iterdir():
    if not asset_file.is_file():
      # a directory cannot be opened as upload content; leave it out
      continue
    print('Saving file...',asset_file.name)
    file1 = gdrive.CreateFile(metadata={'title':asset_file.name,
                                        'parents':[{'id': folder_id}],
                                        })
    # iterdir() already yields the full path; PyDrive documents a str filename
    file1.SetContentFile(str(asset_file))
    file1.Upload()
    print('Saved file: ',asset_file.name)


In [None]:
def save_vizassets(chart, save_path, filename, fmts=['html','json','png','svg',]):
  """Write ``chart`` once per requested format.

  Each output goes to ``<save_path>/<filename>.<fmt>`` and is produced by the
  ``save`` function this notebook imports from altair_saver.

  Args:
    chart: The chart object handed to ``save``.
    save_path: Destination directory (anything that formats as a path string).
    filename: Base file name, without extension.
    fmts: File extensions to emit, in order.
  """
  targets = [f'{save_path}/{filename}.{fmt}' for fmt in fmts]
  for target in targets:
    save(chart, target)

In [None]:
# put your stuff (i.e. dataframes, altair charts, input data files) to save here

In [None]:
# examples:

# cpr_df.to_csv(SAVE_PATH/'cpr_df.csv')
# source_concat.to_csv(SAVE_PATH/'source_concat.csv')
# Stage the working dataframes as CSV in SAVE_PATH.
ww_df.to_csv(SAVE_PATH/'ww_df.csv')
ww_df_melt.to_csv(SAVE_PATH/'ww_df_melt.csv')
zip_gdf.to_csv(SAVE_PATH/'zip_gdf.csv')
county_df.to_csv(SAVE_PATH/'county_df.csv')
# Render the combined chart in every default format of save_vizassets, with the
# UTC run timestamp in the file name.
save_vizassets(combo_viz, SAVE_PATH, f'combo_viz_{now_utc_timestamp}')

In [None]:
# get the colab filename
from requests import get
# Ask the runtime's sessions endpoint once and take both the notebook name and
# its Drive file id from the same (first) session record, so the two values
# cannot come from different responses. The timeout keeps the cell from hanging
# if the endpoint is unreachable.
nb_session = get('http://172.28.0.2:9000/api/sessions', timeout=10).json()[0]
nb_name = nb_session['name'].replace('.ipynb','')
nb_id = nb_session['notebook']['path'].replace('fileId=','')

print(SAVE_PATH, nb_name, now_utc_timestamp, nb_id)

# create a snapshot of this currently running notebook and save to SAVE_PATH
downloaded_nb = gdrive.CreateFile({'id':nb_id})   # replace the id with id of file you want to access
downloaded_nb.GetContentFile(SAVE_PATH/f'{nb_name}_{now_utc_timestamp}.ipynb')

In [None]:
# upload everything to gdrive
assets_to_gdrive(folder_name=f'{nb_name}_{now_utc_timestamp}')