## Setup



In [0]:
# Install the notebook's dependencies with pip (shell commands run from the notebook cell).
!pip install --quiet poetry  # Fixes https://github.com/python-poetry/poetry/issues/532
# ergo is installed straight from GitHub, pinned to a specific commit hash.
!pip install --quiet git+https://github.com/oughtinc/ergo.git@f487b7b9856378387bdf1647d69fdb170790e7c7
# plotnine is installed without a version pin.
!pip install --quiet plotnine

    ents to build wheel ... etadata ... ERROR: chainer 6.5.0 has requirement typing-extensions<=3.6.6, but you'll have typing-extensions 3.7.4.2 which is incompatible.



In [0]:
import io
import os
import zipfile
from datetime import date, datetime, timedelta
from types import SimpleNamespace
from typing import List

import ergo
import numpy as np
import pandas as pd
import plotnine
import requests
import scipy.stats
import torch
from plotnine import ggplot, geom_point, aes, geom_vline, stat_smooth, facet_wrap, labs, guides

    /usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
      import pandas.util.testing as tm

Log into a Metaculus account

If running in a Colab notebook, please enter your Metaculus credentials
in the cell below



In [0]:
def is_local():
  """Report whether the notebook appears to be running on a local machine.

  The heuristic is whether the ``USER`` environment variable is set. This is
  the same condition the earlier ``%env USER`` magic relied on, expressed as
  plain Python so no exception has to be swallowed to get the answer.

  Returns:
    bool: True when ``USER`` is present in the environment, False otherwise.
  """
  return "USER" in os.environ

# Metaculus sub-domain passed to ergo as `api_domain`.
metaculus_api = "pandemic"

if is_local():
  # Local run: credentials are read from the METACULUS_USERNAME / METACULUS_PASSWORD
  # environment variables, after load_dotenv() has had a chance to populate them.
  from dotenv import load_dotenv
  load_dotenv()
  metaculus = ergo.Metaculus(username=os.getenv("METACULUS_USERNAME"), password=os.getenv("METACULUS_PASSWORD"), api_domain=metaculus_api)
else:
  # Hosted run: fill in the empty username/password strings below before running this cell.
  try:
    metaculus = ergo.Metaculus(username="", password="", api_domain=metaculus_api)
  except Exception as login_error:
    # Catch Exception (not a bare except) so Ctrl-C still interrupts the cell,
    # and show why the login failed instead of hiding it.
    print(f'WARNING, You will need to enter your metaculus credentials in this cell')
    print(f'Login failed: {login_error}')

## Questions



Here are the questions we want to forecast:



In [0]:
# Metaculus question ids, in the same order as `question_names` below.
question_ids = [3935, 3948, 3941, 3939, 3937]

# Human-readable name attached to each question (paired with the ids by position).
question_names = [
    "The United Kingdom",
    "France",
    "Poland",
    "The State of California",
    "Italy",
]

areas = question_names

# Fetch each question from Metaculus, labelling it with its area name.
questions = [
    metaculus.get_question(question_id, name=label)
    for question_id, label in zip(question_ids, question_names)
]
ergo.MetaculusQuestion.to_dataframe(questions)

## Assumptions and Question Information



-   intensity — a score from 1 to 5 for the severity of, and degree of
    adherence to, the social distancing, with 5 being the most strictly
    observed/enforced social distancing
-   re-evaluation — date on which the government said it would re-assess
    the lockdown



In [0]:
# An "area" is a plain dict holding the lockdown assumptions for one region.
Area = dict
assumptions = SimpleNamespace()

# Lockdown assumptions keyed by area. Every entry carries:
#   data_key      - value looked up in the data's `location_name` column
#   question_name - name given to the matching Metaculus question
#   start         - date the lockdown began
#   end           - None for every area (placeholder: date(2020, X, X))
# Some entries additionally carry `intensity` and/or `re_evaluated`.
assumptions.lockdowns = {
  "New York": Area(
    data_key="New York",
    question_name="The State of New York",
    start=date(2020, 3, 21),
    intensity=3,
    end=None,
  ),
  "California": Area(
    data_key="California",
    question_name="The State of California",
    start=date(2020, 3, 22),
    end=None,
  ),
  "United Kingdom": Area(
    data_key="United Kingdom",
    question_name="The United Kingdom",
    start=date(2020, 3, 23),
    re_evaluated=None,
    intensity=3,
    end=None,
  ),
  "France": Area(
    data_key="France",
    question_name="France",
    start=date(2020, 3, 17),
    re_evaluated=date(2020, 4, 1),
    intensity=3,
    end=None,
  ),
  "Poland": Area(
    data_key="Poland",
    question_name="Poland",
    start=date(2020, 3, 25),
    re_evaluated=date(2020, 4, 11),
    end=None,
  ),
  "Italy": Area(
    data_key="Italy",
    question_name="Italy",
    start=date(2020, 3, 23),
    end=None,
  ),
}

Helper Functions



In [0]:
# Stand-in for date.fromisoformat, which only exists from Python 3.7 onwards.
def fromisoformat(xdate):
    """Parse a 'YYYY-MM-DD' string and return the corresponding datetime.date."""
    parsed = datetime.strptime(xdate, '%Y-%m-%d')
    return parsed.date()

# This allows access to the right assumptions using a data key or a question.
def get_assumptions(key, assumptions = assumptions.lockdowns):
  """Look up the lockdown assumptions for an area.

  Args:
    key: either a data key (str) naming an entry of `assumptions`, or a
      MetaculusQuestion whose `name` matches an entry's 'question_name'.
    assumptions: mapping of data key -> assumptions dict; defaults to the
      notebook's lockdown assumptions.

  Returns:
    The matching assumptions dict, or None (after printing a message) when
    nothing matches or `key` is of an unsupported type.
  """
  if isinstance(key, str):
    if key in assumptions:
      return assumptions[key]
    print(f"No assumptions for data key: {key}")
  elif isinstance(key, ergo.metaculus.MetaculusQuestion):
    for area in assumptions.values():
      if area['question_name'] == key.name:
        return area
    # Report the question that was actually passed in, not whatever global
    # happens to be called `question`.
    print(f"No assumptions for question: {key.name}")
  else:
    print(f"Neither a question nor a data_key was passed")
  return None

# ripped from https://techoverflow.net/2018/01/16/downloading-reading-a-zip-file-in-memory-using-python/
def download_extract_zip(url):
    """
    Download a ZIP file and extract its contents in memory
    yields (filename, file-like object) pairs

    Each yielded file object is only valid until the generator is advanced
    again, because it is closed when iteration moves on to the next member.

    Raises requests.exceptions.HTTPError when the server answers with an
    error status, rather than failing later while parsing a non-ZIP body.
    """
    response = requests.get(url)
    response.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(response.content)) as thezip:
        for zipinfo in thezip.infolist():
            with thezip.open(zipinfo) as thefile:
                yield zipinfo.filename, thefile

## Data



We now have predictions from
[http://www.healthdata.org/covid/data-downloads](http://www.healthdata.org/covid/data-downloads)

***TODO*** if something like this is going to persist, then consider
migrating to ergo/data/covid19.py

Get Data



In [0]:
url="https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip"
hospitalization_csv = "Hospitalization_all_locs.csv"

# Scan the archive for the hospitalization file, wherever it sits in the folder structure.
infections_df = None
for name, xfile in download_extract_zip(url):
    if os.path.basename(name) == hospitalization_csv:
        infections_df = pd.read_csv(xfile)
        break  # found it; no need to open the remaining archive members

# Fail here with a clear message instead of with a NameError in a later cell.
if infections_df is None:
    raise FileNotFoundError(f"{hospitalization_csv} was not found in the archive downloaded from {url}")

Clean and Enhance Data

**variable description**

-   `admis_mean` = daily # of admissions to hospital
-   `allbed_mean` = cumsum of `admis_mean`



In [0]:
# Filter the data down to the areas and columns used in this notebook.
# The explicit .copy() makes the result an independent frame, so the columns added
# afterwards are written to it directly instead of to a slice of the original
# (which is what triggers pandas' SettingWithCopyWarning).
infections_df = infections_df.loc[infections_df['location_name'].isin(["New York", "California", "United Kingdom", "France", "Poland",  "Italy"]),
                                  ['location_name', 'date', 'admis_mean', 'newICU_mean', 'deaths_mean', 'totdea_mean', 'allbed_mean', 'ICUbed_mean', 'bedover_mean', 'icuover_mean']].copy()

#infections_df['location_name'].unique() 

# calculate days elapsed since the lockdown started
def calulate_days_from_lockdown_start(df: pd.Series):
    """Return the number of days between an area's lockdown start and a data row's date.

    Args:
        df: a single row of the infections data (as passed by
            DataFrame.apply(..., axis=1)) with a 'location_name' and an
            ISO-formatted 'date' entry.

    Returns:
        int: days since the lockdown started. Negative for dates before the
        lockdown, 0 on the start date, positive afterwards.
    """
    lockdown_start = get_assumptions(df['location_name'])['start']
    # date minus start, so that the value grows as time passes after the lockdown begins
    return (fromisoformat(df['date']) - lockdown_start).days

infections_df['days_from_lockdown'] = infections_df.apply(calulate_days_from_lockdown_start, axis=1)

# calculate cumulative admissions separately for each location, in date order.
# The result keeps the original row index, so the assignment aligns row by row.
infections_df['admis_cum'] = (
    infections_df.sort_values('date')
                 .groupby('location_name')['admis_mean']
                 .cumsum()
)

# calculate a pseudo doubling rate: cumulative admissions divided by the current daily admissions
infections_df['doubling_rate_in_days'] = infections_df['admis_cum'] / infections_df['admis_mean']

# same pseudo doubling rate, capped at one year
infections_df['progression'] = infections_df['doubling_rate_in_days'].clip(upper=365) # we don't care about doubling rates longer than a year (at least)

# remove all NAs (fillna returns a new frame, so the result has to be assigned back)
infections_df = infections_df.fillna(0)
infections_df.head()

## Explore Features



### Hospital Admissions



In [0]:
# Hospital admissions for all areas on one panel; the vertical line marks the lockdown start.
plotnine.options.figure_size = (8, 4)
admissions_plot = (
    ggplot(infections_df, aes(x='days_from_lockdown', y='admis_mean', color='location_name'))
    + geom_point()
    + geom_vline(xintercept=0)
    + labs(x='Days since lockdown',  y='New hospital admissions', title='Progression of hospital admissions')
)
admissions_plot

![img](59dc8262b32cd56b6ac9dee4e98c4d13d6fdbaa1.png)

Examine the evolution of the rate of the spread of the infection



In [0]:
# Same admissions data, one panel per area laid out in a single row; the colour legend is hidden.
plotnine.options.figure_size = (12, 4)
admissions_by_area_plot = (
    ggplot(infections_df, aes(x='days_from_lockdown', y='admis_mean', color='location_name'))
    + geom_point()
    + facet_wrap('~location_name', nrow=1)
    + labs(x='Days since lockdown',  y='New hospital admissions', title='Progression of hospital admissions')
    + guides(color=False)
)
admissions_by_area_plot

![img](0eb63d434f05366c11992e8de9beb4e8f91a7cad.png)



### Bedover



[covid all beds needed] - ([total bed capacity] - [average all bed
usage])



In [0]:
# bedover_mean per area, one panel each; the vertical line marks the lockdown start.
plotnine.options.figure_size = (10, 4)
bedover_plot = (
    ggplot(infections_df, aes(x='days_from_lockdown', y='bedover_mean', color='location_name'))
    + geom_point()
    + facet_wrap('~location_name')
    + geom_vline(xintercept=0)
    + labs(x='Days since lockdown',  y='Mean Bedover', title='Projected Bed Utilization')
)
bedover_plot

![img](7754c4c2f89d42a735df75e62b529638e45557d7.png)



### Mean Beds needed for Covid cases



In [0]:
# allbed_mean per area, one panel each; the vertical line marks the lockdown start.
# The plot expression starts at column 0: an indented first line is a syntax error here.
plotnine.options.figure_size = (10,4)
(ggplot(infections_df, aes('days_from_lockdown', 'allbed_mean', color='location_name'))
 + geom_point()
 + facet_wrap('~location_name')
 + geom_vline(xintercept=0)
 + labs(x='Days since lockdown',  y='Mean beds needed', title='Projected Bed Utilization')
)

![img](b180a0bed70ecdd5fed6eab7c7dcc691715a589a.png)



### Infection Rate



**TODO** Need to edit the calculation for areas where there is little or no data



In [0]:
# Capped pseudo doubling rate ('progression') per area; the vertical line marks the lockdown start.
# The plot expression starts at column 0: an indented first line is a syntax error here.
plotnine.options.figure_size = (10,4)
(ggplot(infections_df, aes('days_from_lockdown', 'progression', color='location_name'))
 + geom_point()
 + facet_wrap('~location_name')
 + geom_vline(xintercept=0)
 + labs(x='Days since lockdown',  y='Pseudo doubling rate in days', title='Disease Growth' )
)

![img](6b1b3513926ad94b3b88ed7754b0080af45e1e0f.png)



## Model



**Approach**

*Simple Model* The decision to end a lockdown is considered every day
from the initial order. The average hospital admissions across the
previous weeks are considered. As the number of Covid-related hospital
admissions decreases, the likelihood of suspending the lockdown
increases.

*Conditions to consider adding*

-   deaths (relative to population)
-   deaths (relative to infected)
-   ratio of recovery to newly infected
-   % of population has been tested > threshold
-   complex priors over dates (the 1st and 15th of the month are more likely;
    perhaps holidays too?)
-   depletion of susceptible stock 1 - (Infected + Recovered + Deaths) /
    population > threshold

*Ad-Hoc High-level features to include*

-   Government Market Orientation
-   Government Goal &#x2014; Signaling Action | Spread Mitigation

Model



In [0]:
def model(area: Area, data: pd.core.frame.DataFrame):
  """Sample one lockdown duration (in days) for an area and tag it with ergo.

  Starting from the lockdown start, each simulated day the lockdown ends with
  a probability derived from the mean hospital admissions over the preceding
  five weeks. The sampled duration is recorded with ergo.tag under the area's
  data key; nothing is returned.

  Args:
    area: assumptions dict for the area; must provide 'data_key' and 'start'.
    data: infections data with 'location_name', 'admis_mean' and
      'days_from_lockdown' columns.

  Raises:
    Exception: if the data lacks a 'location_name' column, has no rows for
      the area, or the area has no lockdown start.
  """
  # make sure reasonable things were passed in
  if "location_name" not in data:
    print(f'The data does not have the expected structure')
    raise Exception("bad data")
  elif area.get("data_key") not in data["location_name"].values:
    print(f'There is currently no infection data for {area["data_key"]}')
    raise Exception("no data")
  elif not area.get("start"):
    print(f'There is currently no lockdown for {area["data_key"]}')
    raise Exception("no lockdown")

  # model start
  area_data = data.loc[data["location_name"] == area.get("data_key")] # get data for area
  last_day_with_data = area_data['days_from_lockdown'].max() # loop-invariant, so computed once
  # Fallback rate, used until a five-week window with data is available: the peak
  # admissions for the area. Without it the first loop iteration could reach the
  # flip below with no rate defined at all.
  infection_spread_rate = area_data['admis_mean'].max()
  lockdown_duration = 0
  while True:
    lockdown_duration += 1
    if last_day_with_data >= lockdown_duration: # keep using last infection_spread_rate if we run out of data (this is a bad hack)
      # take the average of the hospital admissions for the past five weeks
      window = area_data.loc[(area_data['days_from_lockdown'] > lockdown_duration - 35) &
                             (area_data['days_from_lockdown'] <= lockdown_duration), 'admis_mean']
      window_mean = window.mean()
      # An empty window averages to NaN, which is not a usable probability input;
      # in that case keep the previous rate.
      if pd.notna(window_mean):
        infection_spread_rate = window_mean

    # This logistic distribution gives the highest probability at 0 and decreases the larger the passed in value.
    # The speed at which the probability drops off is modulated by the scale parameter: the lower the scale,
    # the lower the probability assigned to high numbers. Here .5 gives a rather thin tail with a strong bias
    # towards low numbers (the density peaks at 0.5 for a rate of 0).
    stop_quarantine = ergo.flip(scipy.stats.logistic.pdf(infection_spread_rate, scale = .5))
    if stop_quarantine:
      break

  ergo.tag(torch.Tensor([lockdown_duration]), area.get("data_key"))

Run Model



In [0]:
# One column of sampled lockdown durations per question (unconditioned).
samples = pd.DataFrame()

for question in questions:
  def sample_lockdown_duration():
    # Looks up the question's assumptions on every sample, then runs the model once.
    return model(get_assumptions(question), infections_df)

  run_output = ergo.run(sample_lockdown_duration, num_samples=5)
  # Keep only the first column of the run output.
  samples[question.name] = run_output.iloc[:, 0]

## Submit Predictions



We calculated the model in terms of duration in days. Let's convert that to date format



In [0]:
# Replace each sampled duration (days) with the calendar date it corresponds to,
# counting from the lockdown start of the question's area.
for question in questions:
   if question.name not in samples:
     continue
   start_date = get_assumptions(question)["start"]
   durations = samples[question.name]
   samples[question.name] = durations.apply(lambda days: start_date + timedelta(days=days))

### Compare Potential Submission to the Community's Predictions



In [0]:
# Show each prospective submission next to the community prediction,
# skipping questions for which no samples were produced.
for question in questions:
    if question.name not in samples:
        continue
    submission_view = question.show_submission(samples[question.name], show_community=True)
    print(submission_view)

### Submit from sample



In [0]:
def submit_all():
  """Submit a prediction for every question that has samples.

  Questions without a column in `samples` are reported and skipped. An HTTP
  error during submission is printed and does not stop the remaining
  submissions.
  """
  for question in questions:
    if question.name not in samples:
      print(f"No predictions for {question.name}")
      continue

    try:
      params = question.submit_from_samples(samples[question.name])
      print(f"Submitted Logistic{params} for {question.name}")
      print(f"https://pandemic.metaculus.com{question.page_url}")
    except requests.exceptions.HTTPError as e:
      print(f"Couldn't make prediction for {question.name} -- maybe this question is now closed? See error below.")
      print(e)

Submit it!



In [0]:
# Submit the predictions for every question that has samples.
submit_all()