<a href="https://colab.research.google.com/github/oughtinc/ergo/blob/notebooks-readme/covid-19-average-lockdown.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [0]:
!pip install --quiet poetry  # Fixes https://github.com/python-poetry/poetry/issues/532
!pip install --quiet git+https://github.com/oughtinc/ergo.git
!pip install --quiet pendulum requests
!pip install --quiet torch

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
  Building wheel for ergo (PEP 517) ... [?25l[?25hdone


In [0]:
%load_ext google.colab.data_table

The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_table


In [0]:
import os
from typing import List

import ergo
import pandas as pd
import pendulum
import requests
import torch
from pendulum import Date, Duration

In [0]:
def get_questions_for_cat(cat, timeout=30):
  """Fetch the pandemic.metaculus.com question search results for a category.

  Args:
    cat: category slug, inserted into the `search=cat:<slug>` query.
    timeout: seconds to wait for the server before giving up.

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.exceptions.HTTPError: if the server answers with an error status.
    requests.exceptions.Timeout: if no answer arrives within `timeout` seconds.
  """
  r = requests.get(
      f"https://pandemic.metaculus.com/api2/questions/?search=cat:{cat}",
      timeout=timeout,  # without a timeout a stalled connection hangs the notebook
  )
  # Fail here with the HTTP status instead of handing an error payload to the
  # caller, where it would only surface as a confusing KeyError.
  r.raise_for_status()
  return r.json()

def get_question_ids_for_cat(cat):
  """Return the "id" of every entry under "results" in the category search."""
  response_body = get_questions_for_cat(cat)
  question_ids = []
  for entry in response_body["results"]:
    question_ids.append(entry["id"])
  return question_ids

In [0]:
def show_related_questions(related_question_ids):
  """Tabulate how related questions moved relative to our last prediction.

  Every id is fetched through the module-level `metaculus` client. Each
  question's community-prediction history is then compared against the
  timestamp of the latest entry in `question.my_predictions`, where `question`
  is the module-level main question. Both globals must be defined before this
  function is called.

  Args:
    related_question_ids: iterable of Metaculus question ids.

  Returns:
    A DataFrame indexed by "id" with the columns "changed",
    "degree of change", "min", "max", "period", "title" and "link". It is
    empty when none of the ids could be loaded.
  """
  related_questions = []
  for question_id in related_question_ids:
    # Best effort: some "questions" aren't really questions and make
    # metaculus.get_question raise, like
    # https://pandemic.metaculus.com/questions/3957/lockdown-series-when-will-life-return-to-normal-ish/
    # Catch Exception (not a bare except) so that Ctrl-C still interrupts.
    try:
      related_questions.append(metaculus.get_question(question_id))
    except Exception:
      continue

  def needs_attention(related_question):
    """True when the community prediction is newer than our last prediction.

    Also True when the main question has no predictions of ours at all.
    """
    if not question.my_predictions:
      return True
    latest_community = pendulum.from_timestamp(
        related_question.prediction_timeseries[-1]["t"])
    latest_mine = pendulum.from_timestamp(
        question.my_predictions["predictions"][-1]["t"])
    return (latest_community - latest_mine).in_seconds() > 1

  def direction(q):
    """Return (earlier community value) - (latest community value).

    The earlier value is found by walking backwards through
    q.prediction_timeseries until an entry is reached that is not newer than
    our last prediction on the main question.
    """
    steps_back = 1
    while True:
      try:
        gap = (
            pendulum.from_timestamp(q.prediction_timeseries[-steps_back]["t"])
            - pendulum.from_timestamp(question.my_predictions["predictions"][-1]["t"])
        )
      except Exception:
        # Either we walked past the start of the series or there is no
        # prediction of ours to compare against. Stepping back by one leaves
        # the index on the oldest entry in both cases (note that [-0] is [0]).
        steps_back -= 1
        break
      if gap.in_seconds() < 1:
        break
      steps_back += 1

    old = q.prediction_timeseries[-steps_back]["community_prediction"]
    new = q.prediction_timeseries[-1]["community_prediction"]
    if not isinstance(old, float):
      # Non-float community predictions are mappings; compare their "q2"
      # entries (presumably the median - confirm against the Metaculus API).
      old, new = old["q2"], new["q2"]
    return old - new

  related_questions_data = []
  for related_question in related_questions:
    scale = related_question.data["possibilities"]["scale"]
    # Computed once and reused for both the raw change and the scaled period.
    change = float(direction(related_question))
    scale_span = pendulum.period(
        pendulum.parse(scale["min"]), pendulum.parse(scale["max"]))
    related_questions_data.append([
        related_question.id,
        needs_attention(related_question),
        change,
        scale["min"],
        scale["max"],
        (scale_span * change).in_hours(),
        related_question.title,
        f"https://pandemic.metaculus.com{related_question.page_url}",
    ])

  # Echo the scale of the first question; skipped when nothing was loaded,
  # which used to raise IndexError.
  if related_questions:
    first_scale = related_questions[0].data["possibilities"]["scale"]
    print(first_scale["max"])
    print(first_scale["min"])

  df = pd.DataFrame(
      related_questions_data,
      columns=["id", "changed", "degree of change", "min", "max", "period", "title", "link"],
  )

  return df.set_index("id")

In [0]:
def relate_questions_in_cat(cat):
  """Look up the question ids for category `cat` and return their summary table."""
  return show_related_questions(get_question_ids_for_cat(cat))


# Questions

Here is the question we want to forecast:

In [0]:
# The question this notebook forecasts: "id" is passed to
# metaculus.get_question and "name" becomes the question's name.
question_data = dict(
    id=3925,
    name="How many days will the average American spend under lockdown between 2020-03-25 and 2020-04-24",
)

In [0]:
# Credentials are read from the environment so that no password is ever stored
# in the notebook. With METACULUS_USERNAME / METACULUS_PASSWORD unset this is
# the same call as before (username "ought", empty password).
metaculus = ergo.Metaculus(
    username=os.environ.get("METACULUS_USERNAME", "ought"),
    password=os.environ.get("METACULUS_PASSWORD", ""),
    api_domain="pandemic",
)
question = metaculus.get_question(question_data["id"], name=question_data["name"])

# One-row table confirming which question was loaded; shown as the cell output.
df = pd.DataFrame([[question.id, question.name]], columns=["id", "name"])
df.set_index("id")

Unnamed: 0_level_0,name
id,Unnamed: 1_level_1
3925,How many days will the average American spend ...


# Data

Data: https://www.nytimes.com/interactive/2020/us/coronavirus-stay-at-home-order.html

Manually copied in on 2020-04-05

Updated on 2020-04-08:


*   Added three regions to Oklahoma: Claremore, Moore, and Sallisaw.
*   Added South Carolina state-wide lockdown



In [0]:
# One million, so that populations below can be written in millions.
M = 1000000

# Lockdown data as a tree of regions, rooted at the whole USA. Each node has:
#   "start":   pendulum.Date attached to the region-wide order, or False when
#              the region has no region-wide order
#   "pop":     population of the region (presumably a head count)
#   "regions": optional dict mapping sub-region name -> node of the same shape
# A node may have both a "start" date and "regions" (see South Carolina).
# Not every state appears as a child of the root.
USA_data = {
    "start": False,
    "pop": 327.2 * M,
    "regions": {
        "Alabama": {
            "start": pendulum.Date(2020, 4, 4),
            "pop": 4.9 * M
        },
        "Alaska": {
            "start": pendulum.Date(2020, 3, 31),
            "pop": 0.737 * M
        },
        "Arizona": {
            "start": pendulum.Date(2020, 3, 31),
            "pop": 7.2 * M
        },
        "California": {
            "start": pendulum.Date(2020, 3, 19),
            "pop": 39.6 * M
        },
        "Colorado": {
            "start": pendulum.Date(2020, 3, 26),
            "pop": 5.7 * M
        },
        "Connecticut": {
            "start": pendulum.Date(2020, 3, 23),
            "pop": 3.6 * M
        },
        "Delaware": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 0.973 * M
        },
        "District of Columbia": {
            "start": pendulum.Date(2020, 4, 1),
            "pop": 0.702 * M
        },
        "Florida": {
            "start": pendulum.Date(2020, 4, 3),
            "pop": 21.5 * M
        },
        "Georgia": {
            "start": pendulum.Date(2020, 4, 3),
            "pop": 10.6 * M
        },
        "Hawaii": {
            "start": pendulum.Date(2020, 3, 25),
            "pop": 1.4 * M
        },
        "Idaho": {
            "start": pendulum.Date(2020, 3, 25),
            "pop": 1.8 * M
        },
        "Illinois": {
            "start": pendulum.Date(2020, 3, 21),
            "pop": 12.7 * M
        },
        "Indiana": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 6.7 * M
        },
        "Kansas": {
            "start": pendulum.Date(2020, 3, 30),
            "pop": 2.9 * M
        },
        "Kentucky": {
            "start": pendulum.Date(2020, 3, 26),
            "pop": 4.5 * M
        },
        "Louisiana": {
            "start": pendulum.Date(2020, 3, 23),
            "pop": 4.6 * M
        },
        "Maine": {
            "start": pendulum.Date(2020, 4, 2),
            "pop": 1.3 * M
        },
        "Maryland": {
            "start": pendulum.Date(2020, 3, 30),
            "pop": 6 * M
        },
        "Massachusetts": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 6.9 * M
        },
        "Michigan": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 10 * M
        },
        "Minnesota": {
            "start": pendulum.Date(2020, 3, 27),
            "pop": 5.6 * M
        },
        "Mississippi": {
            "start": pendulum.Date(2020, 4, 3),
            "pop": 3 * M
        },
        "Missouri": {
            "start": pendulum.Date(2020, 4, 6),
            "pop": 6.1 * M
        },
        "Montana": {
            "start": pendulum.Date(2020, 3, 28),
            "pop": 1.1 * M
        },
        "Nevada": {
            "start": pendulum.Date(2020, 4, 1),
            "pop": 3.1 * M
        },
        "New Hampshire": {
            "start": pendulum.Date(2020, 3, 27),
            "pop": 1.4 * M
        },
        "New Jersey": {
            "start": pendulum.Date(2020, 3, 21),
            "pop": 8.9 * M
        },
        "New Mexico": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 2.1 * M
        },
        "New York": {
            "start": pendulum.Date(2020, 3, 22),
            "pop": 19.5 * M
        },
        "North Carolina": {
            "start": pendulum.Date(2020, 3, 30),
            "pop": 10.4 * M
        },
        "Ohio": {
            "start": pendulum.Date(2020, 3, 23),
            "pop": 11.7 * M
        },
        "Oklahoma": {
            "start": False,
            "pop": 3.9 * M,
            "regions": {
                "Claremore": {
                    "start": pendulum.Date(2020, 4, 6),
                    "pop": 0.019 * M
                },
                "Edmond": {
                    "start": pendulum.Date(2020, 3, 30),
                    "pop": 0.093 * M
                },
                "Moore": {
                    "start": pendulum.Date(2020, 4, 4),
                    "pop": 0.062 * M
                },
                "Norman": {
                    "start": pendulum.Date(2020, 3, 25),
                    "pop": 0.123 * M
                },
                "Oklahoma City": {
                    "start": pendulum.Date(2020, 3, 28),
                    "pop": 0.649 * M
                },
                "Sallisaw": {
                    "start": pendulum.Date(2020, 4, 4),
                    "pop": 0.009 * M
                },
                "Stillwater": {
                    "start": pendulum.Date(2020, 3, 30),
                    "pop": 0.05 * M
                },
                "Tulsa": {
                    "start": pendulum.Date(2020, 3, 28),
                    "pop": 0.401 * M
                }
            }
        },
        "Oregon": {
            "start": pendulum.Date(2020, 3, 23),
            "pop": 4.2 * M
        },
        "Pennsylvania": {
            "start": pendulum.Date(2020, 4, 1),
            "pop": 12.8 * M
        },
        "Puerto Rico": {
            "start": pendulum.Date(2020, 3, 15),
            "pop": 3.2 * M
        },
        "Rhode Island": {
            "start": pendulum.Date(2020, 3, 28),
            "pop": 1.1 * M
        },
        "South Carolina": {
            "start": pendulum.Date(2020, 4, 7),
            "pop": 5.1 * M,
            "regions": {
                "Charleston": {
                    "start": pendulum.Date(2020, 3, 26),
                    "pop": 0.136 * M
                },
                "Columbia": {
                    "start": pendulum.Date(2020, 3, 29),
                    "pop": 0.133 * M
                }
            }
        },
        "Tennessee": {
            "start": pendulum.Date(2020, 3, 31),
            "pop": 6.8 * M
        },
        "Texas": {
            "start": pendulum.Date(2020, 4, 2),
            "pop": 29 * M
        },
        "Utah": {
            "start": False,
            "pop": 3.2 * M,
            "regions": {
                "Davis County": {
                    "start": pendulum.Date(2020, 4, 1),
                    "pop": 0.352 * M
                },
                "Salt Lake County": {
                    "start": pendulum.Date(2020, 3, 30),
                    "pop": 1.2 * M
                },
                "Summit County": {
                    "start": pendulum.Date(2020, 3, 27),
                    "pop": 0.042 * M
                }
            }
        },
        "Vermont": {
            "start": pendulum.Date(2020, 3, 25),
            "pop": 0.626 * M
        },
        "Virginia": {
            "start": pendulum.Date(2020, 3, 30),
            "pop": 8.5 * M
        },
        "Washington": {
            "start": pendulum.Date(2020, 3, 23),
            "pop": 7.5 * M
        },
        "West Virginia": {
            "start": pendulum.Date(2020, 3, 24),
            "pop": 1.8 * M
        },
        "Wisconsin": {
            "start": pendulum.Date(2020, 3, 25),
            "pop": 5.8 * M
        },
        "Wyoming": {
            "start": False,
            "pop": 0.578 * M,
            "regions": {
                "Jackson": {
                    "start": pendulum.Date(2020, 3, 28),
                    "pop": 0.01 * M
                }
            }
        }
    }
}


# Assumptions

* Every place that is currently on lockdown had no subregion on lockdown prior to implementing the region-wide lockdown. We know this is false, but it keeps the model simple.

* Every place that is currently on lockdown will remain on lockdown through at least April 25.

* Any state that isn't fully locked down will have a 1% chance of entering a lockdown every day.

In [0]:
# Probability handed to ergo.flip in get_pop_in_lockdown_on_date: the chance,
# per future date evaluated, that a region not yet under a region-wide
# lockdown is treated as having entered one.
chance_of_full_lockdown_transition = 0.01

# Related Questions

In [0]:
# Summary table for the questions found by the "internal--lockdown-series" category search.
relate_questions_in_cat("internal--lockdown-series")

2021-12-27
2020-03-27


Unnamed: 0_level_0,changed,degree of change,min,max,period,title,link
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3935,True,-0.00061,2020-03-27,2021-12-27,-9,When will the current lockdown in the UK be li...,https://pandemic.metaculus.com/questions/3935/...
3936,False,0.0,2020-03-27,2021-12-27,0,When will the current lockdown in Spain be lif...,https://pandemic.metaculus.com/questions/3936/...
3937,False,0.0,2020-03-27,2021-12-27,0,When will the current lockdown in Italy be lif...,https://pandemic.metaculus.com/questions/3937/...
3938,False,0.0,2020-03-27,2021-12-27,0,When will the current partial lockdown in the ...,https://pandemic.metaculus.com/questions/3938/...
3939,False,0.0,2020-03-27,2021-12-27,0,When will the current lockdown in the state of...,https://pandemic.metaculus.com/questions/3939/...
3940,True,-0.00059,2020-03-27,2021-12-27,-9,When will the current partial lockdown in the ...,https://pandemic.metaculus.com/questions/3940/...
3941,False,0.0,2020-03-27,2021-12-27,0,When will the current lockdown in Poland be li...,https://pandemic.metaculus.com/questions/3941/...
3942,True,-0.0009,2020-03-27,2021-12-27,-13,When will the current total lockdown in South ...,https://pandemic.metaculus.com/questions/3942/...
3946,True,-0.00053,2020-03-27,2021-12-27,-8,When will the current lockdown in India be lif...,https://pandemic.metaculus.com/questions/3946/...
3948,True,-0.00064,2020-03-27,2021-12-27,-9,When will the current lockdown in France be li...,https://pandemic.metaculus.com/questions/3948/...


# Model

In [0]:
def get_pop_in_lockdown_on_date(region, date, today=None):
  """Return the population of `region` counted as locked down on `date`.

  Rules, applied in order:
    1. A region whose "start" is set and strictly earlier than `date`
       contributes its whole "pop".
    2. Otherwise, if `date` lies after `today`, the region enters lockdown
       with probability `chance_of_full_lockdown_transition` (one ergo.flip).
       On success "start" is written into `region` - the argument is mutated -
       and the whole "pop" is returned.
    3. Otherwise the result is the sum over the entries of "regions", if any.
    4. Otherwise 0.

  Args:
    region: node with "start", "pop" and optionally "regions".
    date: the day being evaluated.
    today: the day that separates past from future. Defaults to the current
      date; pass it explicitly to make a run reproducible. The same value is
      used for every sub-region instead of re-reading the clock per call.

  Returns:
    The locked-down population (same units as "pop").
  """
  if today is None:
    today = pendulum.now().date()
  is_in_future = date > today

  start = region["start"]
  if start and start < date:
    return region["pop"]
  # The flip is only sampled for future dates (`and` short-circuits).
  if is_in_future and ergo.flip(chance_of_full_lockdown_transition):
    # `today` is always before `date` here, so rule 1 keeps this region locked
    # down for every later date evaluated on the same region object.
    region["start"] = today
    return region["pop"]
  if "regions" in region:
    return sum(
        get_pop_in_lockdown_on_date(sub_region, date, today)
        for sub_region in region["regions"].values()
    )
  return 0

In [0]:
import numpy as np

def get_avg_pop_in_lockdown_over_range(region, start_date, end_date):
  """Mean daily locked-down population of `region` between two dates.

  Evaluates get_pop_in_lockdown_on_date once per day produced by
  pendulum.period(start_date, end_date).range("days"), in chronological order,
  and averages the results. `region` may be mutated by those calls.

  Args:
    region: node with "start", "pop" and optionally "regions".
    start_date: first day of the window.
    end_date: last day of the window.

  Returns:
    The mean of the daily values as a NumPy float.
  """
  # Collect into a list and build the array once; np.append copies the whole
  # array on every call, which made the original loop quadratic.
  daily_pops = [
      get_pop_in_lockdown_on_date(region, day)
      for day in pendulum.period(start_date, end_date).range("days")
  ]
  return np.mean(np.array(daily_pops, dtype=float))

def get_avg_proportion_in_lockdown_over_range(region, start_date, end_date):
  """Mean daily locked-down population of `region`, divided by region["pop"]."""
  avg_locked_down = get_avg_pop_in_lockdown_over_range(region, start_date, end_date)
  total_pop = region["pop"]
  return avg_locked_down / total_pop

We need to make a deep copy of the region data for each model so that model-specific changes don't carry over to the next run.

In [0]:
import copy

def model():
  """Draw one sample and tag it under `question.name`.

  The sample is the average locked-down proportion over the window,
  multiplied by the number of days in the window.
  """
  window_start = pendulum.Date(2020, 3, 25)
  window_end = pendulum.Date(2020, 4, 25)
  days_in_window = 32  # 3/25 to 4/25 inclusive is 32 days

  # Work on a private copy: sampling can write new "start" dates into the tree.
  usa_region = copy.deepcopy(USA_data)
  avg = get_avg_proportion_in_lockdown_over_range(usa_region, window_start, window_end)
  ergo.tag(torch.Tensor([avg * days_in_window]), question.name)


In [0]:
# Run the model 10 times and collect the tagged values.
# NOTE(review): the result is used below via .hist/.describe, so it appears to
# be a pandas DataFrame with one column per tag - confirm against ergo.run.
samples = ergo.run(lambda: model(), num_samples=10)

# Bare expression so the samples are displayed as the cell output.
samples

# Analysis

Histogram:

In [0]:
# Histogram of the values sampled for the main question.
samples.hist(column=question.name)

Summary stats:

In [0]:
# Summary statistics of the samples.
samples.describe()

# Submit predictions

Convert samples to Metaculus distributions and visualize:

In [0]:
# Preview what would be submitted for the main question, if it was sampled.
if question.name not in samples:
  print(f"No predictions for {question.name}")
else:
  question.show_submission(samples[question.name])
print("\n\n")

If everything looks good, submit the predictions!

In [0]:
# def submit_all():
#   for question in questions:
#     if question.name in samples:
#       try:
#         params = question.submit_from_samples(samples[question.name])
#         print(f"Submitted for {question.name}")
#         print(f"https://pandemic.metaculus.com{question.page_url}")
#       except requests.exceptions.HTTPError as e:
#         print(f"Couldn't make prediction for {question.name} -- maybe this question is now closed? See error below.")
#         print(e)
#     else:
#       print(f"No predictions for {question.name}")

# submit_all()

# To do

- Add to dos