In [None]:
# Make the covid_xprize package importable from this notebook.
import os
import sys

# abspath('') resolves to the current working directory; the directory to add
# sits two levels above it.
notebook_dir = os.path.abspath('')
repo_root = os.path.dirname(os.path.dirname(notebook_dir))
sys.path.append(repo_root)

In [None]:
from datetime import datetime, timezone
import pandas as pd

from covid_xprize.scoring.predictor_scoring import load_dataset
from covid_xprize.validation.scenario_generator import generate_scenario
from covid_xprize.validation.scenario_generator import phase1_update

# Scenario generator

## Latest data

In [None]:
# Geo list file and remote CSV handed to load_dataset to build `latest_df`.
GEO_FILE = "../../countries_regions.csv"
LATEST_DATA_URL = "https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker-legacy/main/legacy_data_202207/OxCGRT_latest.csv"
latest_df = load_dataset(
    LATEST_DATA_URL,
    GEO_FILE,
)

In [None]:
# Number of distinct country names in the loaded data (a missing value would count once).
latest_df["CountryName"].nunique(dropna=False)

In [None]:
# Number of distinct region names in the loaded data (a missing value would count once).
latest_df["RegionName"].nunique(dropna=False)

# Scenario: historical IP until 2020-09-30
Latest historical data, truncated to the specified end date

In [None]:
# Parameters of the historical scenario.
# NOTE(review): None for start date / countries presumably means "no restriction" — confirm in generate_scenario.
start_date_str, end_date_str = None, "2020-09-30"
countries = None
# Destination used by the (commented-out) save cell below.
output_file = "data/2020-09-30_historical_ip.csv"

In [None]:
# Build the "Historical" scenario from the latest data with the parameters set above.
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario="Historical",
)

In [None]:
# Most recent date present for Italy in the generated scenario.
scenario_df.loc[scenario_df["CountryName"] == "Italy", "Date"].max()

In [None]:
# Drop every row dated after the requested end date.
truncation_date = pd.to_datetime(end_date_str, format='%Y-%m-%d')
on_or_before_end = scenario_df["Date"] <= truncation_date
scenario_df = scenario_df.loc[on_or_before_end]

In [None]:
# Show the last rows of the truncated scenario.
scenario_df.tail(5)

In [None]:
# Write to file
# scenario_df.to_csv(output_file, index=False)

# Scenario: frozen NPIs
Latest historical data + frozen NPIs between the last known date and the end of January 2021 for India and Mexico

## Generate

In [None]:
# January 2021 window, restricted to two countries.
start_date_str, end_date_str = "2021-01-01", "2021-01-31"
countries = ["India", "Mexico"]

In [None]:
# Build the "Freeze" scenario for the selected countries and window.
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario="Freeze",
)

In [None]:
# Row count of the generated scenario.
len(scenario_df.index)

In [None]:
# Distinct countries present in the scenario.
scenario_df["CountryName"].unique()

In [None]:
# Show the last rows of the frozen scenario.
scenario_df.tail(5)

## Save

In [None]:
# Write to a file
# hist_file_name = "data/future_ip.csv"
# scenario_df.to_csv(hist_file_name, index=False)

# Scenario: specific set of NPIs, freeze

## Generate

In [None]:
# Maximum value per NPI column, keyed by the exact column name.
# Passed to generate_scenario through its `max_npis_dict` argument further down.
# Entries marked "Max from file" are monetary amounts taken from the data file.
MAX_NPIS_DICT = {
    # --- C* columns ---
    "C1_School closing": 3,
    "C2_Workplace closing": 3,
    "C3_Cancel public events": 2,
    "C4_Restrictions on gatherings": 4,
    "C5_Close public transport": 2,
    "C6_Stay at home requirements": 3,
    "C7_Restrictions on internal movement": 2,
    "C8_International travel controls": 4,
    # --- E* columns ---
    "E1_Income support": 2,
    "E2_Debt/contract relief": 2,
    "E3_Fiscal measures": 1_957_600_000_000.0,  # Max from file
    "E4_International support": 834_353_051_822.0,  # Max from file
    # --- H* columns ---
    "H1_Public information campaigns": 2,
    "H2_Testing policy": 3,
    "H3_Contact tracing": 2,
    "H4_Emergency investment in healthcare": 242_400_000_000.0,  # Max from file
    "H5_Investment in vaccines": 100_404_615_615.0,  # Max from file
    "H6_Facial Coverings": 4,
    "H7_Vaccination policy": 5,
    "H8_Protection of elderly people": 3,
    # "M1_Wildcard": "text",  # Contains text
    # --- V* columns ---
    "V1_Vaccine Prioritisation (summary)": 2,
    "V2A_Vaccine Availability (summary)": 3,
    # "V2B_Vaccine age eligibility/availability age floor (general population summary)": "0-4 yrs",  # Lowest age group
    # "V2C_Vaccine age eligibility/availability age floor (at risk summary)": "0-4 yrs",  # Lowest age group
    "V2D_Medically/ clinically vulnerable (Non-elderly)": 3,
    "V2E_Education": 2,
    # The two keys below contain a double space; it is part of the column name.
    "V2F_Frontline workers  (non healthcare)": 2,
    "V2G_Frontline workers  (healthcare)": 2,
    "V3_Vaccine Financial Support (summary)": 5,
    "V4_Mandatory Vaccination (summary)": 1,
}

In [None]:
# Second quarter of 2020 (from March 31), restricted to two countries.
start_date_str, end_date_str = "2020-03-31", "2020-06-30"
countries = ["India", "Mexico"]

In [None]:
# "Freeze" scenario, this time also supplying the per-NPI maximum values.
scenario_df = generate_scenario(
    start_date_str, end_date_str, latest_df, countries,
    scenario="Freeze", max_npis_dict=MAX_NPIS_DICT,
)

In [None]:
# Show the last rows of the generated scenario.
scenario_df.tail(5)

## Save

In [None]:
# Write to a file
# hist_file_name = "data/future_ip.csv"
# scenario_df.to_csv(hist_file_name, index=False)

# Robojudge test: December
IP file to test robojudge for the month of December

## Generate

In [None]:
# UTC timestamp that later becomes part of the output file name.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
today = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')

# December 2020 window, no country restriction.
start_date_str = "2020-12-01"
end_date_str = "2020-12-31"
countries = None

# Reload the data with the default geo file, then build the "Freeze" scenario.
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries, scenario="Freeze")

In [None]:
# Sanity check: rows per geo should equal the 366 days of 2020.
nb_countries = scenario_df["CountryName"].nunique(dropna=False)
# One of the RegionName values is the '' placeholder, so it is not counted as a region.
nb_regions = scenario_df["RegionName"].nunique(dropna=False) - 1
len(scenario_df.index) / (nb_countries + nb_regions)

## Save

In [None]:
from datetime import datetime  # same import as at the top of the notebook

# The IP file always contains data since inception, hence the fixed start stamp.
sd = 20200101
# Compact the end date, e.g. "2020-12-31" -> "20201231".
ed = "".join(end_date_str.split('-'))
december_file_name = f"../../../covid-xprize-robotasks/ips/tests/{today}_{sd}_{ed}_ips.csv"

# Write the scenario without the index column and report where it went.
scenario_df.to_csv(december_file_name, index=False)
print(f"Saved to {december_file_name}")

# Robojudge: Official
IP file robojudge uses for its daily submissions evaluation

## Generate

In [None]:
# Handle US Virgin Islands:
# load the data with the phase 1 geo file, then run it through phase1_update.
PHASE1_FILE = "../../countries_regions_phase1_fix.csv"
LATEST_DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker-legacy/main/legacy_data_202207/OxCGRT_latest.csv'
latest_df = load_dataset(
    LATEST_DATA_URL,
    PHASE1_FILE,
)
latest_df = phase1_update(latest_df)

In [None]:
# UTC timestamp that later becomes part of the output file name.
# datetime.utcnow() is deprecated since Python 3.12; an aware "now" in UTC
# formats to exactly the same string.
today = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')

# Evaluation window, no country restriction.
start_date_str = "2020-12-22"
end_date_str = "2021-06-19"
countries = None

# Build the "Freeze" scenario from the phase-1-adjusted data.
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries, scenario="Freeze")

In [None]:
# Sanity check: rows per geo should equal 536 days, i.e.
# 366 days of 2020 + 170 days of 2021
# (the evaluation itself spans 10 days in 2020 + 170 days in 2021 = 180 days).
nb_countries = scenario_df["CountryName"].nunique(dropna=False)
# One of the RegionName values is the 'nan' placeholder, so it is not counted as a region.
nb_regions = scenario_df["RegionName"].nunique(dropna=False) - 1
len(scenario_df.index) / (nb_countries + nb_regions)

In [None]:
# Number of distinct country names in the scenario (a missing value would count once).
scenario_df["CountryName"].nunique(dropna=False)

In [None]:
# Number of distinct region names in the scenario (a missing value would count once).
scenario_df["RegionName"].nunique(dropna=False)

## Save

In [None]:
from datetime import datetime  # same import as at the top of the notebook

# Compact both dates, e.g. "2020-12-22" -> "20201222".
sd = "".join(start_date_str.split('-'))
ed = "".join(end_date_str.split('-'))
december_file_name = f"../../../covid-xprize-robotasks/ips/live/{today}_{sd}_{ed}_ips.csv"

# Write the scenario without the index column and report where it went.
scenario_df.to_csv(december_file_name, index=False)
print(f"Saved to {december_file_name}")

# Prescriptions

## UK future test

In [None]:
# UK only, no explicit start date, window ending mid-February 2021.
start_date_str, end_date_str = None, "2021-02-14"
countries = ["United Kingdom"]

# Reload with the default geo file, then build the "Freeze" scenario.
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario="Freeze",
)

In [None]:
# Save the UK scenario (index column omitted).
uk_ips_path = "~/workspace/covid-xprize-robotasks/ips/prescriptions/uk_future_test_ips.csv"
scenario_df.to_csv(uk_ips_path, index=False)

## All, past and future, test

In [None]:
# All geos, no explicit start date, window ending on the last day of 2020.
start_date_str, end_date_str = None, "2020-12-31"
countries = None

# Reload with the default geo file, then build the "Historical" scenario.
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario="Historical",
)

In [None]:
# Parse the end date string so it can be compared against the Date column.
end_date = pd.to_datetime(
    end_date_str,
    format='%Y-%m-%d',
)

In [None]:
# Keep only the rows dated on or before the end date.
scenario_df = scenario_df.loc[scenario_df["Date"] <= end_date]

In [None]:
# Latest date left after truncation.
scenario_df["Date"].max()

In [None]:
# Save the full 2020 scenario (index column omitted).
all_2020_ips_path = "~/workspace/covid-xprize-robotasks/ips/prescriptions/all_2020_ips.csv"
scenario_df.to_csv(all_2020_ips_path, index=False)

## China early 2020 test

In [None]:
# China only, no explicit start date, window ending mid-February 2020.
start_date_str, end_date_str = None, "2020-02-14"
countries = ["China"]

# Reload with the default geo file, then build the "Historical" scenario.
latest_df = load_dataset(LATEST_DATA_URL, GEO_FILE)
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries,
    scenario="Historical",
)

In [None]:
# Parse the end date, then keep only the rows dated on or before it.
end_date = pd.to_datetime(end_date_str, format='%Y-%m-%d')
on_or_before_end = scenario_df["Date"] <= end_date
scenario_df = scenario_df.loc[on_or_before_end]

In [None]:
# Show the last rows of the truncated scenario.
scenario_df.tail(5)

In [None]:
# Row count of the truncated scenario.
len(scenario_df.index)

In [None]:
# Save the early-2020 China scenario (index column omitted).
china_ips_path = "~/workspace/covid-xprize-robotasks/ips/prescriptions/china_early_2020_ips.csv"
scenario_df.to_csv(china_ips_path, index=False)