# Prescriptor submission


In [1]:
import os

import pandas as pd

## Inputs

### Past interventions
As with `predict.py`, `prescribe.py` expects the list of past interventions for a list of countries. For example:

In [2]:
# Example file of historical intervention plans (IPs)
EXAMPLE_IP_FILE = "../../../covid_xprize/validation/data/2020-09-30_historical_ip.csv"
ip_df = pd.read_csv(
    EXAMPLE_IP_FILE,
    encoding="ISO-8859-1",
    dtype={"RegionName": str},  # read region names as a string column
    parse_dates=['Date'],
)
# Show the last rows, i.e. the most recent dates of the last country in the file
ip_df.tail(n=5)

Unnamed: 0,CountryName,RegionName,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
64659,Zimbabwe,,2020-09-26,2.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,3.0
64660,Zimbabwe,,2020-09-27,2.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,3.0
64661,Zimbabwe,,2020-09-28,2.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,3.0
64662,Zimbabwe,,2020-09-29,2.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,1.0
64663,Zimbabwe,,2020-09-30,2.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,1.0


### Intervention costs
Intervention plan costs can differ across regions. For example, closing public transportation may be
much costlier in London than it is in Los Angeles. Such preferences are expressed as weights
associated with each intervention plan dimension, given to the prescriptor as input for each region.

For example:

In [3]:
# Example file of per-region cost weights, one weight per intervention column
EXAMPLE_COSTS_FILE = "../../../covid_xprize/validation/data/uniform_random_costs.csv"
costs_df = pd.read_csv(
    EXAMPLE_COSTS_FILE,
    encoding="ISO-8859-1",
    dtype={"RegionName": str},  # read region names as a string column
)
costs_df.head(n=5)

Unnamed: 0,CountryName,RegionName,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Afghanistan,,0.83,1.71,1.44,0.13,1.16,1.43,0.55,1.44,1.02,1.25,0.67,0.38
1,Albania,,0.14,1.44,0.1,1.79,0.1,1.44,1.44,0.1,1.6,1.62,0.49,1.74
2,Algeria,,0.06,0.13,0.55,0.87,1.92,1.9,1.95,1.02,1.42,0.26,0.6,1.31
3,Andorra,,0.33,1.56,1.45,0.22,0.22,0.8,0.96,1.55,0.4,1.63,1.52,1.35
4,Angola,,1.01,0.51,0.76,0.55,0.89,0.57,1.54,1.5,0.34,0.99,1.77,1.56


## Expected output
A call to `prescribe.py` should produce this kind of file:

In [4]:
# Example of the prescriptions file a prescriptor is expected to write:
# one row per PrescriptionIndex / country / region / date.
EXAMPLE_OUTPUT_FILE = "../../../data/2020-08-01_2020-08-04_prescriptions_example.csv"
prediction_output_df = pd.read_csv(
    EXAMPLE_OUTPUT_FILE,
    encoding="ISO-8859-1",
    parse_dates=['Date'],
)
prediction_output_df.head(n=5)

Unnamed: 0,PrescriptionIndex,CountryName,RegionName,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,0,Aruba,,2020-08-01,0,1,1,0,0,2,1,3,1,2,0,2
1,0,Aruba,,2020-08-02,0,2,0,1,1,1,1,0,0,0,1,3
2,0,Aruba,,2020-08-03,0,0,1,3,0,1,1,2,1,1,0,3
3,0,Aruba,,2020-08-04,1,2,1,3,0,2,1,2,1,2,1,2
4,0,Afghanistan,,2020-08-01,1,1,1,1,0,0,0,3,1,1,0,3


# Prescribe

In [5]:
# Parameters of the prescription run; each one is passed to prescribe.py on the command line.
# First and last date to prescribe for (-s / -e)
start_date = "2020-08-01"
end_date = "2020-08-04"
# Historical intervention plans (-ip)
ip_file = "../../../covid_xprize/validation/data/2020-09-30_historical_ip.csv"
# Intervention cost weights (-c)
costs_file = "../../../covid_xprize/validation/data/uniform_random_costs.csv"
# Destination of the generated prescriptions (-o)
output_file = "prescriptions/2020-08-01_2020-08-04.csv"

In [6]:
# Run the prescriptor as a shell command; IPython expands each {name} to the value of the
# Python variable of that name before the command is executed.
!python prescribe.py -s {start_date} -e {end_date} -ip {ip_file} -c {costs_file} -o {output_file}

Generating prescriptions from 2020-08-01 to 2020-08-04...
Done!


In [7]:
# Peek at the first lines of the generated prescriptions file (shell `head`)
!head {output_file}

GeoID,CountryName,RegionName,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,PrescriptionIndex
Afghanistan,Afghanistan,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Albania,Albania,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Algeria,Algeria,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Andorra,Andorra,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Angola,Angola,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Argentina,Argentina,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Aruba,Aruba,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Australia,Australia,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0
Austria,Austria,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4,0


# Validate

In [8]:
from covid_xprize.validation.prescriptor_validation import validate_submission

In [9]:
# validate_submission returns the problems it found; an empty result means the file is valid
errors = validate_submission(start_date, end_date, ip_file, output_file)
if not errors:
    print("All good!")
else:
    # Report every problem, one per line
    for error in errors:
        print(error)

All good!


# Evaluate

## Load generated prescriptions

In [10]:
# Read back the prescriptions file written by prescribe.py
presc_df = pd.read_csv(filepath_or_buffer=output_file)

In [11]:
# Each distinct PrescriptionIndex value identifies one prescription plan in the file
prescription_indexes = [*presc_df["PrescriptionIndex"].unique()]
print(f"Loaded {len(prescription_indexes)} prescriptions:")
prescription_indexes

Loaded 1 prescriptions:


[0]

In [12]:
# Isolate the first prescription plan
idx = 0
idx_df = (
    presc_df
    .loc[presc_df['PrescriptionIndex'] == idx]
    .drop(columns=['PrescriptionIndex'])  # Predictor doesn't need this
)
idx_df.head(n=5)

Unnamed: 0,GeoID,CountryName,RegionName,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Afghanistan,Afghanistan,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4
1,Albania,Albania,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4
2,Algeria,Algeria,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4
3,Andorra,Andorra,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4
4,Angola,Angola,,2020-08-01,3,3,2,4,2,3,2,4,2,3,2,4


In [13]:
# Save the selected prescription plan to its own file, to be used as the predictor's IP file.
# index=False keeps the DataFrame's row index out of the CSV; without it the file would
# start with an extra unnamed column that is not part of the intervention plan.
presc_x_file = "prescriptions/2020-08-01_2020-08-04_0.csv"
idx_df.to_csv(presc_x_file, index=False)

## Compute prescription plan number of cases
Run the prescription plan through the predictor to get its predicted number of daily new cases.

In [14]:
# Run the standard predictor
import os
pred_x_file = "predictions/2020-08-01_2020-08-04_0.csv"
os.makedirs(os.path.dirname(pred_x_file), exist_ok=True)
!python ../../../predict.py -s {start_date} -e {end_date} -ip {presc_x_file} -o {pred_x_file}

Generating predictions from 2020-08-01 to 2020-08-04...
Saved predictions to predictions/2020-08-01_2020-08-04_0.csv
Done!


In [15]:
# Read the predictions the predictor wrote for this prescription plan
preds_x_df = pd.read_csv(filepath_or_buffer=pred_x_file)
preds_x_df.head(n=5)

Unnamed: 0,CountryName,RegionName,Date,PredictedDailyNewCases
0,Afghanistan,,2020-08-01,63.842384
1,Afghanistan,,2020-08-02,139.567887
2,Afghanistan,,2020-08-03,126.598322
3,Afghanistan,,2020-08-04,128.225243
4,Albania,,2020-08-01,95.078698


## Compute prescription plan stringency

In [16]:
# Read the intervention cost weights that were given to the prescriptor
cost_df = pd.read_csv(filepath_or_buffer=costs_file)
cost_df.head(n=5)

Unnamed: 0,CountryName,RegionName,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Afghanistan,,0.83,1.71,1.44,0.13,1.16,1.43,0.55,1.44,1.02,1.25,0.67,0.38
1,Albania,,0.14,1.44,0.1,1.79,0.1,1.44,1.44,0.1,1.6,1.62,0.49,1.74
2,Algeria,,0.06,0.13,0.55,0.87,1.92,1.9,1.95,1.02,1.42,0.26,0.6,1.31
3,Andorra,,0.33,1.56,1.45,0.22,0.22,0.8,0.96,1.55,0.4,1.63,1.52,1.35
4,Angola,,1.01,0.51,0.76,0.55,0.89,0.57,1.54,1.5,0.34,0.99,1.77,1.56


In [17]:
from covid_xprize.standard_predictor.xprize_predictor import NPI_COLUMNS
from covid_xprize.scoring.prescriptor_scoring import weight_prescriptions_by_cost

In [18]:
# Weight the prescribed intervention levels by their cost weights.
# In the result, the "_pres" columns hold the prescribed levels and the plain
# intervention columns hold the weighted values.
weighted_x_df = weight_prescriptions_by_cost(idx_df, cost_df)
weighted_x_df.head(n=5)

Unnamed: 0,GeoID,CountryName,RegionName,Date,C1_School closing_pres,C2_Workplace closing_pres,C3_Cancel public events_pres,C4_Restrictions on gatherings_pres,C5_Close public transport_pres,C6_Stay at home requirements_pres,...,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Afghanistan,Afghanistan,,2020-08-01,3,3,2,4,2,3,...,2.88,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52
1,Afghanistan,Afghanistan,,2020-08-02,3,3,2,4,2,3,...,2.88,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52
2,Afghanistan,Afghanistan,,2020-08-03,3,3,2,4,2,3,...,2.88,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52
3,Afghanistan,Afghanistan,,2020-08-04,3,3,2,4,2,3,...,2.88,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52
4,Albania,Albania,,2020-08-01,3,3,2,4,2,3,...,0.2,7.16,0.2,4.32,2.88,0.4,3.2,4.86,0.98,6.96


In [19]:
# Stringency of a row = sum of its weighted intervention columns
weighted_x_df['Stringency'] = weighted_x_df[NPI_COLUMNS].sum(axis="columns")

In [20]:
# Check the new Stringency column (last column of the frame)
weighted_x_df.head(n=5)

Unnamed: 0,GeoID,CountryName,RegionName,Date,C1_School closing_pres,C2_Workplace closing_pres,C3_Cancel public events_pres,C4_Restrictions on gatherings_pres,C5_Close public transport_pres,C6_Stay at home requirements_pres,...,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,Stringency
0,Afghanistan,Afghanistan,,2020-08-01,3,3,2,4,2,3,...,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52,33.14
1,Afghanistan,Afghanistan,,2020-08-02,3,3,2,4,2,3,...,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52,33.14
2,Afghanistan,Afghanistan,,2020-08-03,3,3,2,4,2,3,...,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52,33.14
3,Afghanistan,Afghanistan,,2020-08-04,3,3,2,4,2,3,...,0.52,2.32,4.29,1.1,5.76,2.04,3.75,1.34,1.52,33.14
4,Albania,Albania,,2020-08-01,3,3,2,4,2,3,...,7.16,0.2,4.32,2.88,0.4,3.2,4.86,0.98,6.96,35.9


In [21]:
# Summarise the plan with two numbers, each averaged over all rows of its frame:
# the predicted daily new cases and the stringency.
mean_stringency = weighted_x_df["Stringency"].mean()
mean_new_cases = preds_x_df["PredictedDailyNewCases"].mean()
print(f"Mean predicted daily new cases: {mean_new_cases}")
print(f"Mean stringency: {mean_stringency}")

Mean predicted daily new cases: 1663.8022552808468
Mean stringency: 34.015762711864404


# Robojudge
Time to compare these prescriptions to others: have a look at the [prescriptor robojudge notebook](prescriptor_robojudge.ipynb).