In [None]:
# default_exp data

In [None]:
%load_ext autoreload
%autoreload 2

# Data

> This module generates a dataset of new-case predictions for 2020. The dataset is meant for training and testing prescriptors.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# export
import pandas as pd
import itertools
import numpy as np

from covid_xprize.standard_predictor import *
from covid_xprize.validation import scenario_generator

from covid_xprize.scoring.predictor_scoring import *
from covid_xprize.scoring.prescriptor_scoring import *

from covid_xprize.examples.prescriptors.neat import utils 
from covid_xprize.standard_predictor import xprize_predictor

from pathlib import Path

In [None]:
# Notebook-level predictor instance; the next cell inspects its `df` attribute.
predictor = xprize_predictor.XPrizePredictor()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the predictor's DataFrame.
predictor.df.info()

In [None]:
# Bare name: the cell just displays the object's repr.
# NOTE(review): this name is not imported explicitly anywhere in the notebook;
# presumably it comes from one of the star imports - TODO confirm.
generate_cases_and_stringency_for_prescriptions

In [None]:
# Display the NPI column names exposed by the predictor module.
xprize_predictor.NPI_COLUMNS


In [None]:
# Display the per-NPI limits that npis_combinations iterates over.
scenario_generator.MAX_NPIS

In [None]:
# export

def predict_cases(start, end, ips_historic=None, historical_cutoff_date='2020-07-31'):
    '''Build the "Historical" NPI scenario between `start` and `end`.

    Despite its name, this function does not run the predictor yet: it only
    returns what `scenario_generator.generate_scenario` produces.

    Parameters
    ----------
    start, end :
        Bounds of the scenario, forwarded unchanged to `generate_scenario`.
    ips_historic : optional
        Historical data forwarded to `generate_scenario` as its third
        argument. When None, the `df` attribute of a freshly created
        `XPrizePredictor` is used instead.
        NOTE(review): assumes that frame has the columns `generate_scenario`
        needs - TODO confirm.
    historical_cutoff_date : str
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    Whatever `generate_scenario` returns for the "Historical" scenario.
    '''
    if ips_historic is None:
        # Only instantiate the predictor when the caller supplied no data.
        ips_historic = xprize_predictor.XPrizePredictor().df

    # countries=None requests no country filter. The previous body passed an
    # undefined `countries` name here, which raised NameError.
    return scenario_generator.generate_scenario(start,
                                                end,
                                                ips_historic,
                                                countries=None,
                                                scenario="Historical")
    

In [None]:
# export

def npis_combinations(fixed_npis=None):
    '''Enumerate combinations of NPI values as a DataFrame, one column per NPI.

    Parameters
    ----------
    fixed_npis : dict, optional
        Maps an NPI column name to the single value that column is pinned to.
        Pinned columns are excluded from the enumeration. Columns not listed
        take every value in `range(max)` for their entry in
        `scenario_generator.MAX_NPIS`.

    Returns
    -------
    pd.DataFrame
        One row per combination, with columns `scenario_generator.NPI_COLUMNS`.

    Raises
    ------
    ValueError
        If `fixed_npis` names a column that is not in `NPI_COLUMNS`.
    '''
    # Copy so the caller's dict is never mutated; None avoids a shared mutable default.
    fixed_npis = dict(fixed_npis or {})
    npi_columns = list(scenario_generator.NPI_COLUMNS)

    unknown = set(fixed_npis) - set(npi_columns)
    if unknown:
        raise ValueError('Unknown NPI column(s) in fixed_npis: {}'.format(sorted(unknown)))

    # NOTE(review): range(npi_max) yields 0..npi_max-1. If MAX_NPIS holds the
    # largest *valid* value of each NPI, the top level is never generated and
    # this should be range(npi_max + 1) - TODO confirm against scenario_generator.
    npi_values = [
        [fixed_npis[column]] if column in fixed_npis else list(range(npi_max))
        for column, npi_max in zip(npi_columns, scenario_generator.MAX_NPIS)
    ]
    # np.prod replaces np.product, which NumPy 2.0 removed.
    num_combinations = int(np.prod([len(values) for values in npi_values]))

    npi_combinations = list(itertools.product(*npi_values))
    # The original `assert len(...), n` only checked truthiness; compare for real.
    assert len(npi_combinations) == num_combinations, (
        'expected {} combinations, got {}'.format(num_combinations, len(npi_combinations)))

    return pd.DataFrame(npi_combinations, columns=npi_columns)

In [None]:
# export

def load_predictor_simulations(path='data/simulations'):
    '''Load every simulation CSV under `path` into one long-format DataFrame.

    Each CSV is expected to have a `Date` column; every other column is melted
    into a (`GeoID`, `PredNewCases`) pair. The first 12 characters of the file
    stem are read as the simulation code: each character is parsed as an int
    and assigned, in order, to the columns in `xprize_predictor.NPI_COLUMNS`.

    Parameters
    ----------
    path : str or Path
        Directory searched recursively for `*.csv` files.

    Returns
    -------
    pd.DataFrame
        Concatenation of all simulations with columns `Date`, `GeoID`,
        `PredNewCases`, the NPI columns and `SimCode`. Each file keeps its
        own row index, so index values repeat across files.

    Raises
    ------
    ValueError
        If no CSV file is found, or a simulation code contains a non-digit.
    '''
    # Honour `path` (it used to be ignored) and sort so row order is reproducible.
    sim_csvs = sorted(Path(path).rglob('*.csv'))
    if not sim_csvs:
        raise ValueError('No simulation CSV files found under {!r}'.format(str(path)))

    sim_dfs = []
    for sim_csv in sim_csvs:
        sim_code = sim_csv.stem[:12]
        npis = [int(digit) for digit in sim_code]

        df = (pd.read_csv(sim_csv, low_memory=False)
                .melt(id_vars='Date')
                .rename(columns={'variable': 'GeoID', 'value': 'PredNewCases'}))
        # One scalar per NPI column, broadcast down all rows of this simulation.
        df[xprize_predictor.NPI_COLUMNS] = npis
        df['SimCode'] = sim_code
        sim_dfs.append(df)

    return pd.concat(sim_dfs)
    
    

In [None]:
# Load the simulations using the function's default `path` argument.
sim_df = load_predictor_simulations()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the loaded simulations.
sim_df.info()

In [None]:
# export

def generate_scenario_multiverse(alt_universe_start_date='2020-08-01',
                                 alt_universe_end_date='2021-01-01',
                                 countries=None):
    '''Return the predictor's historical data with 14-day look-ahead columns.

    Three columns are added, each computed per `GeoID`:

    * `NextNewCases`  - `SmoothNewCases` 14 rows ahead.
    * `RateNewCases`  - `(future - current) / future` for `SmoothNewCases`,
      where `future` is the value 14 rows ahead.
    * `RateNewDeaths` - the same ratio for `SmoothNewDeaths`.

    Parameters
    ----------
    alt_universe_start_date, alt_universe_end_date : str
        Dates in `%Y-%m-%d` format. They are validated but do not affect the
        result yet (see the TODO in the body).
    countries : optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    pd.DataFrame
        A copy of `XPrizePredictor().df` with the three columns above.

    Raises
    ------
    ValueError
        If either date does not match `%Y-%m-%d`.
    '''
    lookahead_rows = 14  # assumes one row per GeoID per day, i.e. 14 days - TODO confirm

    # Parsed up front so malformed dates fail fast.
    start_date = pd.to_datetime(alt_universe_start_date, format='%Y-%m-%d')
    end_date = pd.to_datetime(alt_universe_end_date, format='%Y-%m-%d')

    # Create the predictor *before* reading it. The original assigned
    # `predictor` further down, which made the name local and raised
    # UnboundLocalError on the first read.
    predictor = xprize_predictor.XPrizePredictor()

    historical_df = predictor.df.copy()
    by_geo = historical_df.groupby('GeoID')
    historical_df['NextNewCases'] = by_geo['SmoothNewCases'].transform('shift', -lookahead_rows)
    # pct_change with a negative period is current / future - 1; negating it
    # gives (future - current) / future.
    historical_df['RateNewCases'] = -by_geo['SmoothNewCases'].transform('pct_change', -lookahead_rows)
    historical_df['RateNewDeaths'] = -by_geo['SmoothNewDeaths'].transform('pct_change', -lookahead_rows)

    # TODO: the "multiverse" step is unfinished. The original looped three
    # times over `shuffle_geoids_history()`, a helper not defined in this
    # notebook, and then discarded the output of
    # `predictor.predict_from_df(start_date, end_date, ...)`. Neither affected
    # the return value, so both are left out until the helper exists.

    return historical_df

In [None]:
# Date range passed to generate_scenario_multiverse in the next cell.
start_date_str='2020-03-01'
end_date_str='2021-01-27'

In [None]:
# Build the historical frame for the date range defined in the previous cell.
test_history_df = generate_scenario_multiverse(start_date_str, end_date_str)

In [None]:
# Spot-check: last 30 rows for the "Mexico" GeoID.
test_history_df.query('GeoID == "Mexico"').tail(30)

In [None]:
# Persist the frame to the current working directory, without the row index.
test_history_df.to_csv('test_history_df.csv', index=False)

In [None]:
# Rebinds start_date_str / end_date_str, which an earlier cell also sets.
start_date_str='2020-06-01'
end_date_str='2021-01-01'

# NOTE(review): `predict` is not imported by name in this notebook; presumably
# it comes from the `covid_xprize.standard_predictor` star import - TODO confirm.
# Reads the intervention plans from 'ips_test_scenarios.csv' and writes the
# output to './test_predict.csv' (both relative to the working directory).
predict.predict(pd.to_datetime(start_date_str, yearfirst=True),
                pd.to_datetime(end_date_str, yearfirst=True),
                path_to_ips_file='ips_test_scenarios.csv',
                output_file_path='./test_predict.csv')

In [None]:
# Historical frame as prepared by the NEAT example prescriptor's utils module.
historical_df = utils.prepare_historical_df()

In [None]:
# Print a summary (columns, dtypes, non-null counts) of the prepared frame.
historical_df.info()

In [None]:
# NOTE(review): `load_dataset` is not imported by name in this notebook;
# presumably it comes from one of the star imports - TODO confirm.
latest_df = load_dataset(geos_file='./countries_regions.csv')
# Bare name as the last expression: displays the whole frame.
latest_df

In [None]:
# nbdev export step; kept as the last cell of the notebook.
from nbdev.export import notebook2script
notebook2script()