In [28]:
import pickle as pkl
import numpy as np
from collections import OrderedDict
from matplotlib import pyplot as plt
from pymc3.stats import quantiles
import os
import pandas as pd
from pathlib import Path
import sys

In [29]:
# Put the project's `src` directory on the import path so `shared_utils` can be found.
# NOTE(review): this is an absolute, user-specific path; the import below only resolves
# through this entry on machines where that directory exists.
sys.path.append('/p/project/covid19dynstat/nieters1/BSTIM-Covid19/src')
# Star import: helpers called in later cells without any other visible import
# (make_county_dict, load_data_n_weeks, split_data, load_predictions,
# load_trend_predictions, and the bare `timedelta`) presumably come from here — TODO confirm.
from shared_utils import *

In [43]:
# Offset in days that is added to 2020-01-28 to obtain `start_day`.
start = 84
n_weeks = 3  # 3 week windows cut out!

# Load the pickled `counties` object from the data directory.
with open('../data/counties/counties.pkl', "rb") as counties_file:
    counties = pkl.load(counties_file)

# Mapping that is iterated later as (county, county_id) pairs.
countyByName = make_county_dict()

start_day = pd.Timestamp('2020-01-28') + pd.Timedelta(days=start)

# Zero-padded date parts; they name the per-day output folder and the CSV paths.
year, month, day = start_day.strftime('%Y-%m-%d').split('-')

# Create the output folder for this start day (no error if it already exists).
day_folder_path = "../csv/{}_{}_{}".format(year, month, day)
Path(day_folder_path).mkdir(parents=True, exist_ok=True)

In [44]:
prediction_region = "germany"
data = load_data_n_weeks(start, n_weeks, prediction_region, counties)

# Reference dates, all derived from the first day of the window.
start_day = pd.Timestamp('2020-01-28') + pd.Timedelta(days=start)
i_start_day = 0  # start offset used when slicing axis 1 of the sample arrays
five_days = pd.Timedelta(days=5)
day_0 = start_day + pd.Timedelta(days=n_weeks * 7 + 5)
day_m5 = day_0 - five_days
day_p5 = day_0 + five_days

# split_data returns four values; only the second one is used here.
_, target, _, _ = split_data(data, train_start=start_day, test_start=day_0, post_test=day_p5)

# The columns of `target` are the county ids used to label every per-county frame.
county_ids = target.columns

# Load our prediction samples (regular and trend variants).
res = load_predictions(start, n_weeks)
res_trend = load_trend_predictions(start, n_weeks)


In [45]:
# Number of whole days between start_day and day_p5.
n_days = (day_p5 - start_day).days

# Every sample array is reshaped to (n_samples, -1, 412) and then cut along axis 1
# to the n_days entries beginning at i_start_day.
# NOTE(review): 412 is presumably the number of counties (columns of `target`) — confirm.
day_slice = slice(i_start_day, i_start_day + n_days)

prediction_samples = np.reshape(res['y'], (res['y'].shape[0], -1, 412))
prediction_samples = prediction_samples[:, day_slice, :]

prediction_samples_trend = np.reshape(res_trend['y'], (res_trend['y'].shape[0], -1, 412))
prediction_samples_trend = prediction_samples_trend[:, day_slice, :]

prediction_samples_trend_mu = np.reshape(res_trend['μ'], (res_trend['μ'].shape[0], -1, 412))
# Fix: slice the reshaped μ samples themselves. The previous code sliced
# `prediction_samples_trend` here, which silently replaced the μ samples with the
# `y` samples, so every statistic derived from this variable was computed on `y`.
prediction_samples_trend_mu = prediction_samples_trend_mu[:, day_slice, :]

# Extend the observed dates of `target` day by day up to (but excluding) day_p5.
# pd.Timedelta is used so this cell does not rely on a bare `timedelta` name that
# no visible import provides.
one_day = pd.Timedelta(days=1)
ext_index = pd.DatetimeIndex(
    list(target.index)
    + list(pd.date_range(target.index[-1] + one_day, day_p5 - one_day)))

In [46]:
# Percentile levels passed to `quantiles`; the results are looked up by these
# same keys (5, 25, 75, 95) when the per-county frames are built.
quantile_levels = (5, 25, 75, 95)
prediction_quantiles = quantiles(prediction_samples, quantile_levels)
prediction_quantiles_trend = quantiles(prediction_samples_trend, quantile_levels)

In [47]:
def _per_county_frame(values):
    """Wrap `values` in a DataFrame indexed by `ext_index` with `target`'s columns."""
    return pd.DataFrame(data=values, index=ext_index, columns=target.columns)


# Raw predictions: mean over axis 0 of the samples, plus the four quantile levels.
prediction_mean = _per_county_frame(np.mean(prediction_samples, axis=0))
prediction_q25 = _per_county_frame(prediction_quantiles[25])
prediction_q75 = _per_county_frame(prediction_quantiles[75])
prediction_q5 = _per_county_frame(prediction_quantiles[5])
prediction_q95 = _per_county_frame(prediction_quantiles[95])

# Trend predictions: the mean is taken over `prediction_samples_trend_mu`, while the
# quantile bands come from `prediction_quantiles_trend`.
prediction_mean_trend = _per_county_frame(np.mean(prediction_samples_trend_mu, axis=0))
prediction_q25_trend = _per_county_frame(prediction_quantiles_trend[25])
prediction_q75_trend = _per_county_frame(prediction_quantiles_trend[75])
prediction_q5_trend = _per_county_frame(prediction_quantiles_trend[5])
prediction_q95_trend = _per_county_frame(prediction_quantiles_trend[95])

In [117]:
# Write one CSV per county with raw/trend prediction statistics and the observed values.

# `target` only covers the leading part of `ext_index`; the remainder is padded with
# NaN. The pad length is derived from the two indices instead of being hard-coded,
# so it stays correct if the length of the extension changes.
n_missing = len(ext_index) - len(target.index)

# Row flags do not depend on the county, so they are computed once.
is_nowcast = (day_m5 <= ext_index) & (ext_index < day_0)
is_prediction = day_0 <= ext_index

for county, county_id in countyByName.items():
    reported = np.append(target.loc[:, county_id].values, np.repeat(np.nan, n_missing))
    county_data = pd.DataFrame(
        {
            'Raw Prediction Mean': prediction_mean.loc[:, county_id].values,
            'Raw Prediction Q25': prediction_q25.loc[:, county_id].values,
            'Raw Prediction Q75': prediction_q75.loc[:, county_id].values,
            'Raw Prediction Q5': prediction_q5.loc[:, county_id].values,
            'Raw Prediction Q95': prediction_q95.loc[:, county_id].values,
            'Trend Prediction Mean': prediction_mean_trend.loc[:, county_id].values,
            'Trend Prediction Q25': prediction_q25_trend.loc[:, county_id].values,
            'Trend Prediction Q75': prediction_q75_trend.loc[:, county_id].values,
            'Trend Prediction Q5': prediction_q5_trend.loc[:, county_id].values,
            'Trend Prediction Q95': prediction_q95_trend.loc[:, county_id].values,
            'RKI Meldedaten': reported,
            'is_nowcast': is_nowcast,
            'is_prediction': is_prediction,
        },
        index=ext_index,
    )
    # The file is named after the county id, which the loop already provides.
    county_data.to_csv("../csv/{}_{}_{}/{}.csv".format(year, month, day, county_id))

In [120]:
# Spot check: the vector stored at index 1 of axis 0 and index 1 of axis 1.
prediction_samples[1, 1]

array([10,  6,  0,  1,  4,  6,  1, 11,  2, 18,  4, 10,  2,  3,  0,  0, 12,
        4,  8, 10,  2,  0,  0,  2,  3,  0,  0, 25, 22,  5,  2, 27, 11, 19,
        0,  1,  7, 13,  0,  4,  0,  3,  2,  8, 12,  0,  0,  0,  0,  1, 11,
        1,  1,  3, 14,  3,  1,  9, 18,  0,  6, 11, 13, 13,  0,  0,  9,  1,
        0,  0,  3,  2,  0, 18,  0, 26,  8,  0,  3, 38,  3,  7,  0, 10, 25,
       57,  3,  1,  2,  0,  0,  8,  0,  5,  6, 15, 33, 10,  0,  0, 28,  3,
        1,  3,  8,  1,  1,  2,  2, 14,  2,  5,  0,  6, 37,  6,  9, 43,  1,
        2,  3, 13,  0, 10, 56, 10,  2,  0,  0,  0, 25,  1,  1,  1,  0,  0,
        1,  0,  1,  3,  1,  6,  4,  3,  8,  4,  8,  0,  2,  0,  0,  9,  0,
        2,  3,  2,  0, 22,  1,  4,  6,  6, 17,  9,  2,  7, 12, 11,  4,  1,
        2,  1,  0,  9,  7,  4,  6, 64,  8,  3,  4,  1,  1, 53,  1,  0,  2,
        0, 10,  2, 27,  0,  3,  0,  3,  1,  0,  5,  2,  0,  1,  1, 26, 36,
       11,  0,  3, 10,  0,  1,  3, 17, 10,  3,  0,  5,  2, 10,  3, 32,  3,
        6,  0,  4,  0,  4

In [121]:
# Scratch copy of the mean over axis 0 of the raw prediction samples.
test = prediction_samples.mean(axis=0)

In [124]:
# Spot check of a single entry of the averaged array.
test[1, 2]

1.203