In [None]:
%config InlineBackend.figure_formats = ['svg']
%matplotlib inline

In [None]:
import oscovida
import pandas as pd
import matplotlib.pyplot as plt
from oscovida import unpack_region_subregion, germany_get_region, label_from_region_subregion, get_compare_data, \
    align_sets_at, plot_logdiff_time

In [None]:
# Run the packaged oscovida implementation for the Landkreis "LK Pinneberg"
# (no state given), leaving every other argument at its default.
# The trailing ';' suppresses the display of the call's return value.
oscovida.make_compare_plot_germany((None, "LK Pinneberg"));

In [None]:
def day0atleast(v0, series):
    """Re-index ``series`` by whole days relative to the first value above ``v0``.

    "Day 0" is the index label of the first entry whose value is strictly
    greater than ``v0``; entries before it get negative day numbers.

    Parameters
    ----------
    v0 : number
        Threshold that defines day 0.
    series : pandas.Series
        Values with a datetime-like index (each label has day 0 subtracted
        from it and the difference is read as whole days).

    Returns
    -------
    pandas.Series
        The values of ``series`` on an integer index of days since day 0.
        An empty Series (dtype ``object``) if no value exceeds ``v0``.
    """
    above_threshold = series[series > v0]
    if above_threshold.empty:
        # no days found for which series.values > v0
        return pd.Series(dtype=object)
    day0 = above_threshold.index[0]

    # `.days` yields the whole-day component as integers. The previous
    # `astype("timedelta64[D]")` conversion is rejected by pandas >= 2.0,
    # which only supports second-or-finer timedelta resolutions.
    days_since_day0 = (series.index - day0).days.astype("int64")

    # Assemble new series: same values, integer day offsets as index
    return pd.Series(index=days_since_day0, data=series.values)


In [None]:
# Empty scratch frame (no rows, no columns).
res = pd.DataFrame()

In [None]:
# Write the frame through a context manager so the file handle is flushed and
# closed as soon as the HTML is written; a bare open() leaves the handle
# dangling, and a later reader may see an incomplete file.
with open("test.html", "tw") as html_file:
    res.to_html(html_file)

In [None]:
!open test.html

In [None]:
def align_sets_at(v0, df, debug_html_prefix=None):
    """Align all columns of ``df`` on the day each one first exceeds ``v0``.

    Every column is re-indexed with ``day0atleast`` (integer days relative to
    the first value above ``v0``) and the results are outer-merged on that
    integer index, so "day 0" of every column ends up on the same row.

    Parameters
    ----------
    v0 : number
        Threshold handed to ``day0atleast`` for every column.
    df : pandas.DataFrame
        One column per data set to align.
    debug_html_prefix : str, optional
        Debugging aid, off by default. When given, the intermediate result
        after merging the i-th column (counting from 1) is written to
        ``f"{debug_html_prefix}-{i}.html"``. Passing ``"test-html"``
        reproduces the file names of the earlier always-on debug dump.

    Returns
    -------
    pandas.DataFrame
        New frame with an integer index (representing days after ``v0`` was
        exceeded) and one column per column of ``df``.
    """
    res = pd.DataFrame()

    for i, col in enumerate(df.columns, start=1):
        series = day0atleast(v0, df[col])
        # pd.merge needs a named Series; the name becomes the column label
        series.name = col
        res = pd.merge(res, series, how='outer', left_index=True, right_index=True)

        if debug_html_prefix is not None:
            # Context manager so the dump is flushed and the handle closed
            with open(f'{debug_html_prefix}-{i}.html', 'tw') as html_file:
                res.to_html(html_file)

    return res


In [None]:
# Packaged oscovida implementation for "Hamburg", compared against an explicit
# list of 16 regions (which includes "Hamburg" itself). The result is unpacked
# into axes, res_c and res_d for later inspection.
axes, res_c, res_d = oscovida.make_compare_plot_germany(
    "Hamburg",
    compare_with_local=[
        'Baden-Württemberg',
        'Bayern',
        'Berlin',
        'Brandenburg',
        'Bremen',
        'Hamburg',
        'Hessen',
        'Mecklenburg-Vorpommern',
        'Niedersachsen',
        'Nordrhein-Westfalen',
        'Rheinland-Pfalz',
        'Saarland',
        'Sachsen',
        'Sachsen-Anhalt',
        'Schleswig-Holstein',
        'Thüringen',
    ],
);

In [None]:
# Packaged oscovida implementation for "Hamburg" again, this time compared
# against a shorter list of seven regions (which includes "Hamburg" itself).
axes, res_c, res_d = oscovida.make_compare_plot_germany(
    "Hamburg",
    compare_with_local=[
        'Bayern',
        'Berlin',
        'Bremen',
        'Hamburg',
        'Hessen',
        'Nordrhein-Westfalen',
        'Sachsen-Anhalt',
    ],
);

In [None]:
# Packaged oscovida implementation for the Landkreis "LK Pinneberg" with
# default comparison arguments; the result is unpacked into axes, res_c, res_d.
axes, res_c, res_d = oscovida.make_compare_plot_germany((None, "LK Pinneberg"));

In [None]:
def get_compare_data_germany(region_subregion, compare_with_local, rolling=7):
    """Collect smoothed daily changes for a German region and its comparison regions.

    Given a region_subregion for Germany, and a list of region_subregion to
    compare with, return two dataframes: one with cases and one with deaths,
    where

    - each column is one region (labelled via ``label_from_region_subregion``),
      with ``region_subregion`` itself as the first column
    - data in the column is the diff of the numbers returned by
      ``germany_get_region`` (presumably accumulated counts; confirm there)
    - a centered rolling mean over ``rolling`` entries is applied as smoothing

    The frames are indexed by the union of all regions' index labels; a
    region has NaN where it has no entry.

    See unpack_region_subregion for details on region_subregion.

    Parameters
    ----------
    region_subregion
        Region of primary interest.
    compare_with_local : iterable
        Further region_subregion values to include.
    rolling : int, default 7
        Window size of the centered rolling mean.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(df_cases, df_deaths)``.
    """
    # Gather one Series per label first and build the frames in one go.
    # Assigning columns one at a time (`df[label] = series`) would reindex
    # every later region onto the dates of the first region and silently
    # drop all rows outside of them.
    cases_by_label = {}
    deaths_by_label = {}

    for reg_subreg in [region_subregion, *compare_with_local]:
        region, subregion = unpack_region_subregion(reg_subreg)
        c, d = germany_get_region(state=region, landkreis=subregion)

        # A region that appears twice simply overwrites its own entry
        label = label_from_region_subregion((region, subregion))
        cases_by_label[label] = c.diff().rolling(rolling, center=True).mean()   # cases
        deaths_by_label[label] = d.diff().rolling(rolling, center=True).mean()  # deaths

    # A dict of Series is aligned on the union of the individual indexes
    return pd.DataFrame(cases_by_label), pd.DataFrame(deaths_by_label)


def make_compare_plot_germany(region_subregion,
                              compare_with=[],  # e.g. "China", "Italy", "Germany"
                              compare_with_local=['Bayern',
                                                  'Berlin', 'Bremen',
                                                  'Hamburg', 'Hessen',
                                                  'Nordrhein-Westfalen',
                                                  'Sachsen-Anhalt'],
                              v0c=10, v0d=1, rolling=7):
    """Plot smoothed daily new cases and deaths of a German region next to others.

    Two stacked panels are drawn: daily new cases (top) and daily new deaths
    (bottom). Every curve is shifted so that its x axis counts days since it
    first exceeded ``v0c`` cases / ``v0d`` deaths (see ``align_sets_at``).

    Parameters
    ----------
    region_subregion
        Region to highlight; see unpack_region_subregion for the format.
    compare_with : list
        Passed to ``get_compare_data`` (presumably country names, going by
        the inline hint; confirm against oscovida).
    compare_with_local : list
        German region_subregion values to compare with. The default subset
        is chosen to look sensible on 2 May 2020; the alternative considered
        was the full list 'Baden-Württemberg', 'Bayern', 'Berlin',
        'Brandenburg', 'Bremen', 'Hamburg', 'Hessen',
        'Mecklenburg-Vorpommern', 'Niedersachsen', 'Nordrhein-Westfalen',
        'Rheinland-Pfalz', 'Saarland', 'Sachsen', 'Sachsen-Anhalt',
        'Schleswig-Holstein', 'Thüringen'.
    v0c : number, default 10
        Daily-cases threshold that defines day 0 of the top panel.
    v0d : number, default 1
        Daily-deaths threshold that defines day 0 of the bottom panel.
    rolling : int, default 7
        Window of the rolling mean; also shown in the y axis labels.

    Returns
    -------
    (axes, res_c, res_d)
        The two matplotlib axes and the aligned, interpolated case and death
        tables that were plotted.
    """
    region, subregion = unpack_region_subregion(region_subregion)
    df_c1, df_d1 = get_compare_data_germany((region, subregion), compare_with_local, rolling=rolling)
    df_c2, df_d2 = get_compare_data(compare_with, rolling=rolling)

    # need to get index into same timezone before merging
    # (re-bind instead of mutating in place)
    df_d1 = df_d1.set_index(df_d1.index.tz_localize(None))
    df_c1 = df_c1.set_index(df_c1.index.tz_localize(None))

    df_c = pd.merge(df_c1, df_c2, how='outer', left_index=True, right_index=True)
    df_d = pd.merge(df_d1, df_d2, how='outer', left_index=True, right_index=True)

    res_c = align_sets_at(v0c, df_c)
    res_d = align_sets_at(v0d, df_d)

    # We get NaNs for some lines. This seems to originate in the original data set not having a value recorded
    # for all days.
    # For the purpose of this plot, we'll just interpolate between the last and next known values
    # We limit the number of fills to 3 days. (Just a guess to avoid accidental filling of too many NaNs.)
    res_c = res_c.interpolate(method='linear', limit=3)
    res_d = res_d.interpolate(method='linear', limit=3)
    # NOTE(review): the original author's debug marker flagged res_c as
    # "broken here (r20)" at this point; the cause is not visible in this function.

    _fig, axes = plt.subplots(2, 1, figsize=(10, 6))

    # Top panel: cases, with the requested region (first column) highlighted
    plot_logdiff_time(axes[0], res_c, f"days since {v0c} cases",
                      f"daily new cases\n(rolling {rolling}-day mean)",
                      v0=v0c, highlight={res_c.columns[0]: "C1"}, labeloffset=0.5)

    # Bottom panel: deaths
    res_d_0 = res_d[res_d.index >= 0]   # from "day 0" only
    # if we have values in between 0.1 and 1, set the lower `y_limit` on the graph to 0.1
    has_fractional_values = ((res_d_0 > 0.1) & (res_d_0 < 1)).any().any()
    y_limit = 0.1 if has_fractional_values else v0d
    plot_logdiff_time(axes[1], res_d, f"days since {v0d} deaths",
                      f"daily new deaths\n(rolling {rolling}-day mean)",
                      v0=y_limit, highlight={res_d.columns[0]: "C0"},
                      labeloffset=0.5)

    title = f"Daily cases (top) and deaths (below) for Germany: {label_from_region_subregion((region, subregion))}"
    axes[0].set_title(title)

    return axes, res_c, res_d



In [None]:
# Unqualified name: this calls the make_compare_plot_germany defined in this
# notebook, not the packaged oscovida one. rx is the (axes, res_c, res_d) tuple.
rx = make_compare_plot_germany((None, "LK Pinneberg"));

In [None]:
# NOTE(review): identical to the call in the previous cell; rx is simply
# overwritten with a fresh result.
rx = make_compare_plot_germany((None, "LK Pinneberg"));

In [None]:
# Scratch: a Series built from the scalar 0.
p = pd.Series(0)

In [None]:
# Scratch: probe how pandas handles division by zero on a Series
# (presumably yields NaN for 0/0 instead of raising; confirm from the output).
p/0

In [None]:
# NOTE(review): `r20` is never assigned in the visible notebook, so this cell
# raises NameError on a fresh kernel. It appears to be a leftover debug value.
r20

In [None]:
# NOTE(review): `r15` is never assigned in the visible notebook, so this cell
# raises NameError on a fresh kernel. It appears to be a leftover debug value.
r15


In [None]:
# NOTE(review): `r1` is never assigned in the visible notebook, so this cell
# raises NameError on a fresh kernel. It appears to be a leftover debug value.
r1

In [None]:
# Show all rows when displaying frames. Use the full option key: the bare
# pattern "max_rows" also matches "styler.render.max_rows" in pandas >= 1.4
# and is then rejected as ambiguous (OptionError).
pd.set_option("display.max_rows", None)