# We start off by reading the data

In [None]:
# Two-letter region codes mapped to the German names shown in chart legends.
# 'DE' is the nationwide entry; every other code is a federal state.
bl_dict = {
    'BB': 'Brandenburg',
    'BE': 'Berlin',
    'BW': 'Baden-Württemberg',
    'BY': 'Bayern',
    'DE': 'Deutschland',
    'HB': 'Bremen',
    'HE': 'Hessen',
    'HH': 'Hamburg',
    'MV': 'Mecklenburg-Vorpommern',
    'NI': 'Niedersachsen',
    'NW': 'Nordrhein-Westfalen',
    'RP': 'Rheinland-Pfalz',
    'SH': 'Schleswig-Holstein',
    'SL': 'Saarland',
    'SN': 'Sachsen',
    'ST': 'Sachsen-Anhalt',
    'TH': 'Thüringen',
}

In [None]:
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
# Region codes to load. Taken from bl_dict (same codes, same order) so that
# every code that gets a DataFrame is guaranteed to have a display name.
BL_KURZEL = list(bl_dict)

# One DataFrame per region code, read from the pickles under data/.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by a trusted source.
dfs = {kurzel: pd.read_pickle(f'data/df_vac_{kurzel}.pkl') for kurzel in BL_KURZEL}

# Route DataFrame.plot() through plotly.
pd.options.plotting.backend = "plotly"


# colors
import plotly.express as px
from itertools import cycle

def get_palette():
    """Return a fresh, endlessly repeating iterator over plotly's qualitative 'Safe' colors."""
    safe_colors = px.colors.qualitative.Safe
    return cycle(safe_colors)


# Shared color iterator; plotting code pulls the next color with next(palette).
palette = get_palette()
# Alternative palettes that can be swapped in:
#palette = cycle(['black', 'grey', 'red', 'blue'])
#palette = cycle(px.colors.sequential.PuBu)

## Let's take a look

In [None]:
# Display the last five rows of the nationwide frame.
dfs['DE'].tail(n=5)

In [None]:
# Last entry of the 'dosen_kumulativ' column of the nationwide frame,
# kept as a one-element Series and displayed as the cell output.
current_official_doses = dfs['DE']['dosen_kumulativ'].tail(1)
#current_official_doses.values[0]
current_official_doses

In [None]:
# Start from the first palette color no matter which cells ran before.
palette = get_palette()
fig = go.Figure()


def add_dosen_for_bl(kurzel):
    """Add the 'shots_sum' line of one region to the module-level ``fig``.

    Args:
        kurzel: Region code; must be a key of both ``dfs`` and ``bl_dict``.
    """
    to_plot = dfs[kurzel].set_index('date')
    fig.add_trace(go.Scatter(x=to_plot.index,
                             y=to_plot['shots_sum'],
                             mode='lines',
                             name=bl_dict[kurzel],
                             marker_color=next(palette),
                             line=dict(width=2.5)))


for kurzel in BL_KURZEL:
    add_dosen_for_bl(kurzel)

# Leave a fresh palette behind, as this cell always did.
palette = get_palette()
fig.update_layout(
    width=1200,
    height=600,
    title='Absolute verteilte Impfdosen',
)
fig.show()

### Average doses over the last x days

In [None]:
def avg_doses_of_last_x_days(last_days):
    """Plot and return the average of ``shots_today`` over the last 1..last_days rows.

    For every window length ``i`` from 1 to ``last_days`` (inclusive), the
    ``shots_today`` values of the last ``i`` rows of ``dfs['DE']`` are averaged
    and truncated to an int. The result is shown as a bar chart.

    Args:
        last_days: Largest window length to evaluate.

    Returns:
        DataFrame with the columns "Last x days" and "Average vacs", one row
        per window length.
    """
    shots = dfs['DE']['shots_today']
    data = []
    # +1 so that the window of exactly `last_days` rows is included as well.
    for i in range(1, last_days + 1):
        window = shots.tail(i)
        # Divide by the rows that actually exist: tail(i) returns fewer than
        # i rows once i exceeds the length of the data, and dividing by i
        # would then understate the average. max(..., 1) guards an empty frame.
        data.append([i, int(window.sum() / max(len(window), 1))])

    a = pd.DataFrame(columns=["Last x days", "Average vacs"], data=data)

    fig = go.Figure(data=[
        go.Bar(name='Three', x=a['Last x days'], y=a['Average vacs'])
    ])
    fig.update_layout(
        width=1000,
        height=350,
        title=f'Durchschnitt verteilte Impfdosen der letzten x Tage (Stand: {datetime.now().strftime("%Y-%m-%d")})'
    )
    fig.show()
    return a


In [None]:
# Chart window lengths up to 100 and display the first seven result rows.
a = avg_doses_of_last_x_days(last_days=100)
a.head(n=7)

In [None]:
# Same chart for window lengths up to 365.
a = avg_doses_of_last_x_days(last_days=365)



In [None]:
def add_rolling(fig, df, days, kurzel):
    """Add a rolling-mean line of ``df.shots_today`` to ``fig``.

    Args:
        fig: Figure that receives the trace.
        df: Frame with a ``shots_today`` column; its index supplies the x values.
        days: Window size handed to ``rolling``.
        kurzel: Region code looked up in ``bl_dict`` for the legend entry; a
            value that is not a known code is used as the legend entry as is.
    """
    label = bl_dict.get(kurzel, kurzel)
    smoothed = df.shots_today.rolling(days).mean()
    # The color comes from the shared module-level palette iterator.
    rolling_line = go.Scatter(
        x=df.index,
        y=smoothed,
        mode='lines',
        name=label,
        marker_color=next(palette),
        line=dict(width=4),
    )
    fig.add_trace(rolling_line)

def add_traces(fig, df):
    """Add the unsmoothed ``df.shots_today`` values to ``fig`` as a marker trace named 'Real'.

    Args:
        fig: Figure that receives the trace.
        df: Frame with a ``shots_today`` column; its index supplies the x values.
    """
    raw_points = go.Scatter(
        x=df.index,
        y=df.shots_today,
        mode='markers',
        name='Real',
        line=dict(color='grey', width=1),
    )
    fig.add_trace(raw_points)

# Rolling means of the daily shots:
#   * fig7 / fig21   - one line per region, 7- and 21-day window
#   * fig_BY / fig_DE - 7/14/21-day windows compared for one region each
# Shared figure size; add yaxis_range=[0, 400_000] here to pin the y axis.
rolling_size = dict(width=1000, height=700)

fig7, fig21 = go.Figure(), go.Figure()
for figure, window in ((fig7, 7), (fig21, 21)):
    palette = get_palette()  # each chart starts at the first color
    for kurzel in BL_KURZEL:
        to_plot = dfs[kurzel].set_index('date')
        add_rolling(figure, to_plot, window, kurzel)
    figure.update_layout(
        title=f'Täglich verteilte Impfdosen mit rollendem {window} Tage Fenster',
        **rolling_size,
    )

fig_BY, fig_DE = go.Figure(), go.Figure()
for figure, code, land in ((fig_BY, 'BY', 'Bayern'), (fig_DE, 'DE', 'Deutschland')):
    palette = get_palette()
    to_plot = dfs[code].set_index('date')
    for window in (7, 14, 21):
        # The label is not a region code, so add_rolling uses it verbatim.
        add_rolling(figure, to_plot, window, f'{window} Tage')
    figure.update_layout(
        title=f'Täglich verteilte Impfdosen mit verschiedenen rollenden Fenstern ({land})',
        **rolling_size,
    )

for figure in (fig7, fig21, fig_BY, fig_DE):
    figure.show()

## Vaccinations per weekday

In [None]:

# Mean of 'shots_today' per weekday for Germany, once over the whole frame
# and once each over the last 10, 5, 2 and 1 weeks of rows, sorted ascending.
de_daily = dfs['DE']
weekday_means = [
    frame.groupby(["weekday_name"])['shots_today'].mean().sort_values()
    for frame in (
        de_daily,
        de_daily.tail(7*10),
        de_daily.tail(7*5),
        de_daily.tail(7*2),
        de_daily.tail(7*1),
    )
]
to_plot_all, to_plot_10, to_plot_5, to_plot_2, to_plot_1 = weekday_means

palette = get_palette()
trace_names = ('complete timeline', '10 weeks', '5 weeks', '2 weeks', 'last week')
fig = go.Figure(data=[
    go.Bar(name=label, y=means, x=means.index)
    for label, means in zip(trace_names, weekday_means)
])
fig.update_layout(
    title='Verteilung der absoluten wöchentlichen Impfungen über die Wochentage',
    width=1000,
    height=500,
)
# Fixed weekday order on the x axis, starting with Saturday.
weekday_order = ['Sat','Sun','Mon','Tue','Wed','Thu','Fri']
fig.update_xaxes(categoryorder='array', categoryarray=weekday_order)
fig.write_html("plots/covid-plot-weekdays_total.html")
fig.show()

In [None]:
def helper(number, sum):
    """Return ``number`` divided by ``sum``."""
    return number / sum
    
def weekday_vac_proportion(df):
    """Return per-weekday means plus each weekday's share of the mean daily shots.

    Args:
        df: Frame with a ``weekday_name`` column and a numeric ``shots_today``
            column. It is not modified.

    Returns:
        A new frame indexed by ``weekday_name`` that holds the mean of every
        numeric column and a ``vac_proportion`` column: the weekday's mean
        ``shots_today`` divided by the sum of all weekday means.
    """
    # numeric_only=True keeps text columns from raising under pandas >= 2.0.
    per_weekday = df.groupby(["weekday_name"]).mean(numeric_only=True)
    total = per_weekday['shots_today'].sum()
    # Plain column division; no row-wise apply needed.
    per_weekday['vac_proportion'] = per_weekday['shots_today'] / total
    return per_weekday

In [None]:
# Share of each weekday in the mean daily shots for Germany: whole frame and
# the last 10, 5, 2 and 1 weeks of rows.
to_plot_all = weekday_vac_proportion(dfs['DE'])
to_plot_10 = weekday_vac_proportion(dfs['DE'].tail(7*10))
to_plot_5 = weekday_vac_proportion(dfs['DE'].tail(7*5))
to_plot_2 = weekday_vac_proportion(dfs['DE'].tail(7*2))
to_plot_1 = weekday_vac_proportion(dfs['DE'].tail(7*1))

palette = get_palette()
fig = go.Figure(data=[
    go.Bar(name='complete timeline', y=to_plot_all['vac_proportion'], x=to_plot_all.index),
    go.Bar(name='10 weeks', y=to_plot_10['vac_proportion'], x=to_plot_10.index),
    go.Bar(name='5 weeks', y=to_plot_5['vac_proportion'], x=to_plot_5.index),
    go.Bar(name='2 weeks', y=to_plot_2['vac_proportion'], x=to_plot_2.index),
    go.Bar(name='1 weeks', y=to_plot_1['vac_proportion'], x=to_plot_1.index),
])
fig.update_layout(
    width=1000,
    height=500,
    title='Verteilung der wöchentlichen Impfungen über die Wochentage in Prozent',
)
fig.update_xaxes(categoryorder='array', categoryarray=['Sat','Sun','Mon','Tue','Wed','Thu','Fri'])
# The plotted values are fractions between 0 and 1; render the ticks as
# percentages so the axis matches the "in Prozent" title.
fig.update_yaxes(tickformat='.0%')
fig.write_html("plots/covid-plot-weekdays_prop.html")
fig.show()

## Vaccinations per calendar week

In [None]:
# Column totals per 'year_and_week' for Germany, with the week key copied
# from the index into a regular column.
df = dfs['DE']
to_plot_sum = df.groupby(['year_and_week']).sum()
to_plot_sum = to_plot_sum.assign(year_and_week=to_plot_sum.index)


In [None]:
# Recompute the weekly totals for Germany and display them as the cell output.
df = dfs['DE']
to_plot_sum = df.groupby(by=['year_and_week']).sum()
# NOTE(review): reindex() without arguments leaves labels and data unchanged;
# reset_index() may have been intended - confirm.
to_plot_sum.reindex()

In [None]:
def vac_per_calendar_week(df, where):
    """Show a bar chart of the weekly sum and weekly mean of ``shots_today``.

    Args:
        df: Frame with ``year_and_week`` and ``shots_today`` columns.
        where: Region name inserted into the chart title.
    """
    # Aggregate only the plotted column in a single groupby. Summing or
    # averaging every column is wasted work and can fail on non-numeric columns.
    weekly = df.groupby(['year_and_week'])['shots_today'].agg(['sum', 'mean'])
    # The week keys are handed to plotly as strings, as before.
    weeks = [str(week) for week in weekly.index]

    fig = go.Figure(data=[
        go.Bar(name='Summe', y=weekly['sum'], x=weeks),
        go.Bar(name='Durchschnitt', y=weekly['mean'], x=weeks),
    ])
    fig.update_layout(
        width=1000,
        height=500,
        title=f'Impfungen pro Kalender Woche ({where})'
    )
    fig.show()

# Weekly charts for Germany as a whole, Bavaria and Hesse.
for _code, _where in (('DE', 'Deutschland'), ('BY', 'Bayern'), ('HE', 'Hessen')):
    vac_per_calendar_week(dfs[_code], _where)

In [None]:
# Scratch exploration of the Bavarian frame. Only the last expression of a
# notebook cell is displayed; the other bare expressions are just evaluated.
df = dfs['BY']
shots_by_week = df.tail(len(df) - 9).groupby(["calendar_week"])['shots_today']
to_plot_sum = shots_by_week.sum()
to_plot_sum.tail(20)
df.head(10)
df['date'].dt.year

# Keep only the rows dated 2021 or later and show the first ten of them.
from_2021 = df['date'].dt.year >= 2021
df_filtered = df[from_2021]
df_filtered.head(10)


In [None]:
def is_next_day_weekend(df):
    """Tell whether the day after the last row's ``date`` is a Saturday or Sunday.

    Args:
        df: Frame with a datetime ``date`` column and at least one row.

    Returns:
        True when the following day falls on a weekend, otherwise False.
    """
    following_day = df.tail(1).date + pd.DateOffset(1)
    weekday_number = following_day.dt.dayofweek.values[0]
    # pandas numbers Monday as 0, so 5 and 6 are Saturday and Sunday.
    return bool(weekday_number > 4)

# Number of most recent matching rows that go into each estimate.
DAYS_TO_LOOK_BACK = 2

import math


def guess_next_days_vacs(df, is_weekend):
    """Estimate daily shots from the latest rows of the requested day type.

    Args:
        df: Frame with ``is_weekend`` and ``shots_today`` columns.
        is_weekend: Value of ``is_weekend`` that selects the rows to use.

    Returns:
        The mean ``shots_today`` of the last ``DAYS_TO_LOOK_BACK`` selected
        rows, rounded up to an int.
    """
    matching_rows = df[df['is_weekend'] == is_weekend]
    recent_shots = matching_rows['shots_today'].tail(DAYS_TO_LOOK_BACK)
    return math.ceil(recent_shots.values.mean())


# Estimate the daily shots of every region from its latest weekday rows and
# its latest weekend rows, then chart the weekday estimate in descending order.
speed_list = []

for kurzel in BL_KURZEL:
    df = dfs[kurzel]
    weekdays, weekends = (guess_next_days_vacs(df, flag) for flag in (False, True))
    speed_list.append([bl_dict[kurzel], weekdays, weekends])

speed_df = pd.DataFrame(
    speed_list,
    columns=['bundesland', 'speed_weekday', 'speed_weekend'],
).sort_values('speed_weekday', ascending=False)

axis_labels = {
    "bundesland": "Bundesland",
    "speed_weekday": "Tägliche Impfungen (Vermutung)",
}
fig = px.bar(speed_df, x='bundesland', y='speed_weekday', labels=axis_labels)
fig.update_layout(
    title='Abschätzung täglicher Impfungen for Land und Bundesländer',
    width=700,
    height=400,
)
fig.show()