# We start off by reading the data

In [101]:
import shared

# Lookup tables provided by the project's `shared` module.
# NOTE(review): judging by their use further down, bl_kurzel is the iterable of
# region abbreviations and bl_dict maps an abbreviation to its display name —
# confirm against shared.py.
bl_dict = shared.bl_dict
bl_kurzel = shared.bl_kurzel

In [102]:
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go

# One vaccination frame per abbreviation in bl_kurzel, read from local pickle files.
# NOTE: unpickling can execute arbitrary code — only load files produced by this project.
dfs = {
    abbreviation: pd.read_pickle(f'data/df_vac_{abbreviation}.pkl')
    for abbreviation in bl_kurzel
}

# Print floats with thousands separators and one decimal; route DataFrame.plot() through plotly.
pd.set_option('float_format', '{:,.1f}'.format)
pd.options.plotting.backend = "plotly"

# colors
import plotly.express as px
from itertools import cycle

# Colour source for the plots; later cells draw one colour per trace with next(palette).
palette = shared.get_palette()


## Let's take a look

In [103]:
# Show the five most recent rows of the 'DE' frame.
dfs['DE'].tail(5)

Unnamed: 0,date,publication_date,dosen_kumulativ,personen_erst_kumulativ,personen_voll_kumulativ,shots_today,shots_sum,is_weekend,weekday_name,calendar_week,year_and_week,shots_sum_100k,shots_today_100k
475,2022-04-15,,,,,8335.6,172511850,False,Fri,15,2022_15,207369.5,10.0
476,2022-04-16,,,,,8335.6,172520186,True,Sat,15,2022_15,207379.5,10.0
477,2022-04-17,,,,,8335.6,172528521,True,Sun,15,2022_15,207389.6,10.0
478,2022-04-18,2022-04-19 08:00,172536857.0,60118591.0,63265523.0,8335.6,172536857,False,Mon,16,2022_16,207399.6,10.0
479,2022-04-19,2022-04-20 08:00,172566113.0,60123492.0,63270874.0,29256.0,172566113,False,Tue,16,2022_16,207434.7,35.2


In [104]:
# Cumulative dose count from the last row of the 'DE' frame, kept as a one-element Series.
current_official_doses = dfs['DE']['dosen_kumulativ'].tail(1)
current_official_doses

479   172,566,113.0
Name: dosen_kumulativ, dtype: float64

### First and second vaccinations (Germany)

In [105]:
palette = shared.get_palette()
to_plot = dfs['DE'].set_index('date')
fig = go.Figure()

# One line per cumulative series; colours are taken from the palette in trace order.
for column, label in (
    ('personen_erst_kumulativ', 'Erst geimpft'),
    ('personen_voll_kumulativ', 'Voll geimpft'),
):
    fig.add_trace(
        go.Scatter(
            x=to_plot.index,
            y=to_plot[column],
            mode='lines',
            name=label,
            marker_color=next(palette),
            line=dict(width=2.5),
        )
    )

fig.update_layout(
    width=900,
    height=600,
    title='Erstimpfungen und Zweitimpfungen (Deutschland)',
)
fig.show()

In [106]:
fig = go.Figure()


def add_dosen_for_bl(kurzel, column='shots_sum'):
    """Add one line trace for the region `kurzel` to the notebook-level `fig`.

    Args:
        kurzel: key into `dfs` and `bl_dict` (region abbreviation).
        column: column of the region's frame plotted on the y axis.

    Reads the notebook-level `fig`, `palette`, `dfs` and `bl_dict`; one colour is
    consumed from `palette` per call.
    """
    region_frame = dfs[kurzel].set_index('date')
    fig.add_trace(go.Scatter(x=region_frame.index,
                             y=region_frame[column],
                             mode='lines',
                             name=f'{bl_dict[kurzel]}',
                             marker_color=next(palette),
                             line=dict(width=2.5)))


# Restart the palette so colours are assigned in bl_kurzel order.
palette = shared.get_palette()
for kurzel in bl_kurzel:
    add_dosen_for_bl(kurzel)

fig.update_layout(
        width=1050,
        height=600,
        title='Absolute verteilte Impfdosen'
    )
shared.write_html(fig, 'vac_shots-bl-absolute')
fig.show()

In [107]:
fig = go.Figure()


# NOTE: this redefines add_dosen_for_bl from the previous cell; the only
# difference is the plotted column.
def add_dosen_for_bl(kurzel, column='shots_sum_100k'):
    """Add one line trace for the region `kurzel` to the notebook-level `fig`.

    Args:
        kurzel: key into `dfs` and `bl_dict` (region abbreviation).
        column: column of the region's frame plotted on the y axis.

    Reads the notebook-level `fig`, `palette`, `dfs` and `bl_dict`; one colour is
    consumed from `palette` per call.
    """
    region_frame = dfs[kurzel].set_index('date')
    fig.add_trace(go.Scatter(x=region_frame.index,
                             y=region_frame[column],
                             mode='lines',
                             name=f'{bl_dict[kurzel]}',
                             marker_color=next(palette),
                             line=dict(width=2.5)))


# Restart the palette so colours are assigned in bl_kurzel order.
palette = shared.get_palette()
for kurzel in bl_kurzel:
    add_dosen_for_bl(kurzel)

fig.update_layout(
        width=1050,
        height=600,
        title='Given shots per 100k'
    )
shared.write_html(fig, 'vac_shots-bl-per-100k')
fig.show()

### Average daily doses over the last x days

In [108]:
def avg_doses_of_last_x_days(last_days):
    """Plot and return the average of `shots_today` over the last 1..last_days rows of dfs['DE'].

    Args:
        last_days: largest look-back window (inclusive). Windows longer than the
            available series are not produced, because they would divide the same
            total by an ever larger day count.

    Returns:
        DataFrame with columns "Last x days" and "Average vacs" (average truncated
        to int), one row per window size.

    Side effects:
        Writes the bar chart through shared.write_html and shows it.
    """
    shots = dfs['DE']['shots_today']
    longest_window = min(last_days, len(shots))
    # range end is +1 so the requested window `last_days` itself is included.
    data = [
        [days, int(shots.tail(days).sum() / days)]
        for days in range(1, longest_window + 1)
    ]

    a = pd.DataFrame(columns=["Last x days", "Average vacs"], data=data)

    fig = go.Figure(data=[
        go.Bar(name='Three', x=a['Last x days'], y=a['Average vacs'])
    ])
    fig.update_layout(
        width=1000,
        height=350,
        title='Average shots given when looking at last x days'
    )
    shared.write_html(fig, f'avg-shots-last-{last_days}-days')
    fig.show()
    return a


In [109]:
# Build the chart for look-back windows up to 100 and show the first seven window sizes.
a = avg_doses_of_last_x_days(100)
a.head(7)

Unnamed: 0,Last x days,Average vacs
0,1,29256
1,2,18795
2,3,15309
3,4,13565
4,5,12519
5,6,11822
6,7,15226


In [110]:
# Same chart for look-back windows up to 365; only the figure is of interest here.
a = avg_doses_of_last_x_days(365)



In [111]:
def add_rolling(fig, df, days, kurzel, column='shots_today_100k'):
    """Add a rolling-mean line of `df[column]` to `fig`.

    Args:
        fig: figure that receives the trace.
        df: frame whose index is used as the x axis.
        days: rolling window length passed to Series.rolling().
        kurzel: key into bl_dict; when it is not a key, the value itself is used
            as the trace name (so free-form labels work too).
        column: column to smooth.

    Consumes one colour from the notebook-level `palette`.
    """
    try:
        trace_name = bl_dict[kurzel]
    except KeyError:
        trace_name = kurzel

    smoothed = df[column].rolling(days).mean()
    trace = go.Scatter(
        x=df.index,
        y=smoothed,
        mode='lines',
        name=trace_name,
        marker_color=next(palette),
        line=dict(width=3),
    )
    fig.add_trace(trace)

def add_traces(fig, df, column='shots_today_100k'):
    """Add the unsmoothed values of `df[column]` to `fig` as small grey markers named 'Real'."""
    grey_dots = dict(color='grey', size=4)
    raw_points = go.Scatter(
        x=df.index,
        y=df[column],
        mode='markers',
        name='Real',
        marker=grey_dots,
    )
    fig.add_trace(raw_points)

# --- All regions, per-100k values, 7-day rolling mean ---
palette = shared.get_palette()
fig7 = go.Figure()
for kurzel in bl_kurzel:
    to_plot = dfs[kurzel].set_index('date')
    add_rolling(fig7, to_plot, 7, kurzel)

# --- All regions, per-100k values, 21-day rolling mean ---
palette = shared.get_palette()
fig21 = go.Figure()
for kurzel in bl_kurzel:
    to_plot = dfs[kurzel].set_index('date')
    add_rolling(fig21, to_plot, 21, kurzel)

# --- 'BY': raw daily values plus three rolling windows ---
palette = shared.get_palette()
fig_BY = go.Figure()
to_plot = dfs['BY'].set_index('date')
add_traces(fig_BY, to_plot, column='shots_today')
for window in (7, 14, 21):
    add_rolling(fig_BY, to_plot, window, f'{window} Days', column='shots_today')

# --- 'DE': raw daily values plus three rolling windows ---
palette = shared.get_palette()
fig_DE = go.Figure()
to_plot = dfs['DE'].set_index('date')
add_traces(fig_DE, to_plot, column='shots_today')
for window in (7, 14, 21):
    add_rolling(fig_DE, to_plot, window, f'{window} Days', column='shots_today')

# Same size for all four figures; only title and output name differ.
# (A fixed y range, yaxis_range=[0, 400_000], was tried and left disabled.)
figure_specs = (
    (fig7, 'Shots given per day with (per 100k) with rolling 7 day window', 'vac_daily-shots-7-day-window'),
    (fig21, 'Shots given per day with (per 100k) with rolling 21 day window', 'vac_daily-shots-21-day-window'),
    (fig_BY, 'Shots given per day with different rolling windows (Bavaria)', 'vac_daily-shots-BY'),
    (fig_DE, 'Shots given per day with different rolling windows (Germany)', 'vac_daily-shots-DE'),
)

for figure, title, _ in figure_specs:
    figure.update_layout(width=1000, height=700, title=title)

for figure, _, html_name in figure_specs:
    shared.write_html(figure, html_name)

for figure, _, _ in figure_specs:
    figure.show()

## Vaccinations per weekday

In [112]:

def _mean_shots_by_weekday(frame):
    """Mean of shots_today per weekday_name, sorted ascending."""
    return frame.groupby(["weekday_name"])['shots_today'].mean().sort_values()


de_frame = dfs['DE']
to_plot_all = _mean_shots_by_weekday(de_frame)
to_plot_10 = _mean_shots_by_weekday(de_frame.tail(7 * 10))
to_plot_5 = _mean_shots_by_weekday(de_frame.tail(7 * 5))
to_plot_2 = _mean_shots_by_weekday(de_frame.tail(7 * 2))
to_plot_1 = _mean_shots_by_weekday(de_frame.tail(7 * 1))

palette = shared.get_palette()
# One bar group per time frame, in the same order as the legend.
weekday_bars = [
    go.Bar(name=label, y=series, x=series.index)
    for label, series in (
        ('Whole timeframe', to_plot_all),
        ('last 10 weeks', to_plot_10),
        ('last 5 weeks', to_plot_5),
        ('last 2 weeks', to_plot_2),
        ('last week', to_plot_1),
    )
]
fig = go.Figure(data=weekday_bars)
fig.update_layout(width=1000, height=500, title='Distribution of shots over the week days')
# Fix the weekday order on the x axis instead of relying on the sort order of the means.
fig.update_xaxes(
    categoryorder='array',
    categoryarray=['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
)
shared.write_html(fig, "weekdays_total")
fig.show()

In [113]:
def helper(number, sum):
    """Return `number` as a fraction of `sum` (plain true division)."""
    return number / sum
    
def weekday_vac_proportion(df):
    """Average the numeric columns per weekday and add each weekday's share of shots.

    Args:
        df: frame with at least the columns 'weekday_name' and 'shots_today'.

    Returns:
        Frame indexed by 'weekday_name' holding the per-weekday mean of every
        numeric column plus 'vac_proportion', the weekday's mean 'shots_today'
        divided by the sum of those means (the shares add up to 1).
    """
    # numeric_only=True: string/datetime columns cannot be averaged and make a
    # bare .mean() raise on pandas >= 2.0.
    weekday_means = df.groupby(["weekday_name"]).mean(numeric_only=True)
    total = weekday_means['shots_today'].sum()
    weekday_means['vac_proportion'] = weekday_means['shots_today'] / total
    return weekday_means

In [114]:
de_frame = dfs['DE']
to_plot_all = weekday_vac_proportion(de_frame)
to_plot_10 = weekday_vac_proportion(de_frame.tail(7 * 10))
to_plot_5 = weekday_vac_proportion(de_frame.tail(7 * 5))
to_plot_2 = weekday_vac_proportion(de_frame.tail(7 * 2))
to_plot_1 = weekday_vac_proportion(de_frame.tail(7 * 1))
# First 15 weeks of the series, used as the comparison group for the last bar.
vor_hausarztimpfungen = weekday_vac_proportion(de_frame.head(7 * 15))

palette = shared.get_palette()
# One bar group per time frame, in the same order as the legend.
proportion_bars = [
    go.Bar(name=label, y=frame['vac_proportion'], x=frame.index)
    for label, frame in (
        ('Whole timeframe', to_plot_all),
        ('10 Weeks', to_plot_10),
        ('5 Weeks', to_plot_5),
        ('2 Weeks', to_plot_2),
        ('1 Weeks', to_plot_1),
        ('Before vax in doctors offices', vor_hausarztimpfungen),
    )
]
fig = go.Figure(data=proportion_bars)
fig.update_layout(width=1000, height=500, title='Distribution of shots given over the week days')
fig.update_xaxes(
    categoryorder='array',
    categoryarray=['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
)
shared.write_html(fig, "weekdays_prop")
fig.show()

## Vaccinations per calendar week

In [115]:
df = dfs['DE']
# numeric_only=True: the frame also holds datetime and string columns, which a
# bare .sum() no longer drops silently on pandas >= 2.0.
to_plot_sum = df.groupby(['year_and_week']).sum(numeric_only=True)
# Keep the week label available as a regular column next to the index.
to_plot_sum['year_and_week'] = to_plot_sum.index


In [116]:
df = dfs['DE']
# Per-week totals of the numeric columns. numeric_only=True keeps the datetime
# and string columns out of the sum, which a bare .sum() no longer does
# silently on pandas >= 2.0.
to_plot_sum = df.groupby(['year_and_week']).sum(numeric_only=True)
# reindex() without arguments returned the frame unchanged, so display it directly.
to_plot_sum

Unnamed: 0_level_0,dosen_kumulativ,personen_erst_kumulativ,personen_voll_kumulativ,shots_today,shots_sum,is_weekend,calendar_week,shots_sum_100k,shots_today_100k
year_and_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020_52,21566.0,21566.0,0.0,21566.0,21566,2,104,25.9,25.9
2020_53,1032511.0,1032511.0,0.0,244420.0,1119305,2,371,1345.5,293.8
2021_01,2724537.0,2724537.0,0.0,347361.0,3297649,2,7,3964.0,417.5
2021_02,5445050.0,5438469.0,6581.0,532531.0,6542069,2,14,7864.0,640.1
2021_03,8836941.0,8184566.0,652375.0,637240.0,10544889,2,21,12675.6,766.0
...,...,...,...,...,...,...,...,...,...
2022_12,858359879.0,300148099.0,315432731.0,356494.0,1201926281,2,84,1444786.9,428.5
2022_13,860028514.0,300316899.0,315791213.0,314246.0,1204241704,2,91,1447570.2,377.7
2022_14,861408111.0,300460443.0,316076617.0,252784.0,1206136139,2,98,1449847.4,303.9
2022_15,517375130.0,180329138.0,189750994.0,132641.4,1207439202,2,105,1451413.8,159.4


In [117]:
def vac_per_calendar_week(kurzel):
    """Plot the weekly sum and the weekly daily mean of `shots_today` for one region.

    Args:
        kurzel: key into `dfs` and `bl_dict` (region abbreviation).

    Side effects:
        Writes the chart through shared.write_html as
        'vac-per-calendar-week-<kurzel>' and shows it.
    """
    # Aggregate only the plotted column: summing/averaging the whole frame also
    # touches datetime and string columns, which raises on pandas >= 2.0.
    weekly = dfs[kurzel].groupby('year_and_week')['shots_today'].agg(['sum', 'mean'])
    week_labels = weekly.index.astype(str)

    fig = go.Figure(data=[
        go.Bar(name='Sum', y=weekly['sum'], x=week_labels),
        go.Bar(name='Daily Ø', y=weekly['mean'], x=week_labels),
    ])
    fig.update_layout(
        width=1000,
        height=500,
        title=f'Shots given per calendar week ({bl_dict[kurzel]})'
    )
    shared.write_html(fig, f'vac-per-calendar-week-{kurzel}')
    fig.show()

# Weekly charts for three selected regions, in this order.
for region in ('DE', 'BY', 'HE'):
    vac_per_calendar_week(region)

In [118]:
df = dfs['BY']
# Per-calendar-week totals with the first nine rows left out (computed, not displayed).
rows_after_first_nine = df.tail(len(df) - 9)
to_plot_sum = rows_after_first_nine.groupby(["calendar_week"])['shots_today'].sum()

# Keep only rows dated 2021 or later and show the first ten of them.
from_2021_on = df['date'].dt.year >= 2021
df_filtered = df.loc[from_2021_on]
df_filtered.head(10)


Unnamed: 0,date,publication_date,dosen_kumulativ,personen_erst_kumulativ,personen_voll_kumulativ,shots_today,shots_sum,is_weekend,weekday_name,calendar_week,year_and_week,shots_sum_100k,shots_today_100k
6,2021-01-01,2021-01-02 08:00,39005.0,39005.0,0.0,1050.0,39005,False,Fri,53,2020_53,297.2,8.0
7,2021-01-02,2021-01-03 08:00,57833.0,57833.0,0.0,18828.0,57833,True,Sat,53,2020_53,440.6,143.5
8,2021-01-03,2021-01-04 12:00,66258.0,66258.0,0.0,8425.0,66258,True,Sun,53,2020_53,504.8,64.2
9,2021-01-04,2021-01-05 12:00,77876.0,77876.0,0.0,11618.0,77876,False,Mon,1,2021_01,593.4,88.5
10,2021-01-05,2021-01-06 11:00,82749.0,82749.0,0.0,4873.0,82749,False,Tue,1,2021_01,630.5,37.1
11,2021-01-06,2021-01-07 11:00,84349.0,84349.0,0.0,1600.0,84349,False,Wed,1,2021_01,642.7,12.2
12,2021-01-07,2021-01-08 11:00,88916.0,88916.0,0.0,4567.0,88916,False,Thu,1,2021_01,677.5,34.8
13,2021-01-08,2021-01-09 11:00,93966.0,93966.0,0.0,5050.0,93966,False,Fri,1,2021_01,715.9,38.5
14,2021-01-09,,,,,13085.0,107051,True,Sat,1,2021_01,815.6,99.7
15,2021-01-10,2021-01-11 11:00,120136.0,120136.0,0.0,13085.0,120136,True,Sun,1,2021_01,915.3,99.7


In [119]:
def is_next_day_weekend(df):
    """Return True when the day after the last row's `date` is a Saturday or Sunday."""
    day_after_last = df['date'].tail(1) + pd.DateOffset(1)
    # dayofweek counts Monday as 0, so 5 and 6 are the weekend.
    return bool(day_after_last.dt.dayofweek.values[0] >= 5)

# Number of most recent matching rows (weekday or weekend) averaged by guess_next_days_vacs.
DAYS_TO_LOOK_BACK = 2

import math

def guess_next_days_vacs(df, is_weekend, days_to_look_back=None):
    """Estimate the next day's shots from the most recent days of the same kind.

    Args:
        df: frame with the columns 'is_weekend' and 'shots_today'.
        is_weekend: selects which rows are averaged (rows whose 'is_weekend'
            equals this value).
        days_to_look_back: number of most recent matching rows to average;
            defaults to the notebook-level DAYS_TO_LOOK_BACK, read at call time.

    Returns:
        The mean of the selected 'shots_today' values, rounded up to an int.

    Raises:
        ValueError: from math.ceil when no row matches (the mean is NaN).
    """
    if days_to_look_back is None:
        days_to_look_back = DAYS_TO_LOOK_BACK
    same_kind_shots = df.loc[df['is_weekend'] == is_weekend, 'shots_today']
    recent_mean = same_kind_shots.tail(days_to_look_back).values.mean()
    return math.ceil(recent_mean)


# One row per region: display name, weekday estimate, weekend estimate.
speed_list = []
for kurzel in bl_kurzel:
    df = dfs[kurzel]
    weekdays, weekends = (guess_next_days_vacs(df, flag) for flag in (False, True))
    speed_list.append([bl_dict[kurzel], weekdays, weekends])

speed_columns = ['bundesland', 'speed_weekday', 'speed_weekend']
speed_df = pd.DataFrame(speed_list, columns=speed_columns).sort_values(
    'speed_weekday', ascending=False
)

# Only the weekday estimate is charted; the weekend estimate stays in speed_df.
axis_labels = {
    "bundesland": "Bundesland",
    "speed_weekday": "Tägliche Impfungen (Vermutung)",
}
fig = px.bar(speed_df, x='bundesland', y='speed_weekday', labels=axis_labels)
fig.update_layout(
    width=700,
    height=400,
    title='Abschätzung täglicher Impfungen for Land und Bundesländer',
)
fig.show()

In [120]:
# First ten rows of the 'BY' frame, for a look at the start of the series.
dfs['BY'].head(10)

Unnamed: 0,date,publication_date,dosen_kumulativ,personen_erst_kumulativ,personen_voll_kumulativ,shots_today,shots_sum,is_weekend,weekday_name,calendar_week,year_and_week,shots_sum_100k,shots_today_100k
0,2020-12-26,,0.0,0.0,0.0,0.0,0,True,Sat,52,2020_52,0.0,0.0
1,2020-12-27,2020-12-28 16:15,3389.0,3389.0,0.0,3389.0,3389,True,Sun,52,2020_52,25.8,25.8
2,2020-12-28,2020-12-29 08:00,5219.0,5219.0,0.0,1830.0,5219,False,Mon,53,2020_53,39.8,13.9
3,2020-12-29,,,,,11493.5,16712,False,Tue,53,2020_53,127.3,87.6
4,2020-12-30,2020-12-31 08:30,28206.0,28206.0,0.0,11493.5,28206,False,Wed,53,2020_53,214.9,87.6
5,2020-12-31,2021-01-01 12:30,37955.0,37955.0,0.0,9749.0,37955,False,Thu,53,2020_53,289.2,74.3
6,2021-01-01,2021-01-02 08:00,39005.0,39005.0,0.0,1050.0,39005,False,Fri,53,2020_53,297.2,8.0
7,2021-01-02,2021-01-03 08:00,57833.0,57833.0,0.0,18828.0,57833,True,Sat,53,2020_53,440.6,143.5
8,2021-01-03,2021-01-04 12:00,66258.0,66258.0,0.0,8425.0,66258,True,Sun,53,2020_53,504.8,64.2
9,2021-01-04,2021-01-05 12:00,77876.0,77876.0,0.0,11618.0,77876,False,Mon,1,2021_01,593.4,88.5


In [121]:
# Last ten rows of the 'BY' frame, for a look at the end of the series.
dfs['BY'].tail(10)

Unnamed: 0,date,publication_date,dosen_kumulativ,personen_erst_kumulativ,personen_voll_kumulativ,shots_today,shots_sum,is_weekend,weekday_name,calendar_week,year_and_week,shots_sum_100k,shots_today_100k
470,2022-04-10,2022-04-11 08:00,26468112.0,9226861.0,9833674.0,3439.0,26468112,True,Sun,14,2022_14,201665.8,26.2
471,2022-04-11,2022-04-12 08:00,26471410.0,9227229.0,9834378.0,3298.0,26471410,False,Mon,15,2022_15,201691.0,25.1
472,2022-04-12,2022-04-13 08:00,26476263.0,9227618.0,9835329.0,4853.0,26476263,False,Tue,15,2022_15,201728.0,37.0
473,2022-04-13,2022-04-14 08:00,26480745.0,9227996.0,9836261.0,4482.0,26480745,False,Wed,15,2022_15,201762.1,34.1
474,2022-04-14,,,,,1304.8,26482050,False,Thu,15,2022_15,201772.0,9.9
475,2022-04-15,,,,,1304.8,26483355,False,Fri,15,2022_15,201782.0,9.9
476,2022-04-16,,,,,1304.8,26484659,True,Sat,15,2022_15,201791.9,9.9
477,2022-04-17,,,,,1304.8,26485964,True,Sun,15,2022_15,201801.9,9.9
478,2022-04-18,2022-04-19 08:00,26487269.0,9228591.0,9837826.0,1304.8,26487269,False,Mon,16,2022_16,201811.8,9.9
479,2022-04-19,2022-04-20 08:00,26490860.0,9228933.0,9838665.0,3591.0,26490860,False,Tue,16,2022_16,201839.2,27.4


In [122]:
def guess_thing(df):
    """Back-test guess_next_days_vacs on `df` and store the result in place.

    For every row from the sixth onwards, the guess is computed from the rows
    before it (same weekday/weekend kind). Two columns are added to `df`:
    'shots_guess' (NaN for the first five rows, which have no guess) and
    'guess_off' ('shots_guess' minus 'shots_today').

    Args:
        df: frame with the columns 'is_weekend' and 'shots_today'; modified in place.
    """
    warm_up_rows = 5  # rows without a guess, so the guesser has some history to work with
    total_rows = df.shape[0]

    # Collect the guesses positionally. Writing them with df.at[i, ...] would
    # treat the position as an index label, which is only right on a default
    # RangeIndex, and would leave the column missing on frames of five rows or fewer.
    guesses = [float('nan')] * min(warm_up_rows, total_rows)
    for position in range(warm_up_rows, total_rows):
        is_weekend = df['is_weekend'].iloc[position]
        guesses.append(guess_next_days_vacs(df.head(position), is_weekend))

    df['shots_guess'] = guesses
    df['guess_off'] = df['shots_guess'] - df['shots_today']

def change_column_order(df):
    """Return a new frame holding only the guess-report columns, in display order."""
    report_columns = [
        'date',
        'publication_date',
        'shots_sum',
        'shots_today',
        'shots_guess',
        'guess_off',
        'is_weekend',
        'weekday_name',
        'year_and_week',
    ]
    return df[report_columns]

# Order matters in this cell: guess_thing adds 'shots_guess'/'guess_off' to dfs['BY']
# in place, the full frame is pickled, and only then is dfs['BY'] replaced by the
# narrower nine-column view.
# NOTE: after this cell dfs['BY'] no longer has e.g. 'shots_today_100k', so the
# earlier per-100k cells cannot be re-run without reloading the data.
guess_thing(dfs['BY'])

path = f'data/df_LEARN_BY.pkl'
dfs['BY'].to_pickle(path)

dfs['BY'] = change_column_order(dfs['BY'])

In [123]:
# Last 30 rows of the narrowed 'BY' frame, including the guess columns.
dfs['BY'].tail(30)

Unnamed: 0,date,publication_date,shots_sum,shots_today,shots_guess,guess_off,is_weekend,weekday_name,year_and_week
450,2022-03-21,2022-03-22 08:00,26349888,6186.0,8665.0,2479.0,False,Mon,2022_12
451,2022-03-22,2022-03-23 08:00,26358850,8962.0,6112.0,-2850.0,False,Tue,2022_12
452,2022-03-23,2022-03-24 08:00,26367913,9063.0,7574.0,-1489.0,False,Wed,2022_12
453,2022-03-24,2022-03-25 08:00,26377410,9497.0,9013.0,-484.0,False,Thu,2022_12
454,2022-03-25,,26382694,5284.0,9280.0,3996.0,False,Fri,2022_12
455,2022-03-26,,26387978,5284.0,6037.0,753.0,True,Sat,2022_12
456,2022-03-27,2022-03-28 08:00,26393262,5284.0,5661.0,377.0,True,Sun,2022_12
457,2022-03-28,2022-03-29 08:00,26400593,7331.0,7391.0,60.0,False,Mon,2022_13
458,2022-03-29,2022-03-30 08:00,26408714,8121.0,6308.0,-1813.0,False,Tue,2022_13
459,2022-03-30,2022-03-31 08:00,26415840,7126.0,7726.0,600.0,False,Wed,2022_13


In [124]:
df = dfs['BY']
palette = shared.get_palette()
fig = go.Figure()

# Deviation, actual values and guess as three lines over the same date axis;
# colours are taken from the palette in this order.
for column, label in (
    ('guess_off', 'Wrong'),
    ('shots_today', 'Actual'),
    ('shots_guess', 'Guess'),
):
    fig.add_trace(
        go.Scatter(
            x=df.date,
            y=df[column],
            mode='lines',
            name=label,
            marker_color=next(palette),
            line=dict(width=2),
        )
    )

fig.update_layout(width=1050, height=600, title='Schätzung Abweichung')

fig.show()

In [125]:
# Hard-coded figures per look-back setting (presumably noted from earlier runs).
for look_back, noted_total in ((1, '0996311'), (2, '1127655'), (3, '1206126')):
    print(f'With days to look back = {look_back} it was {noted_total}')

# With negative values included:
# 1  106914
# 2 -163544
# 3 -244306

# Signed sum of the guess deviation for the current DAYS_TO_LOOK_BACK setting.
df = dfs['BY']
df['guess_off'].sum()

With days to look back = 1 it was 0996311
With days to look back = 2 it was 1127655
With days to look back = 3 it was 1206126


14573.999999999942