# We start off by reading the data

In [None]:
import shared

# Short local aliases for the lookup structures provided by `shared`:
# `bl_kurzel` is iterated to obtain region codes, and `bl_dict` is indexed
# with such a code to obtain the name shown in plot legends and titles.
bl_dict, bl_kurzel = shared.bl_dict, shared.bl_kurzel

In [None]:
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go

# One pickled DataFrame per region code, keyed by that code.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# that were produced locally / come from a trusted source.
dfs = {
    code: pd.read_pickle(f'data/df_vac_{code}.pkl')
    for code in bl_kurzel
}

# Print floats with thousands separators and one decimal place, and let
# DataFrame.plot render through plotly.
pd.set_option('float_format', '{:,.1f}'.format)
pd.options.plotting.backend = "plotly"

# colors
import plotly.express as px
from itertools import cycle

# Fresh colour iterator; plotting code below pulls colours with next(palette).
palette = shared.get_palette()


## Let's take a look

In [None]:
# Display the last five rows of the 'DE' frame.
dfs['DE'].tail(5)

In [None]:
# `dosen_kumulativ` of the last row, kept as a one-element Series
# (use `.values[0]` on it to get the bare number).
current_official_doses = dfs['DE']['dosen_kumulativ'].tail(1)
current_official_doses

### Erst- und Zweitimpfungen im Zeitverlauf

In [None]:
# Two lines over time for the 'DE' frame: `personen_erst_kumulativ` and
# `personen_voll_kumulativ`, each drawn in the next palette colour.
palette = shared.get_palette()
to_plot = dfs['DE'].set_index('date')
fig = go.Figure()
for column, label in (
    ('personen_erst_kumulativ', 'Erst geimpft'),
    ('personen_voll_kumulativ', 'Voll geimpft'),
):
    trace = go.Scatter(
        x=to_plot.index,
        y=to_plot[column],
        mode='lines',
        name=label,
        marker_color=next(palette),
        line=dict(width=2.5),
    )
    fig.add_trace(trace)
fig.update_layout(
    width=900,
    height=600,
    title='Erstimpfungen und Zweitimpfungen (Deutschland)',
)
fig.show()

In [None]:
# One line per region showing the `shots_sum` column over time.
# Start with a fresh colour iterator so the first region gets the first colour.
palette = shared.get_palette()
fig = go.Figure()


def add_dosen_for_bl(kurzel):
    """Add the `shots_sum` line of one region to the module-level ``fig``.

    Reads the region's frame from ``dfs``, labels the trace with
    ``bl_dict[kurzel]`` and consumes one colour from the module-level
    ``palette``.

    Args:
        kurzel: Region code; must be a key of both ``dfs`` and ``bl_dict``.
    """
    to_plot = dfs[kurzel].set_index('date')
    fig.add_trace(go.Scatter(
        x=to_plot.index,
        y=to_plot['shots_sum'],
        mode='lines',
        name=f'{bl_dict[kurzel]}',
        marker_color=next(palette),
        line=dict(width=2.5),
    ))


for kurzel in bl_kurzel:
    add_dosen_for_bl(kurzel)

fig.update_layout(
    width=1050,
    height=600,
    title='Absolute verteilte Impfdosen',
)
shared.write_html(fig, 'vac_shots-bl-absolute')
fig.show()

In [None]:
# One line per region showing the `shots_sum_100k` column over time.
# Start with a fresh colour iterator so the first region gets the first colour.
palette = shared.get_palette()
fig = go.Figure()


def add_dosen_for_bl(kurzel):
    """Add the `shots_sum_100k` line of one region to the module-level ``fig``.

    Reads the region's frame from ``dfs``, labels the trace with
    ``bl_dict[kurzel]`` and consumes one colour from the module-level
    ``palette``.

    Args:
        kurzel: Region code; must be a key of both ``dfs`` and ``bl_dict``.
    """
    to_plot = dfs[kurzel].set_index('date')
    fig.add_trace(go.Scatter(
        x=to_plot.index,
        y=to_plot['shots_sum_100k'],
        mode='lines',
        name=f'{bl_dict[kurzel]}',
        marker_color=next(palette),
        line=dict(width=2.5),
    ))


for kurzel in bl_kurzel:
    add_dosen_for_bl(kurzel)

fig.update_layout(
    width=1050,
    height=600,
    title='Verteilte Impfdosen pro 100k Einwohner',
)
shared.write_html(fig, 'vac_shots-bl-per-100k')
fig.show()

### Average doses of the last days

In [None]:
def avg_doses_of_last_x_days(last_days):
    """Plot and return average daily doses over trailing windows of ``dfs['DE']``.

    For every window length ``x`` from 1 up to and including ``last_days``,
    the ``shots_today`` values of the last ``x`` rows are summed, divided by
    ``x`` and truncated to an int. The resulting bar chart is handed to
    ``shared.write_html`` under the name ``avg-shots-last-{last_days}-days``
    and then shown.

    Args:
        last_days: Largest window length to include. Windows longer than the
            number of available rows are left out, because dividing the sum
            of fewer rows by the full window length would understate the
            average.

    Returns:
        DataFrame with the columns "Last x days" and "Average vacs", one row
        per window length.
    """
    shots = dfs['DE']['shots_today']
    # Cap at the available history and include `last_days` itself.
    largest_window = min(last_days, len(shots))
    data = [
        [window, int(shots.tail(window).sum() / window)]
        for window in range(1, largest_window + 1)
    ]

    a = pd.DataFrame(columns=["Last x days", "Average vacs"], data=data)

    fig = go.Figure(data=[
        go.Bar(name='Three', x=a['Last x days'], y=a['Average vacs']),
    ])
    fig.update_layout(
        width=1000,
        height=350,
        title=f'Durchschnitt verteilte Impfdosen der letzten x Tage in Deutschland (Stand: {datetime.now().strftime("%Y-%m-%d")})'
    )
    shared.write_html(fig, f'avg-shots-last-{last_days}-days')
    fig.show()
    return a


In [None]:
# Build the chart for `last_days=100` and display the first seven rows of the returned table.
a = avg_doses_of_last_x_days(100)
a.head(7)

In [None]:
# Same chart for `last_days=365`; the returned table replaces `a`.
a = avg_doses_of_last_x_days(365)



In [None]:
def add_rolling(fig, df, days, kurzel, column='shots_today_100k'):
    """Add a rolling-mean line of ``df[column]`` to ``fig``.

    Args:
        fig: Figure that receives the trace.
        df: Frame whose index supplies the x values.
        days: Window size passed to ``rolling``.
        kurzel: Key looked up in ``bl_dict`` for the legend name; when it is
            not a key there, ``kurzel`` itself is used as the name.
        column: Column to smooth.

    Consumes one colour from the module-level ``palette``.
    """
    try:
        legend_name = bl_dict[kurzel]
    except KeyError:
        # Not a known code: treat the argument as a free-form label.
        legend_name = kurzel

    trace = go.Scatter(
        x=df.index,
        y=df[column].rolling(days).mean(),
        mode='lines',
        name=legend_name,
        marker_color=next(palette),
        line=dict(width=3),
    )
    fig.add_trace(trace)

def add_traces(fig, df, column='shots_today_100k'):
    """Add the unsmoothed values of ``df[column]`` to ``fig`` as small grey markers.

    Args:
        fig: Figure that receives the trace.
        df: Frame whose index supplies the x values.
        column: Column plotted on the y axis.
    """
    grey_dots = dict(color='grey', size=4)
    trace = go.Scatter(
        x=df.index,
        y=df[column],
        mode='markers',
        name='Real',
        marker=grey_dots,
    )
    fig.add_trace(trace)

# Region comparison: one rolling-mean line per region, for a 7-row and a
# 21-row window. The palette is restarted for each figure so a region gets
# the same colour in both.
fig7, fig21 = go.Figure(), go.Figure()
for window, figure in ((7, fig7), (21, fig21)):
    palette = shared.get_palette()
    for kurzel in bl_kurzel:
        to_plot = dfs[kurzel].set_index('date')
        add_rolling(figure, to_plot, window, kurzel)

# Single-region views: raw `shots_today` values plus 7/14/21-row rolling means.
# The window labels are not keys of `bl_dict`, so add_rolling uses them
# verbatim as legend names.
fig_BY, fig_DE = go.Figure(), go.Figure()
for region, figure in (('BY', fig_BY), ('DE', fig_DE)):
    palette = shared.get_palette()
    to_plot = dfs[region].set_index('date')
    add_traces(figure, to_plot, column='shots_today')
    for window in (7, 14, 21):
        add_rolling(figure, to_plot, window, f'{window} Tage', column='shots_today')

# (figure, title, output name) in the order they are written and shown.
figure_specs = (
    (fig7,
     'Täglich verteilte Impfdosen (pro 100k) mit rollendem 7 Tage Fenster',
     'vac_daily-shots-7-day-window'),
    (fig21,
     'Täglich verteilte Impfdosen (pro 100k) mit rollendem 21 Tage Fenster',
     'vac_daily-shots-21-day-window'),
    (fig_BY,
     'Täglich verteilte Impfdosen mit verschiedenen rollenden Fenstern (Bayern)',
     'vac_daily-shots-BY'),
    (fig_DE,
     'Täglich verteilte Impfdosen mit verschiedenen rollenden Fenstern (Deutschland)',
     'vac_daily-shots-DE'),
)

for figure, title, _ in figure_specs:
    figure.update_layout(width=1000, height=700, title=title)

for figure, _, output_name in figure_specs:
    shared.write_html(figure, output_name)

for figure, _, _ in figure_specs:
    figure.show()

## Vaccinations per weekday

In [None]:

def _mean_shots_by_weekday(frame):
    """Mean `shots_today` per `weekday_name`, sorted ascending by that mean."""
    return frame.groupby(["weekday_name"])['shots_today'].mean().sort_values()


# Same statistic over the whole frame and over the last 70/35/14/7 rows.
germany = dfs['DE']
to_plot_all = _mean_shots_by_weekday(germany)
to_plot_10 = _mean_shots_by_weekday(germany.tail(7 * 10))
to_plot_5 = _mean_shots_by_weekday(germany.tail(7 * 5))
to_plot_2 = _mean_shots_by_weekday(germany.tail(7 * 2))
to_plot_1 = _mean_shots_by_weekday(germany.tail(7 * 1))

palette = shared.get_palette()
bars = [
    go.Bar(name=label, y=series, x=series.index)
    for label, series in (
        ('Ganzer Zeitraum', to_plot_all),
        ('10 Wochen', to_plot_10),
        ('5 Wochen', to_plot_5),
        ('2 Wochen', to_plot_2),
        ('letzte Woche', to_plot_1),
    )
]
fig = go.Figure(data=bars)
fig.update_layout(
    width=1000,
    height=500,
    title='Verteilung der absoluten wöchentlichen Impfungen über die Wochentage',
)
# Fixed weekday order on the x axis, starting with Saturday.
weekday_order = ['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri']
fig.update_xaxes(categoryorder='array', categoryarray=weekday_order)
shared.write_html(fig, "weekdays_total")
fig.show()

In [None]:
def helper(number, sum):
    """Return ``number`` divided by ``sum``.

    The second parameter shadows the builtin ``sum``; its name is kept so
    existing keyword calls keep working.
    """
    return number / sum


def weekday_vac_proportion(df):
    """Return per-weekday means plus each weekday's share of `shots_today`.

    The frame is grouped by ``weekday_name`` and the numeric columns are
    averaged. ``vac_proportion`` is each weekday's mean ``shots_today``
    divided by the sum of those means, so the column adds up to 1.

    Args:
        df: Frame with at least the columns ``weekday_name`` and
            ``shots_today``.

    Returns:
        New DataFrame indexed by ``weekday_name`` holding the numeric column
        means and the added ``vac_proportion`` column. ``df`` is not modified.
    """
    # numeric_only=True keeps non-numeric columns (strings, dates) from
    # breaking the aggregation on pandas versions that no longer drop them
    # silently.
    per_weekday = df.groupby(["weekday_name"]).mean(numeric_only=True)
    total = per_weekday['shots_today'].sum()
    per_weekday['vac_proportion'] = per_weekday['shots_today'] / total
    return per_weekday

In [None]:
# Weekday shares over the whole frame, over the last 70/35/14/7 rows and
# over the first 105 rows.
germany = dfs['DE']
to_plot_all = weekday_vac_proportion(germany)
to_plot_10 = weekday_vac_proportion(germany.tail(7 * 10))
to_plot_5 = weekday_vac_proportion(germany.tail(7 * 5))
to_plot_2 = weekday_vac_proportion(germany.tail(7 * 2))
to_plot_1 = weekday_vac_proportion(germany.tail(7 * 1))
vor_hausarztimpfungen = weekday_vac_proportion(germany.head(7 * 15))

palette = shared.get_palette()
bars = [
    go.Bar(name=label, y=frame['vac_proportion'], x=frame.index)
    for label, frame in (
        ('Ganzer Zeitraum', to_plot_all),
        ('10 Wochen', to_plot_10),
        ('5 Wochen', to_plot_5),
        ('2 Wochen', to_plot_2),
        ('1 Wochen', to_plot_1),
        ('vor Impfung in Praxen', vor_hausarztimpfungen),
    )
]
fig = go.Figure(data=bars)
fig.update_layout(
    width=1000,
    height=500,
    title='Verteilung der wöchentlichen Impfungen über die Wochentage in Prozent',
)
# Fixed weekday order on the x axis, starting with Saturday.
weekday_order = ['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri']
fig.update_xaxes(categoryorder='array', categoryarray=weekday_order)
shared.write_html(fig, "weekdays_prop")
fig.show()

## Vaccinations per calendar week

In [None]:
# Column sums per `year_and_week` group, with the group key copied back
# into a regular column of the same name.
df = dfs['DE']
to_plot_sum = (
    df.groupby(['year_and_week'])
    .sum()
    .assign(year_and_week=lambda weekly: weekly.index)
)


In [None]:
# Column sums of the 'DE' frame per `year_and_week` group.
df = dfs['DE']
to_plot_sum = df.groupby(['year_and_week']).sum()
# NOTE(review): the result of reindex() is only displayed, never assigned,
# and no new index is passed — presumably reset_index() was intended; confirm.
to_plot_sum.reindex()

In [None]:
def vac_per_calendar_week(kurzel):
    """Plot weekly totals and weekly daily means of `shots_today` for one region.

    The region's frame is grouped by ``year_and_week``; per group, the sum
    and the mean of ``shots_today`` are drawn as two bar series. The figure
    is handed to ``shared.write_html`` as ``vac-per-calendar-week-{kurzel}``
    and then shown.

    Args:
        kurzel: Region code; must be a key of both ``dfs`` and ``bl_dict``.
    """
    # Aggregate only the plotted column: summing/averaging the whole frame
    # would also touch date and text columns, which newer pandas versions
    # refuse to aggregate.
    weekly_shots = dfs[kurzel].groupby(['year_and_week'])['shots_today']
    weekly_sum = weekly_shots.sum()
    weekly_mean = weekly_shots.mean()

    # Group keys are shown as strings on the x axis.
    sum_labels = [str(week) for week in weekly_sum.index]
    mean_labels = [str(week) for week in weekly_mean.index]

    fig = go.Figure(data=[
        go.Bar(name='Summe', y=weekly_sum, x=sum_labels),
        go.Bar(name='Tages Ø', y=weekly_mean, x=mean_labels),
    ])
    fig.update_layout(
        width=1000,
        height=500,
        title=f'Impfungen pro Kalender Woche ({bl_dict[kurzel]})'
    )
    shared.write_html(fig, f'vac-per-calendar-week-{kurzel}')
    fig.show()

# Render the per-week chart for each of these three region codes, in this order.
for region in ('DE', 'BY', 'HE'):
    vac_per_calendar_week(region)

In [None]:
df = dfs['BY']

# `shots_today` summed per `calendar_week`, computed on
# `df.tail(len(df.index) - 9)` (all rows but the first nine when the frame
# has at least nine rows).
rows_after_first_nine = df.tail(len(df.index) - 9)
to_plot_sum = rows_after_first_nine.groupby(["calendar_week"])['shots_today'].sum()

# Rows whose `date` lies in 2021 or later; the first ten are displayed.
from_2021_on = df['date'].dt.year >= 2021
df_filtered = df.loc[from_2021_on]
df_filtered.head(10)


In [None]:
def is_next_day_weekend(df):
    """Tell whether the day after the last row's ``date`` is a Saturday or Sunday.

    Args:
        df: Frame with a datetime ``date`` column and at least one row.

    Returns:
        True when the following day's ``dayofweek`` is greater than 4
        (pandas counts Monday as 0), otherwise False.
    """
    following_day = df['date'].tail(1) + pd.DateOffset(1)
    return bool(following_day.dt.dayofweek.values[0] > 4)

import math

# Default number of most recent matching rows averaged by guess_next_days_vacs.
DAYS_TO_LOOK_BACK = 2


def guess_next_days_vacs(df, is_weekend, days_to_look_back=None):
    """Estimate the next day's doses from the most recent comparable days.

    Only rows whose ``is_weekend`` value equals ``is_weekend`` are
    considered; the ``shots_today`` values of the last
    ``days_to_look_back`` of them are averaged and rounded up.

    Args:
        df: Frame with the columns ``is_weekend`` and ``shots_today``.
        is_weekend: Which kind of day to estimate for.
        days_to_look_back: Number of matching rows to average. ``None``
            (the default) uses the module-level ``DAYS_TO_LOOK_BACK`` as it
            is at call time.

    Returns:
        The mean rounded up to the next integer.

    Raises:
        ValueError: If no matching row is available, or the mean is NaN.
    """
    if days_to_look_back is None:
        days_to_look_back = DAYS_TO_LOOK_BACK

    matching_days = df[df['is_weekend'] == is_weekend]
    recent_shots = matching_days.tail(days_to_look_back)['shots_today'].values
    if recent_shots.size == 0:
        # Without this guard the failure would surface as an opaque
        # "cannot convert float NaN to integer" from math.ceil.
        raise ValueError(
            f'no rows with is_weekend == {is_weekend!r} to estimate from'
        )
    return math.ceil(recent_shots.mean())


# One row per region: display name, estimate for non-weekend days and
# estimate for weekend days.
speed_list = [
    [
        bl_dict[kurzel],
        guess_next_days_vacs(dfs[kurzel], False),
        guess_next_days_vacs(dfs[kurzel], True),
    ]
    for kurzel in bl_kurzel
]

speed_columns = ['bundesland', 'speed_weekday', 'speed_weekend']
speed_df = pd.DataFrame(speed_list, columns=speed_columns).sort_values(
    'speed_weekday', ascending=False
)

# Only the non-weekend estimate is charted, largest first.
axis_labels = {
    "bundesland": "Bundesland",
    "speed_weekday": "Tägliche Impfungen (Vermutung)",
}
fig = px.bar(speed_df, x='bundesland', y='speed_weekday', labels=axis_labels)
fig.update_layout(
    width=700,
    height=400,
    title='Abschätzung täglicher Impfungen for Land und Bundesländer',
)
fig.show()

In [None]:
# Display the first ten rows of the 'BY' frame.
dfs['BY'].head(10)

In [None]:
# Display the last ten rows of the 'BY' frame.
dfs['BY'].tail(10)

In [None]:
def guess_thing(df):
    """Back-test ``guess_next_days_vacs`` on ``df`` and record the error, in place.

    For every row from position 5 onwards, the estimate is computed from the
    rows before it (``df.head(position)``) for that row's ``is_weekend``
    value and stored in ``shots_guess``. Afterwards ``guess_off`` is set to
    ``shots_guess - shots_today`` for the whole frame; rows without an
    estimate get NaN.

    Args:
        df: Frame with the columns ``is_weekend`` and ``shots_today``.
            It is modified in place; nothing is returned.
    """
    if 'shots_guess' not in df.columns:
        # Make sure the column exists even when the frame is too short for
        # the loop below to run once.
        df['shots_guess'] = float('nan')

    weekend_flags = df['is_weekend']
    for position in range(5, len(df)):
        guess_was = guess_next_days_vacs(
            df.head(position), weekend_flags.iloc[position]
        )
        # `.at` addresses rows by label, so translate the position first;
        # using the position directly is only right for a default 0..n-1 index.
        df.at[df.index[position], 'shots_guess'] = guess_was

    df['guess_off'] = df['shots_guess'] - df['shots_today']

def change_column_order(df):
    """Return ``df`` reduced to a fixed set of columns in a fixed order.

    Columns not listed below are dropped; a missing listed column makes the
    selection fail with ``KeyError``. The input frame is left untouched.
    """
    ordered_columns = [
        'date',
        'publication_date',
        'shots_sum',
        'shots_today',
        'shots_guess',
        'guess_off',
        'is_weekend',
        'weekday_name',
        'year_and_week',
    ]
    return df[ordered_columns]

# Back-test on the 'BY' frame (adds `shots_guess` and `guess_off` in place),
# pickle the enriched frame, then keep only the reordered column subset.
by_frame = dfs['BY']
guess_thing(by_frame)

path = 'data/df_LEARN_BY.pkl'
by_frame.to_pickle(path)

dfs['BY'] = change_column_order(by_frame)

In [None]:
# Display the last thirty rows of the 'BY' frame.
dfs['BY'].tail(30)

In [None]:
# Three lines over `date` for the 'BY' frame: the estimation error, the
# reported values and the estimate, each in the next palette colour.
df = dfs['BY']
palette = shared.get_palette()
fig = go.Figure()
for column, label in (
    ('guess_off', 'Wrong'),
    ('shots_today', 'Actual'),
    ('shots_guess', 'Guess'),
):
    trace = go.Scatter(
        x=df.date,
        y=df[column],
        mode='lines',
        name=label,
        marker_color=next(palette),
        line=dict(width=2),
    )
    fig.add_trace(trace)

fig.update_layout(
    width=1050,
    height=600,
    title='Schätzung Abweichung',
)

fig.show()

In [None]:
# Figures noted down by hand for three look-back settings.
for days, recorded in ((1, '0996311'), (2, '1127655'), (3, '1206126')):
    print(f'With days to look back = {days} it was {recorded}')

# with negative values
# 1  106914
# 2 -163544
# 3 -244306

# Sum of `guess_off` for the 'BY' frame as it currently stands.
df = dfs['BY']
df['guess_off'].sum()