# Zeitreihenanalyse und Vorhersage

## Import von Bibliotheken

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
import pmdarima as pm

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import holidays
from meteostat import Hourly, Point
from datetime import datetime

import warnings
warnings.filterwarnings('ignore')

import plotly.io as pio
pio.renderers.default = "notebook_connected"

## Explorative Datenanalyse

### Datenextraktion

In [None]:
# Load the raw sales export (semicolon-separated, Latin-1 encoded CSV).

df = pd.read_csv(
    'XXXX.csv',
    sep=';',
    encoding='latin1',
)

In [None]:
# Remove the columns that the cells below do not use.

irrelevant_columns = [
    'Zeit', 'Filiale', 'Saison', 'WGR', 'Bon-Nr.',
    'Shop-Nr.', 'Kunde-Nr.', 'Verk.-Nr.',
    'Ärmellänge', 'Kragenform', 'Preisänderungsgrund',
    'Retourengrund', 'Rabattaktion', 'Auftrag',
    'Ident-EAN', 'Lieferant', 'Label',
    'Lief.Art-Nr.', 'eig.-Art-Nr.', 'Lief.Farbe',
    'Groesse', 'Bestand', 'Urspr.VK-Wert',
    'Lief.-Name', 'Passform', 'Bonus am',
    'VK-Kalk', 'VK-Etikett', 'VK-EK-Wert',
    'erz.VK-Wert', 'VK-Nachlass', 'VK-Rabatt',
    'Prospekt', '1.WE-Datum',
]
df = df.drop(columns=irrelevant_columns)

# Show the remaining data

df

In [None]:
# Overall series: total quantity sold per sales date across all product groups,
# indexed by the parsed date (source format dd.mm.yyyy) in chronological order.

df_all = (
    df.groupby('VK-Datum', as_index=False)['VK-Menge']
    .sum()
    .assign(Datum=lambda frame: pd.to_datetime(frame['VK-Datum'], format='%d.%m.%Y'))
    .set_index('Datum')
    .drop(columns=['VK-Datum'])
    .sort_index(ascending=True)
)
df_all

In [None]:
# Line chart of the total quantity sold per day.

fig = (
    px.line(df_all, x=df_all.index, y='VK-Menge')
    .update_traces(line_color='black')
    .update_layout(
        title='Verkaufsmenge',
        template='plotly_white',
        width=900,
    )
)
fig.show()

### Datentransformation

#### Filtern nach Produktgruppe und allgemeine Transformation

In [None]:
# List all product groups, three per printed row.
# Rows without a group label are dropped first: np.unique has to sort the values
# and cannot order a missing value (float NaN) against strings.

productgroups = np.unique(df['WGR.-Bez.'].dropna().values)

# Keep the 20-character columns, but widen them when a name would otherwise
# run into its neighbour.
column_width = max(20, max((len(str(word)) for word in productgroups), default=0) + 2)

for i in range(0, len(productgroups), 3):
    chunk = productgroups[i:i+3]
    print("".join(str(word).ljust(column_width) for word in chunk))

In [None]:
# Product group (a value of column 'WGR.-Bez.') that the analysis below is restricted to.

pg: str = 'Sacco'

In [None]:
# Daily quantity sold for the selected product group, sorted by date.

df_pg = (
    df[df['WGR.-Bez.'] == pg]
    .groupby('VK-Datum', as_index=False)['VK-Menge']
    .sum()
)
df_pg['Datum'] = pd.to_datetime(df_pg['VK-Datum'], format='%d.%m.%Y')
df_pg = (
    df_pg.drop(columns=['VK-Datum'])
    .sort_values(by=['Datum'])
    .reset_index(drop=True)
)

# NOTE(review): this removes the group column from the shared frame, so the
# filter above raises a KeyError if this cell is run again (e.g. for another
# product group) without reloading df.
df = df.drop(columns=['WGR.-Bez.'])

# Show the data

df_pg

#### Tagesebene

In [None]:
# Daily series: reindex to a gap-free calendar-day frequency and count days
# without any recorded sale as a quantity of 0.

df_daily = df_pg.set_index('Datum').asfreq("D")
df_daily['VK-Menge'] = df_daily['VK-Menge'].fillna(0).astype(int)
df_daily = df_daily.sort_index(ascending=True)

# Show the data

df_daily


In [None]:
# Line chart of the daily quantity sold.

fig = (
    px.line(df_daily, x=df_daily.index, y='VK-Menge')
    .update_traces(line_color='black')
    .update_layout(
        title='Tägliche Verkaufsmenge',
        template='plotly_white',
        width=800,
    )
)
fig.show()

#### Wochenebene

In [None]:
# Weekly series: sum the quantities per ISO year / ISO week.

iso_calendar = df_pg['Datum'].dt.isocalendar()
df_weekly = (
    df_pg.assign(Jahr=iso_calendar.year, Woche=iso_calendar.week)
    .groupby(['Jahr', 'Woche'], as_index=False)['VK-Menge']
    .sum()
)

# One Monday-anchored date for every week between the first and the last sale,
# so that weeks without any sale still get a row.
first_week_start = df_pg['Datum'].min().to_period('W').start_time
last_week_end = df_pg['Datum'].max().to_period('W').end_time
all_weeks = pd.date_range(start=first_week_start, end=last_week_end, freq='W-MON')
all_weeks_iso = all_weeks.isocalendar()
all_weeks_df = pd.DataFrame({'Jahr': all_weeks_iso.year, 'Woche': all_weeks_iso.week})

# Left-join the sales onto the complete week list; weeks without a match get 0.
df_weekly = all_weeks_df.merge(df_weekly, on=['Jahr', 'Woche'], how='left').fillna({'VK-Menge': 0})

# Zero-padded 'YYYY-KWww' labels sort chronologically as plain strings.
df_weekly['Jahr-Woche'] = df_weekly['Jahr'].astype(str) + '-KW' + df_weekly['Woche'].astype(str).str.zfill(2)
df_weekly = (
    df_weekly.set_index('Jahr-Woche')
    .drop(columns=['Jahr', 'Woche'])
    .sort_index(ascending=True)
)
df_weekly['VK-Menge'] = df_weekly['VK-Menge'].astype(int)

# Show the data

df_weekly


In [None]:
# Line chart of the weekly quantity sold.

fig = (
    px.line(df_weekly, x=df_weekly.index, y='VK-Menge')
    .update_traces(line_color='black')
    .update_layout(title='Wöchentliche Verkaufsmenge', template='plotly_white', width=800)
)
fig.show()

#### Darstellung auf Tages- und Wochenbasis

In [None]:
# Show the daily (top) and weekly (bottom) series in one figure.

fig = make_subplots(rows=2, cols=1, shared_xaxes=False, vertical_spacing=0.1)

# (data frame, trace name) per subplot row
panels = [(df_daily, 'VK-Menge'), (df_weekly, '')]
for row, (frame, trace_name) in enumerate(panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=frame.index,
            y=frame['VK-Menge'],
            mode='lines',
            name=trace_name,
            line=dict(color='black'),
        ),
        row=row,
        col=1,
    )

# Axis titles and layout
layout_options = dict(
    title='Tägliche und Wöchentliche Verkaufsmengen',
    xaxis_title='Datum',
    yaxis_title='VK-Menge',
    xaxis2_title='Jahr-Woche',
    yaxis2_title='VK-Menge',
    height=700,
    template='plotly_white',
    showlegend=False,
    width=800,
)
fig.update_layout(**layout_options)

fig.show()


### Saisonalität und Trend

#### Täglich

In [None]:
# Time-series decomposition of the daily series with a period of 7 days.

decomposition = seasonal_decompose(df_daily, period=7)

fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.05,
    subplot_titles=("Original", "Trend", "Seasonal", "Residual"),
)

# One subplot row per component: (trace name, values, line colour)
components = [
    ("Observed", decomposition.observed, 'black'),
    ("Trend", decomposition.trend, 'gray'),
    ("Seasonal", decomposition.seasonal, 'red'),
    ("Residual", decomposition.resid, 'gray'),
]
for row, (label, values, color) in enumerate(components, start=1):
    fig.add_trace(
        go.Scatter(x=df_daily.index, y=values, name=label, line=dict(color=color)),
        row=row,
        col=1,
    )

fig.update_layout(
    title='Dekomposition',
    template='plotly_white',
    height=800,
    xaxis4_title='Tag',
    showlegend=False,
    width=800,
)

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True)

fig.show()

#### Wöchentlich

In [None]:
# Time-series decomposition of the weekly series with a period of 52 calendar weeks.

decomposition = seasonal_decompose(df_weekly, period=52)

fig = make_subplots(rows=4, cols=1, shared_xaxes=True,
                    vertical_spacing=0.05,
                    subplot_titles=("Original", "Trend", "Seasonal", "Residual"))

# One subplot row per component: (trace name, values, line colour)
components = [
    ("Observed", decomposition.observed, 'black'),
    ("Trend", decomposition.trend, 'gray'),
    ("Seasonal", decomposition.seasonal, 'red'),
    ("Residual", decomposition.resid, 'gray'),
]
for row, (label, values, color) in enumerate(components, start=1):
    fig.add_trace(
        go.Scatter(x=df_weekly.index, y=values, name=label, line=dict(color=color)),
        row=row,
        col=1,
    )

fig.update_layout(
    title='Dekomposition',
    template='plotly_white',
    height=800,
    # The x values are the 'Jahr-Woche' labels of df_weekly, not days.
    xaxis4_title='Jahr-Woche',
    showlegend=False,
    width=800,
)

fig.show()

## Modellbildung

### Naive Vorhersagemethode

In [None]:
# Train/test split for the naive baselines: the last 12 weeks are held out,
# the 48 weeks before them serve as the history.

naive_train, naive_test = df_weekly.iloc[-60:-12], df_weekly.iloc[-12:]

# Forecast columns are collected next to the actual values of the test weeks.
naive_pred = naive_test.copy()

In [None]:
# Last-week forecast: repeat the final training value for every test week.

last_week = naive_train['VK-Menge'].iloc[-1]
naive_pred = naive_pred.assign(**{'Last Week': last_week.astype(int)})

In [None]:
# Last-month forecast: repeat the final 4 training weeks three times (12 values).

last_month = np.tile(naive_train['VK-Menge'].to_numpy()[-4:], 3)
naive_pred['Last Month'] = last_month.astype(int)

In [None]:
# Last-quarter forecast: reuse the final 12 training weeks one-to-one.

last_quarter = naive_train['VK-Menge'].to_numpy()[-12:]
naive_pred['Last Quarter'] = last_quarter.astype(int)

In [None]:
# Last-half-year forecast: average each pair of consecutive weeks of the final
# 24 training weeks, which yields 12 rounded values.

half_year_pairs = naive_train['VK-Menge'].to_numpy()[-24:].reshape(-1, 2)
last_half_year = np.round(half_year_pairs.mean(axis=1)).astype(int)
naive_pred['Last Half Year'] = last_half_year

In [None]:
# Last-three-quarters forecast: average each run of 3 consecutive weeks of the
# final 36 training weeks, which yields 12 rounded values.

three_quarter_triples = naive_train['VK-Menge'].to_numpy()[-36:].reshape(-1, 3)
last_three_quarter = np.round(three_quarter_triples.mean(axis=1)).astype(int)
naive_pred['Last Three Quarter'] = last_three_quarter

In [None]:
# Last-year forecast: average each run of 4 consecutive weeks of the final
# 48 training weeks, which yields 12 rounded values.

year_blocks = naive_train['VK-Menge'].to_numpy()[-48:].reshape(-1, 4)
last_year = np.round(year_blocks.mean(axis=1)).astype(int)
naive_pred['Last Year'] = last_year

In [None]:
# Mean forecast: row-wise rounded average of every column of naive_pred except
# the actual values.

naive_mean = np.round(
    naive_pred.drop(columns='VK-Menge').to_numpy().mean(axis=1)
).astype(int)
naive_pred['Naive Mean'] = naive_mean

In [None]:
# Show the actual values (as integers) next to all naive forecasts.

naive_pred = naive_pred.astype({'VK-Menge': int})
naive_pred

In [None]:
# Line chart of the naive baselines against the observed weekly series.

fig = go.Figure()

# Observed values: history and held-out weeks, both black and without legend entry.
for frame, trace_name in ((naive_train, 'Train'), (naive_pred, 'Test')):
    fig.add_trace(go.Scatter(
        x=frame.index,
        y=frame['VK-Menge'],
        mode='lines',
        name=trace_name,
        line=dict(color='black'),
        showlegend=False,
    ))

forecast_names = [
    'Vorhersage Letzte Woche',
    'Vorhersage Letzter Monat',
    'Vorhersage Letztes Quartal',
    'Vorhersage Letztes Halbjahr',
    'Vorhersage Letzte Drei Quartale',
    'Vorhersage Letztes Jahr',
]
forecast_cols = [
    'Last Week',
    'Last Month',
    'Last Quarter',
    'Last Half Year',
    'Last Three Quarter',
    'Last Year',
]

# Individual naive forecasts: all gray, none with its own legend entry.
for name, col in zip(forecast_names, forecast_cols):
    forecast_trace = go.Scatter(
        x=naive_pred.index,
        y=naive_pred[col],
        mode='lines',
        name=name,
        line=dict(color='gray'),
        showlegend=False,
    )
    fig.add_trace(forecast_trace)

# Empty traces that only provide one shared legend entry per colour.
for legend_label, legend_color in (('VK-Menge', 'black'), ('Naive Prognosen', 'gray')):
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='lines',
        line=dict(color=legend_color),
        name=legend_label,
    ))

fig.add_trace(go.Scatter(
    x=naive_pred.index,
    y=naive_pred['Naive Mean'],
    mode='lines',
    name='Mittelwertsvorhersage',
    line=dict(color='red'),
))

# Mark the start of the forecast window and shade it up to the last week.
forecast_start = naive_pred.index[0]
fig.add_vline(x=forecast_start, line=dict(color='black', dash='dash'))

fig.add_shape(
    type="rect",
    xref="x",
    yref="paper",
    x0=forecast_start,
    y0=0,
    x1=df_weekly.index[-1],
    y1=1,
    fillcolor="gray",
    opacity=0.3,
    layer="below",
    line_width=0,
)

# Horizontal legend centred above the plot area.
legend_options = dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5)
fig.update_layout(
    title='Naive Vorhersagemethode',
    template='plotly_white',
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    legend=legend_options,
    width=800,
)

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True)

fig.show()

### Trainings- und Testset

In [None]:
# Train/test split: hold out the last 90 days and the last 12 weeks.

train_daily, test_daily = df_daily.iloc[:-90], df_daily.iloc[-90:]
train_weekly, test_weekly = df_weekly.iloc[:-12], df_weekly.iloc[-12:]

# Frames that collect the model forecasts next to the actual test values.

df_pred_weekly = test_weekly.copy()
df_pred_daily = test_daily.copy()

### Prüfen statistischer Eigenschaften

#### Autokorrelationsfunktion

In [None]:
# Autocorrelation plots of the weekly and the daily training series (in that order).

weekly, daily = (plot_acf(series) for series in (train_weekly, train_daily))

#### ADF-Test

In [None]:
# Augmented Dickey-Fuller test on the daily training series.

ADF_daily_res = adfuller(train_daily)
adf_statistic, p_value = ADF_daily_res[:2]

print(f'ADF Statistic: {round(adf_statistic,4)}')
print(f'p-Value: {round(p_value,4)}')

In [None]:
# Augmented Dickey-Fuller test on the weekly training series.

ADF_weekly_res = adfuller(train_weekly)
adf_statistic, p_value = ADF_weekly_res[:2]

print(f'ADF Statistic: {round(adf_statistic,4)}')
print(f'p-Value: {round(p_value,4)}')

In [None]:
# First-order differencing of the weekly training series: y[t] - y[t-1].

diff_train_weekly = np.diff(train_weekly['VK-Menge'].to_numpy())

In [None]:
# Repeat the ADF test on the first-differenced weekly series.

ADF_diff_weekly_res = adfuller(diff_train_weekly)
adf_statistic, p_value = ADF_diff_weekly_res[:2]

print(f'ADF Statistic: {round(adf_statistic,4)}')
print(f'p-Value: {round(p_value,4)}')

In [None]:
# Seasonal differencing at a lag of 52 weeks: y[t] - y[t-52].
# np.diff's n argument is the differencing order (how often first differences
# are taken), not a lag, so the lagged difference is computed by slicing.
# The result has len(train_weekly) - 52 values, matching train_weekly.index[52:].

_weekly_sales = train_weekly['VK-Menge'].to_numpy()
saisonal_diff_train_weekly = _weekly_sales[52:] - _weekly_sales[:-52]

In [None]:
# Repeat the ADF test on the seasonally differenced weekly series.

ADF_saisonal_diff_weekly_res = adfuller(saisonal_diff_train_weekly)
adf_statistic, p_value = ADF_saisonal_diff_weekly_res[:2]

print(f'ADF Statistic: {round(adf_statistic,4)}')
print(f'p-Value: {round(p_value,4)}')

#### Visualisierung der ursprünglichen und differenzierten Zeitreihe

In [None]:
# 12-week rolling means of the original and the first-differenced weekly series.

rolling_mean = train_weekly['VK-Menge'].rolling(window=12).mean()

# Differencing drops the first observation, so the labels start at the second week.
diff_train_weekly_series = pd.Series(
    diff_train_weekly,
    index=train_weekly.index[1:],
    name='VK-Menge',
)
rolling_mean_diff = diff_train_weekly_series.rolling(window=12).mean()

In [None]:
# 12-week rolling mean of the seasonally differenced weekly series.

# The differenced array is labelled with the training weeks from position 52 on.
saisonal_diff_train_weekly_series = pd.Series(
    saisonal_diff_train_weekly,
    index=train_weekly.index[52:],
    name='VK-Menge',
)
saisonal_rolling_mean_diff = saisonal_diff_train_weekly_series.rolling(window=12).mean()

In [None]:
# Plot the original, the differenced and the seasonally differenced weekly series,
# each together with its rolling mean.

fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=("Ursprünglich", "Differenziert", "Saisonal differenziert"),
)

# (series, rolling mean of that series) per subplot row
panels = [
    (train_weekly['VK-Menge'], rolling_mean),
    (diff_train_weekly_series, rolling_mean_diff),
    (saisonal_diff_train_weekly_series, saisonal_rolling_mean_diff),
]
for row, (series, series_mean) in enumerate(panels, start=1):
    fig.add_trace(
        go.Scatter(x=series.index, y=series, mode='lines',
                   name='VK-Menge', line=dict(color='black')),
        row=row, col=1,
    )
    fig.add_trace(
        go.Scatter(x=series.index, y=series_mean, mode='lines',
                   name='Mean', line=dict(color='red')),
        row=row, col=1,
    )

fig.update_layout(
    title='Ursprüngliche, differenzierte und saisonal differenzierte wöchentliche Zeitreihe',
    yaxis_title='VK-Menge',
    xaxis3_title='Jahr-Woche',
    height=800,
    template='plotly_white',
    showlegend=False,
    margin=dict(t=100),
    width=800,
)

fig.show()

### ARIMA-Modell

In [None]:
# Fit ARIMA models via automatic order search: the weekly model uses one level
# of differencing (d=1), the daily model none (d=0).

model_arima_weekly = pm.auto_arima(
    train_weekly,
    d=1,
    trace=True,
)
model_arima_daily = pm.auto_arima(
    train_daily,
    d=0,
    trace=True,
)

In [None]:
# Forecast the held-out horizon: 12 weeks for the weekly and 90 days for the daily model.

ARIMA_weekly, ARIMA_daily = (
    model_arima_weekly.predict(n_periods=12),
    model_arima_daily.predict(n_periods=90),
)

In [None]:
# Store the naive mean baseline and the rounded weekly ARIMA forecast next to
# the actual weekly values, then show the frame.

df_pred_weekly['Naive_Mean'] = naive_pred['Naive Mean'].to_numpy()
df_pred_weekly['ARIMA_weekly'] = np.round(ARIMA_weekly.to_numpy()).astype(int)

df_pred_weekly

In [None]:
# Store the rounded daily ARIMA forecast next to the actual daily values, then show the frame.

df_pred_daily['ARIMA_daily'] = np.round(ARIMA_daily.to_numpy()).astype(int)

df_pred_daily

In [None]:
# Plot the weekly (top) and daily (bottom) ARIMA forecasts against the data.


def _legend_proxy(label, color):
    """Return an empty line trace that only contributes a legend entry."""
    return go.Scatter(x=[None], y=[None], mode='lines', line=dict(color=color), name=label)


fig = make_subplots(rows=2, cols=1, shared_xaxes=False, vertical_spacing=0.2)

# Data traces without own legend entries: (row, x values, y values, name, colour)
data_traces = [
    (1, train_weekly.index, train_weekly['VK-Menge'], 'Train', 'gray'),
    (1, df_pred_weekly.index, df_pred_weekly['VK-Menge'], 'Test', 'black'),
    (1, df_pred_weekly.index, df_pred_weekly['ARIMA_weekly'], 'ARIMA_wochenbasis', 'blue'),
    (2, train_daily.index, train_daily['VK-Menge'], 'Train', 'gray'),
    (2, df_pred_daily.index, df_pred_daily['VK-Menge'], 'Test', 'black'),
    (2, df_pred_daily.index, df_pred_daily['ARIMA_daily'], 'ARIMA_tagesbasis', 'blue'),
]
for row, x_values, y_values, trace_name, color in data_traces:
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=color),
            showlegend=False,
        ),
        row=row,
        col=1,
    )

# Legend: one shared entry per colour plus the naive mean baseline (weekly panel only).
fig.add_trace(_legend_proxy('Train', 'gray'))
fig.add_trace(_legend_proxy('Test', 'black'))
fig.add_trace(
    go.Scatter(
        x=naive_pred.index,
        y=naive_pred['Naive Mean'],
        mode='lines',
        name='Mittelwertsvorhersage',
        line=dict(color='red'),
    ),
    row=1,
    col=1,
)
fig.add_trace(_legend_proxy('ARIMA', 'blue'))

fig.update_layout(
    title='ARIMA-Modell',
    template='plotly_white',
    height=800,
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    xaxis2_title='Tag',
    yaxis2_title='VK-Menge',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
    width=800,
)

# First and last x value of the forecast window in each panel.
x0_week, x1_week = df_pred_weekly.index[0], df_pred_weekly.index[-1]
x0_day, x1_day = df_pred_daily.index[0], df_pred_daily.index[-1]

# Dashed line at the start of each forecast window ...
for row, window_start in ((1, x0_week), (2, x0_day)):
    fig.add_vline(x=window_start, row=row, col=1, line=dict(color='black', dash='dash'))

# ... and a gray band over the whole window.
for row, window_start, window_end in ((1, x0_week, x1_week), (2, x0_day, x1_day)):
    fig.add_vrect(
        x0=window_start,
        x1=window_end,
        fillcolor="gray",
        opacity=0.3,
        layer="below",
        line_width=0,
        row=row,
        col=1,
    )

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True, row=1, col=1)

fig.show()



#### Tagesbasierte Vorhersage auf Wochenebene summieren

In [None]:
# Sum the daily ARIMA forecast per ISO year / ISO week and attach it to the
# weekly prediction frame via the shared 'YYYY-KWww' label.

iso_weeks = df_pred_daily.index.isocalendar()
df_pred_daily = (
    df_pred_daily.assign(Woche=iso_weeks.week, Jahr=iso_weeks.year)
    .groupby(['Jahr', 'Woche'], as_index=False)
    .agg({'ARIMA_daily': 'sum'})
)
df_pred_daily['Jahr-Woche'] = (
    df_pred_daily['Jahr'].astype(str) + '-KW' + df_pred_daily['Woche'].astype(str).str.zfill(2)
)
df_pred_daily = df_pred_daily.set_index('Jahr-Woche').drop(columns=['Jahr', 'Woche'])

# Left join: only weeks that are part of the weekly test set are kept.
df_pred_weekly = df_pred_weekly.join(df_pred_daily['ARIMA_daily'], how='left')
df_pred_weekly


In [None]:
# Compare the weekly ARIMA forecast, the weekly-aggregated daily ARIMA forecast
# and the naive mean baseline on the weekly axis.

fig = go.Figure()

# (data frame, column, legend name, colour)
weekly_traces = [
    (train_weekly, 'VK-Menge', 'Train', 'gray'),
    (df_pred_weekly, 'VK-Menge', 'Test', 'black'),
    (df_pred_weekly, 'Naive_Mean', 'Mittelwertsvorhersage', 'red'),
    (df_pred_weekly, 'ARIMA_weekly', 'ARIMA_wochenbasiert', 'blue'),
    (df_pred_weekly, 'ARIMA_daily', 'ARIMA_tagesbasiert', 'green'),
]
for frame, column, trace_name, color in weekly_traces:
    fig.add_trace(go.Scatter(
        x=frame.index,
        y=frame[column],
        mode='lines',
        name=trace_name,
        line=dict(color=color),
    ))

# Shade the forecast window and mark its start with a dashed line.
forecast_start = df_pred_weekly.index[0]
fig.add_shape(
    type="rect",
    xref="x",
    yref="paper",
    x0=forecast_start,
    y0=0,
    x1=df_weekly.index[-1],
    y1=1,
    fillcolor="gray",
    opacity=0.3,
    layer="below",
    line_width=0,
)

fig.add_vline(x=forecast_start, line=dict(color='black', dash='dash'))

fig.update_layout(
    title='ARIMA-Modell',
    template='plotly_white',
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
    width=800,
)

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True)

fig.show()

### SARIMA-Modell

In [None]:
# Start again from a fresh copy of the daily test set; the frame of the same
# name was replaced by weekly aggregates in the ARIMA section.

df_pred_daily = test_daily.copy(deep=True)

In [None]:
# Fit seasonal ARIMA models via automatic order search: the weekly model uses a
# 52-week season with one seasonal difference, the daily model a 7-day season.

model_sarima_weekly = pm.auto_arima(
    train_weekly,
    seasonal=True,
    m=52,
    d=0,
    D=1,
    trace=True,
)
model_sarima_daily = pm.auto_arima(
    train_daily,
    seasonal=True,
    m=7,
    d=0,
    trace=True,
)

In [None]:
# Forecast the held-out horizon: 12 weeks for the weekly and 90 days for the daily model.

SARIMA_weekly, SARIMA_daily = (
    model_sarima_weekly.predict(n_periods=12),
    model_sarima_daily.predict(n_periods=90),
)

In [None]:
# Store the rounded SARIMA forecasts in the weekly and the daily prediction
# frame, then show the weekly one.

df_pred_weekly['SARIMA_weekly'] = np.round(SARIMA_weekly.to_numpy()).astype(int)
df_pred_daily['SARIMA_daily'] = np.round(SARIMA_daily.to_numpy()).astype(int)

df_pred_weekly

In [None]:
# Show the daily prediction frame (actual values and SARIMA forecast)

df_pred_daily

In [None]:
# Plot the weekly (top) and daily (bottom) SARIMA forecasts against the data.


def _legend_proxy(label, color):
    """Return an empty line trace that only contributes a legend entry."""
    return go.Scatter(x=[None], y=[None], mode='lines', line=dict(color=color), name=label)


fig = make_subplots(rows=2, cols=1, shared_xaxes=False, vertical_spacing=0.2)

# Data traces without own legend entries: (row, x values, y values, name, colour)
data_traces = [
    (1, train_weekly.index, train_weekly['VK-Menge'], 'Train', 'gray'),
    (1, df_pred_weekly.index, df_pred_weekly['VK-Menge'], 'Test', 'black'),
    (1, df_pred_weekly.index, df_pred_weekly['SARIMA_weekly'], 'SARIMA_wochenbasis', 'blue'),
    (2, train_daily.index, train_daily['VK-Menge'], 'Train', 'gray'),
    (2, df_pred_daily.index, df_pred_daily['VK-Menge'], 'Test', 'black'),
    (2, df_pred_daily.index, df_pred_daily['SARIMA_daily'], 'SARIMA_tagesbasis', 'blue'),
]
for row, x_values, y_values, trace_name, color in data_traces:
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=color),
            showlegend=False,
        ),
        row=row,
        col=1,
    )

# Legend: one shared entry per colour plus the naive mean baseline (weekly panel only).
fig.add_trace(_legend_proxy('Train', 'gray'))
fig.add_trace(_legend_proxy('Test', 'black'))
fig.add_trace(
    go.Scatter(
        x=naive_pred.index,
        y=naive_pred['Naive Mean'],
        mode='lines',
        name='Mittelwertsvorhersage',
        line=dict(color='red'),
    ),
    row=1,
    col=1,
)
fig.add_trace(_legend_proxy('SARIMA', 'blue'))

fig.update_layout(
    title='SARIMA-Modell',
    template='plotly_white',
    height=800,
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    xaxis2_title='Tag',
    yaxis2_title='VK-Menge',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
    width=800,
)

# First and last x value of the forecast window in each panel.
x0_week, x1_week = df_pred_weekly.index[0], df_pred_weekly.index[-1]
x0_day, x1_day = df_pred_daily.index[0], df_pred_daily.index[-1]

# Dashed line at the start of each forecast window ...
for row, window_start in ((1, x0_week), (2, x0_day)):
    fig.add_vline(x=window_start, row=row, col=1, line=dict(color='black', dash='dash'))

# ... and a gray band over the whole window.
for row, window_start, window_end in ((1, x0_week, x1_week), (2, x0_day, x1_day)):
    fig.add_vrect(
        x0=window_start,
        x1=window_end,
        fillcolor="gray",
        opacity=0.3,
        layer="below",
        line_width=0,
        row=row,
        col=1,
    )

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True, row=1, col=1)

fig.show()



#### Tagesbasierte Vorhersage auf Wochenebene summieren

In [None]:
# Sum the daily SARIMA forecast per ISO year / ISO week and attach it to the
# weekly prediction frame via the shared 'YYYY-KWww' label.

iso_weeks = df_pred_daily.index.isocalendar()
df_pred_daily = (
    df_pred_daily.assign(Woche=iso_weeks.week, Jahr=iso_weeks.year)
    .groupby(['Jahr', 'Woche'], as_index=False)
    .agg({'SARIMA_daily': 'sum'})
)
df_pred_daily['Jahr-Woche'] = (
    df_pred_daily['Jahr'].astype(str) + '-KW' + df_pred_daily['Woche'].astype(str).str.zfill(2)
)
df_pred_daily = df_pred_daily.set_index('Jahr-Woche').drop(columns=['Jahr', 'Woche'])

# Left join: only weeks that are part of the weekly test set are kept.
df_pred_weekly = df_pred_weekly.join(df_pred_daily['SARIMA_daily'], how='left')
df_pred_weekly


In [None]:
# Compare the weekly SARIMA forecast, the weekly-aggregated daily SARIMA
# forecast and the naive mean baseline on the weekly axis.

fig = go.Figure()

# (data frame, column, legend name, colour)
weekly_traces = [
    (train_weekly, 'VK-Menge', 'Train', 'gray'),
    (df_pred_weekly, 'VK-Menge', 'Test', 'black'),
    (df_pred_weekly, 'Naive_Mean', 'Mittelwertsvorhersage', 'red'),
    (df_pred_weekly, 'SARIMA_weekly', 'SARIMA_wochenbasiert', 'blue'),
    (df_pred_weekly, 'SARIMA_daily', 'SARIMA_tagesbasiert', 'green'),
]
for frame, column, trace_name, color in weekly_traces:
    fig.add_trace(go.Scatter(
        x=frame.index,
        y=frame[column],
        mode='lines',
        name=trace_name,
        line=dict(color=color),
    ))

# Shade the forecast window and mark its start with a dashed line.
forecast_start = df_pred_weekly.index[0]
fig.add_shape(
    type="rect",
    xref="x",
    yref="paper",
    x0=forecast_start,
    y0=0,
    x1=df_weekly.index[-1],
    y1=1,
    fillcolor="gray",
    opacity=0.3,
    layer="below",
    line_width=0,
)

fig.add_vline(x=forecast_start, line=dict(color='black', dash='dash'))

fig.update_layout(
    title='SARIMA-Modell',
    template='plotly_white',
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
    width=800,
)

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True)

fig.show()

### (S)ARIMAX-Modell

In [None]:
# Start again from a fresh copy of the daily test set; the frame of the same
# name was replaced by weekly aggregates in the SARIMA section.

df_pred_daily = test_daily.copy(deep=True)

#### Feature Engineering

In [None]:
# Add the exogenous variables (calendar, holidays, weather, promotions) to df_daily.

# Calendar features taken from the date index (dayofweek: Monday=0 ... Sunday=6).
df_daily['Wochentag'] = df_daily.index.dayofweek
df_daily['Monat'] = df_daily.index.month

# German public holidays for subdivision 'BY', years 2019-2024.
de_holidays = holidays.DE(years=[2019+i for i in range(6)], subdiv='BY')
df_daily['Feiertag'] = df_daily.index.map(lambda d: d in de_holidays).astype(int)
# Closed on public holidays and on Sundays.
df_daily['Geschlossen'] = (df_daily['Feiertag'] | (df_daily['Wochentag'] == 6)).astype(int)

# Hourly weather observations for the configured location, restricted to the
# hours between 07:00 and 20:00.
location = Point(XXXXXXXXXXXX, XXXXXXXXXXXX)
start = datetime(2018, 12, 1)
end = datetime(2024, 12, 31)
data = Hourly(location, start, end)
weather_raw = data.fetch()
weather_raw = weather_raw.between_time('7:00', '20:00')

# Aggregate per calendar day. The grouping key is converted to a DatetimeIndex so
# that the join below matches df_daily's DatetimeIndex by dtype instead of relying
# on pandas coercing an object index of datetime.date values.
weather_days = pd.to_datetime(weather_raw.index.date)
weather = weather_raw['temp'].groupby(weather_days).mean().round(1).to_frame(name='Temperatur')
weather['Temperatur**2'] = weather['Temperatur']**2
weather['Niederschlag'] = weather_raw['prcp'].groupby(weather_days).sum().round(2)
df_daily = df_daily.join(weather[['Temperatur', 'Temperatur**2', 'Niederschlag']], how='left')

# Integer-valued model inputs.
# NOTE(review): the integer casts raise if a day has no weather observation (NaN
# after the left join); decide on an imputation strategy if that can happen.
df_daily['VK-Menge'] = df_daily['VK-Menge'].fillna(0).astype(int)
df_daily['Temperatur'] = df_daily['Temperatur'].round().astype(int)
df_daily['Niederschlag'] = df_daily['Niederschlag'].round().astype(int)
df_daily['Temperatur**2'] = df_daily['Temperatur**2'].round().astype(int)

# Promotion flag for January, June, July, August, November and December.
# NOTE(review): presumably the months with discount campaigns; confirm with the business.
df_daily['Rabattaktion'] = df_daily['Monat'].isin([1, 6, 7, 8, 11, 12]).astype(int)

df_daily

In [None]:
# Train/test split: the last 90 rows are the test set, all earlier rows the training set

train_daily = df_daily.iloc[:-90]
test_daily = df_daily.iloc[-90:]

In [None]:
# Columns passed to the models as exogenous variables

exog = [
    'Wochentag',
    'Monat',
    'Geschlossen',
    'Temperatur',
    'Temperatur**2',
    'Niederschlag',
    'Rabattaktion',
]

In [None]:
# Fit the (S)ARIMAX models on the daily training data

# First search: no differencing (d=0), seasonal settings left at the auto_arima defaults
model_arimax = pm.auto_arima(
    train_daily['VK-Menge'],
    train_daily[exog],
    d=0,
    trace=True,
)

# Second search: explicitly seasonal with period m=7, no (seasonal) differencing
model_sarimax = pm.auto_arima(
    train_daily['VK-Menge'],
    train_daily[exog],
    d=0,
    D=0,
    seasonal=True,
    m=7,
    trace=True,
)

In [None]:
# Forecast the test period with both (S)ARIMAX models

# The horizon is taken from the test set instead of being hard-coded, so that
# n_periods always equals the number of rows in the exogenous matrix passed as X.
forecast_horizon = len(test_daily)

ARIMAX = model_arimax.predict(n_periods=forecast_horizon, X=test_daily[exog])
SARIMAX = model_sarimax.predict(n_periods=forecast_horizon, X=test_daily[exog])

In [None]:
# Store the forecasts in the results frame

for model_name, model_forecast in (('ARIMAX', ARIMAX), ('SARIMAX', SARIMAX)):
    # Round to whole units and store as integers
    df_pred_daily[model_name] = np.round(model_forecast.values).astype(int)
    # Post-processing step: negative forecasts are raised to 0
    df_pred_daily[model_name] = df_pred_daily[model_name].clip(lower=0)

df_pred_daily

In [None]:
# Plot the daily forecasts: ARIMAX in the upper panel, SARIMAX in the lower panel

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.2)

# Per panel, in drawing order: training data, test data, model forecast.
# Only the forecast traces are shown in the legend here.
for panel_row, model_name, model_color in ((1, 'ARIMAX', 'blue'), (2, 'SARIMAX', 'green')):
    panel_traces = (
        (train_daily.index, train_daily['VK-Menge'], 'Train', 'gray', False),
        (df_pred_daily.index, df_pred_daily['VK-Menge'], 'Test', 'black', False),
        (df_pred_daily.index, df_pred_daily[model_name], model_name, model_color, True),
    )
    for trace_x, trace_y, trace_name, trace_color, in_legend in panel_traces:
        fig.add_trace(
            go.Scatter(
                x=trace_x,
                y=trace_y,
                mode='lines',
                name=trace_name,
                line=dict(color=trace_color),
                showlegend=in_legend,
            ),
            row=panel_row,
            col=1,
        )

# Data-less traces that provide one 'Train' and one 'Test' legend entry
# (the real Train/Test traces above have showlegend=False)
for legend_name, legend_color in (('Train', 'gray'), ('Test', 'black')):
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode='lines',
            line=dict(color=legend_color),
            name=legend_name,
        )
    )

fig.update_layout(
    title='(S)ARIMAX',
    template='plotly_white',
    height=800,
    width=800,
    xaxis2_title='Tag',
    yaxis_title='VK-Menge',
    yaxis2_title='VK-Menge',
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
)

# First and last timestamp of the forecast window
x0_day = df_pred_daily.index[0]
x1_day = df_pred_daily.index[-1]

# Dashed line at the start of the forecast window in both panels
for panel_row in (1, 2):
    fig.add_vline(x=x0_day, row=panel_row, col=1, line=dict(color='black', dash='dash'))

# Gray band over the forecast window in both panels
for panel_row in (1, 2):
    fig.add_vrect(
        x0=x0_day,
        x1=x1_day,
        fillcolor="gray",
        opacity=0.3,
        layer="below",
        line_width=0,
        row=panel_row,
        col=1,
    )

for panel_row in (1, 2):
    fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True, row=panel_row, col=1)

fig.show()



#### Tagesbasierte Vorhersage auf Wochenebene summieren

In [None]:
# Sum the daily forecasts per ISO calendar week and attach them to the weekly results

# ISO week and ISO year of every forecast day
iso_calendar = df_pred_daily.index.isocalendar()
df_pred_daily['Woche'] = iso_calendar.week
df_pred_daily['Jahr'] = iso_calendar.year

# Weekly totals of both forecasts (df_pred_daily holds weekly rows from here on)
df_pred_daily = df_pred_daily.groupby(['Jahr', 'Woche'], as_index=False)[['ARIMAX', 'SARIMAX']].sum()

# Index label '<Jahr>-KW<Woche>' with the week number zero-padded to two digits
week_label = df_pred_daily['Woche'].astype(str).str.zfill(2)
df_pred_daily['Jahr-Woche'] = df_pred_daily['Jahr'].astype(str) + '-KW' + week_label
df_pred_daily = df_pred_daily.set_index('Jahr-Woche').drop(columns=['Jahr', 'Woche'])

# Left join keeps every row of the weekly results frame
df_pred_weekly = df_pred_weekly.join(df_pred_daily[['ARIMAX', 'SARIMAX']], how='left')
df_pred_weekly

In [None]:
# Plot the weekly series: training data, test data, naive mean and both (S)ARIMAX forecasts

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=train_weekly.index,
        y=train_weekly['VK-Menge'],
        mode='lines',
        name='Train',
        line=dict(color='gray'),
    )
)

# Series drawn over the test window: (column in df_pred_weekly, legend name, color)
test_window_traces = (
    ('VK-Menge', 'Test', 'black'),
    ('Naive_Mean', 'Mittelwertsvorhersage', 'red'),
    ('ARIMAX', 'ARIMAX', 'blue'),
    ('SARIMAX', 'SARIMAX', 'green'),
)
for trace_column, trace_name, trace_color in test_window_traces:
    fig.add_trace(
        go.Scatter(
            x=df_pred_weekly.index,
            y=df_pred_weekly[trace_column],
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        )
    )

# Gray band from the first forecast week to the last week of df_weekly, over the full plot height
fig.add_shape(
    type="rect",
    xref="x",
    yref="paper",
    x0=df_pred_weekly.index[0],
    x1=df_weekly.index[-1],
    y0=0,
    y1=1,
    fillcolor="gray",
    opacity=0.3,
    layer="below",
    line_width=0,
)

# Dashed line at the first forecast week
fig.add_vline(x=df_pred_weekly.index[0], line=dict(color='black', dash='dash'))

fig.update_layout(
    title='(S)ARIMAX',
    template='plotly_white',
    xaxis_title='Jahr-Woche',
    yaxis_title='VK-Menge',
    width=800,
    legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='center', x=0.5),
)

fig.update_xaxes(tickangle=90, tickfont=dict(size=12), automargin=True)

fig.show()

## Evaluation

In [None]:
# Evaluate on an independent copy of the weekly results frame

df_pred = df_pred_weekly.copy(deep=True)
df_pred

In [None]:
# Compute the error metrics

# Observed weekly sales, the reference for every metric in this step
vk_actual = df_pred['VK-Menge']

# Mean Absolute Error per forecast column
mae_Naive_Mean = mean_absolute_error(vk_actual, df_pred['Naive_Mean'])
mae_ARIMA_weekly = mean_absolute_error(vk_actual, df_pred['ARIMA_weekly'])
mae_ARIMA_daily = mean_absolute_error(vk_actual, df_pred['ARIMA_daily'])
mae_SARIMA_weekly = mean_absolute_error(vk_actual, df_pred['SARIMA_weekly'])
mae_SARIMA_daily = mean_absolute_error(vk_actual, df_pred['SARIMA_daily'])
mae_ARIMAX = mean_absolute_error(vk_actual, df_pred['ARIMAX'])
mae_SARIMAX = mean_absolute_error(vk_actual, df_pred['SARIMAX'])

# Mean Squared Error per forecast column
mse_Naive_Mean = mean_squared_error(vk_actual, df_pred['Naive_Mean'])
mse_ARIMA_weekly = mean_squared_error(vk_actual, df_pred['ARIMA_weekly'])
mse_ARIMA_daily = mean_squared_error(vk_actual, df_pred['ARIMA_daily'])
mse_SARIMA_weekly = mean_squared_error(vk_actual, df_pred['SARIMA_weekly'])
mse_SARIMA_daily = mean_squared_error(vk_actual, df_pred['SARIMA_daily'])
mse_ARIMAX = mean_squared_error(vk_actual, df_pred['ARIMAX'])
mse_SARIMAX = mean_squared_error(vk_actual, df_pred['SARIMAX'])

# Weighted Mean Absolute Percentage Error
def wape(y_true, y_pred):
    """Return the summed absolute error as a percentage of the summed absolute actuals.

    Args:
        y_true: Observed values.
        y_pred: Predicted values; subtracted element-wise from ``y_true``.

    Returns:
        The percentage, rounded to two decimals.
    """
    total_abs_error = abs(y_true - y_pred).sum()
    total_abs_actual = abs(y_true).sum()
    return round((total_abs_error / total_abs_actual) * 100, 2)

# Total of the observed weekly sales
sum_true = df_pred['VK-Menge'].sum()

# WAPE of every forecast column against the observed sales
observed_sales = df_pred['VK-Menge']
wape_Naive_Mean = wape(observed_sales, df_pred['Naive_Mean'])
wape_ARIMA_weekly = wape(observed_sales, df_pred['ARIMA_weekly'])
wape_ARIMA_daily = wape(observed_sales, df_pred['ARIMA_daily'])
wape_SARIMA_weekly = wape(observed_sales, df_pred['SARIMA_weekly'])
wape_SARIMA_daily = wape(observed_sales, df_pred['SARIMA_daily'])
wape_ARIMAX = wape(observed_sales, df_pred['ARIMAX'])
wape_SARIMAX = wape(observed_sales, df_pred['SARIMAX'])

# Collect all metrics in one table: one row per model, one column per metric
error_columns = {
    'MAE': [
        mae_Naive_Mean,
        mae_ARIMA_weekly,
        mae_ARIMA_daily,
        mae_SARIMA_weekly,
        mae_SARIMA_daily,
        mae_ARIMAX,
        mae_SARIMAX,
    ],
    'MSE': [
        mse_Naive_Mean,
        mse_ARIMA_weekly,
        mse_ARIMA_daily,
        mse_SARIMA_weekly,
        mse_SARIMA_daily,
        mse_ARIMAX,
        mse_SARIMAX,
    ],
    'WMAPE': [
        wape_Naive_Mean,
        wape_ARIMA_weekly,
        wape_ARIMA_daily,
        wape_SARIMA_weekly,
        wape_SARIMA_daily,
        wape_ARIMAX,
        wape_SARIMAX,
    ],
}
error_index = [
    'Naive Mean',
    'ARIMA_wochenbasiert',
    'ARIMA_tagesbasiert',
    'SARIMA_wochenbasiert',
    'SARIMA_tagesbasiert',
    'ARIMAX',
    'SARIMAX',
]
df_error = pd.DataFrame(error_columns, index=error_index)

# Display the table
df_error

In [None]:
# Bar chart of the Mean Absolute Error, models ordered by ascending MAE

mae_sorted = df_error.sort_values(by='MAE')

fig = px.bar(
    mae_sorted,
    x=mae_sorted.index,
    y='MAE',
    title='Mean Absolute Error',
    color_discrete_sequence=['gray'],
)
mae_layout = dict(
    template='plotly_white',
    height=600,
    width=700,
    title_text="Mean Absolute Error",
    xaxis_title='Modelle',
)
fig.update_layout(**mae_layout)

fig.show()

In [None]:
# Bar chart of the Mean Squared Error, models ordered by ascending MSE

mse_sorted = df_error.sort_values(by='MSE')

fig = px.bar(
    mse_sorted,
    x=mse_sorted.index,
    y='MSE',
    title='Mean Squared Error',
    color_discrete_sequence=['gray'],
)
mse_layout = dict(
    template='plotly_white',
    height=600,
    width=700,
    title_text="Mean Squared Error",
    xaxis_title='Modelle',
)
fig.update_layout(**mse_layout)

fig.show()

In [None]:
# Bar chart of the Weighted Mean Absolute Percentage Error, models ordered by ascending WMAPE

wape_sorted = df_error.sort_values(by='WMAPE')

# The x labels must come from the same sorted frame as the bar heights.
# Taking them from the MSE-sorted frame pairs bars with the wrong model names
# whenever the MSE and WMAPE rankings differ.
fig = px.bar(wape_sorted,
             x=wape_sorted.index,
             y='WMAPE',
             title='Weighted Mean Absolute Percentage Error',
             color_discrete_sequence=['gray'])
# title_text replaces the title set above, so it carries the same metric name
fig.update_layout(template='plotly_white',
                  height=600,
                  width=700,
                  title_text="Weighted Mean Absolute Percentage Error",
                  xaxis_title='Modelle')

fig.show()

In [None]:
# Compute totals, absolute differences, Mean Bias Error and percentage errors

# Totals over the evaluation window: observed sales first, then every forecast column
sum_true = df_pred['VK-Menge'].sum()
sum_naive_mean = df_pred['Naive_Mean'].sum()
sum_arima_weekly = df_pred['ARIMA_weekly'].sum()
sum_arima_daily = df_pred['ARIMA_daily'].sum()
sum_sarima_weekly = df_pred['SARIMA_weekly'].sum()
sum_sarima_daily = df_pred['SARIMA_daily'].sum()
sum_arimax = df_pred['ARIMAX'].sum()
sum_sarimax = df_pred['SARIMAX'].sum()

# Absolute difference between the observed total and each forecast total
# (diff_true compares the observed total with itself and is therefore 0)
diff_true = abs(sum_true - sum_true)
diff_naive_mean = abs(sum_true - sum_naive_mean)
diff_arima_weekly = abs(sum_true - sum_arima_weekly)
diff_arima_daily = abs(sum_true - sum_arima_daily)
diff_sarima_weekly = abs(sum_true - sum_sarima_weekly)
diff_sarima_daily = abs(sum_true - sum_sarima_daily)
diff_arimax = abs(sum_true - sum_arimax)
diff_sarimax = abs(sum_true - sum_sarimax)

# Mean Bias Error
def mean_biased_error(y_true, y_pred):
    """Return the mean of the signed differences ``y_true - y_pred``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values; subtracted element-wise from ``y_true``.

    Returns:
        The mean signed difference; it is positive when the predictions are,
        on average, below the observed values.
    """
    signed_errors = y_true - y_pred
    return np.mean(signed_errors)

# Mean Bias Error of every forecast column against the observed sales
# (mbe_true compares the observed sales with themselves and is therefore 0)
mbe_true = mean_biased_error(df_pred['VK-Menge'], df_pred['VK-Menge'])
mbe_naive_mean = mean_biased_error(df_pred['VK-Menge'], df_pred['Naive_Mean'])
mbe_arima_weekly = mean_biased_error(df_pred['VK-Menge'], df_pred['ARIMA_weekly'])
mbe_arima_daily = mean_biased_error(df_pred['VK-Menge'], df_pred['ARIMA_daily'])
mbe_sarima_weekly = mean_biased_error(df_pred['VK-Menge'], df_pred['SARIMA_weekly'])
mbe_sarima_daily = mean_biased_error(df_pred['VK-Menge'], df_pred['SARIMA_daily'])
mbe_arimax = mean_biased_error(df_pred['VK-Menge'], df_pred['ARIMAX'])
mbe_sarimax = mean_biased_error(df_pred['VK-Menge'], df_pred['SARIMAX'])

# Percentage error of the totals: absolute difference relative to the observed total.
# The ratio is scaled to percent first and rounded afterwards; rounding the ratio
# and multiplying by 100 afterwards can leave binary floating-point noise in the
# result. wape() rounds in the same order.
error_true = round(diff_true / sum_true * 100, 2)
error_naive_mean = round(diff_naive_mean / sum_true * 100, 2)
error_arima_weekly = round(diff_arima_weekly / sum_true * 100, 2)
error_arima_daily = round(diff_arima_daily / sum_true * 100, 2)
error_sarima_weekly = round(diff_sarima_weekly / sum_true * 100, 2)
error_sarima_daily = round(diff_sarima_daily / sum_true * 100, 2)
error_arimax = round(diff_arimax / sum_true * 100, 2)
error_sarimax = round(diff_sarimax / sum_true * 100, 2)

# Build the comparison table: one row for the observed sales and one per model
comparison_columns = {
    'Summe': [
        sum_true,
        sum_naive_mean,
        sum_arima_weekly,
        sum_arima_daily,
        sum_sarima_weekly,
        sum_sarima_daily,
        sum_arimax,
        sum_sarimax,
    ],
    'Differenz': [
        diff_true,
        diff_naive_mean,
        diff_arima_weekly,
        diff_arima_daily,
        diff_sarima_weekly,
        diff_sarima_daily,
        diff_arimax,
        diff_sarimax,
    ],
    'MBE': [
        mbe_true,
        mbe_naive_mean,
        mbe_arima_weekly,
        mbe_arima_daily,
        mbe_sarima_weekly,
        mbe_sarima_daily,
        mbe_arimax,
        mbe_sarimax,
    ],
    'Fehler': [
        error_true,
        error_naive_mean,
        error_arima_weekly,
        error_arima_daily,
        error_sarima_weekly,
        error_sarima_daily,
        error_arimax,
        error_sarimax,
    ],
}
comparison_index = [
    'VK-Menge',
    'Naive Mean',
    'ARIMA_wochenbasiert',
    'ARIMA_tagesbasiert',
    'SARIMA_wochenbasiert',
    'SARIMA_tagesbasiert',
    'ARIMAX',
    'SARIMAX',
]
df_comparison = pd.DataFrame(comparison_columns, index=comparison_index)

# Display the table with a styled caption.
# Note: df_comparison is rebound to the Styler object here, not kept as a DataFrame.
caption_props = [
    ('font-weight', 'bold'),
    ('font-size', '150%'),
    ('color', 'darkred'),
    ('text-align', 'center'),
]
caption_style = {'selector': 'caption', 'props': caption_props}
df_comparison = df_comparison.style.set_caption('Sakko').set_table_styles([caption_style])

df_comparison

In [None]:
# Rebuild the comparison table as a plain DataFrame and plot the Mean Bias Error
mbe_table_columns = {
    'Summe': [
        sum_true,
        sum_naive_mean,
        sum_arima_weekly,
        sum_arima_daily,
        sum_sarima_weekly,
        sum_sarima_daily,
        sum_arimax,
        sum_sarimax,
    ],
    'Differenz': [
        diff_true,
        diff_naive_mean,
        diff_arima_weekly,
        diff_arima_daily,
        diff_sarima_weekly,
        diff_sarima_daily,
        diff_arimax,
        diff_sarimax,
    ],
    'MBE': [
        mbe_true,
        mbe_naive_mean,
        mbe_arima_weekly,
        mbe_arima_daily,
        mbe_sarima_weekly,
        mbe_sarima_daily,
        mbe_arimax,
        mbe_sarimax,
    ],
    'Fehler': [
        error_true,
        error_naive_mean,
        error_arima_weekly,
        error_arima_daily,
        error_sarima_weekly,
        error_sarima_daily,
        error_arimax,
        error_sarimax,
    ],
}
mbe_table_index = [
    'VK-Menge',
    'Naive Mean',
    'ARIMA_wochenbasiert',
    'ARIMA_tagesbasiert',
    'SARIMA_wochenbasiert',
    'SARIMA_tagesbasiert',
    'ARIMAX',
    'SARIMAX',
]
df_comparison = pd.DataFrame(mbe_table_columns, index=mbe_table_index)

# Order by MBE and leave out the 'VK-Menge' row, which is not a model
mbe_sorted = df_comparison.sort_values(by='MBE').drop(index='VK-Menge', errors='ignore')

fig = px.bar(
    mbe_sorted,
    x=mbe_sorted.index,
    y='MBE',
    title='Mean Biased Error',
    color_discrete_sequence=['gray'],
)
mbe_layout = dict(
    template='plotly_white',
    height=600,
    width=700,
    title_text="Mean Biased Error",
    xaxis_title='Modelle',
)
fig.update_layout(**mbe_layout)

fig.show()