# Event based framework

### Parameters and measured metrics:

#### Parameters for framework are:

1. $b:=$ tolerance of forecast

2. `PEGEL_MIN` := minimal level from which on the measurements are relevant
 
The analysed variables are the measured time points $T$, which can be split into:
+ $T_{relevant}$

+ $T_{notrelevant}$

#### $T_{relevant}$ can be split into three sets:

+ $T_{OK}$

+ $T_{over}$

+ $T_{under}$

In [1]:
# %% includes
import os.path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import csv
import datetime as dt
from datetime import timedelta
from datetime import datetime
#import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat
# Import Libraries and packages from Keras
from scipy import stats
from matplotlib.colors import ListedColormap
from matplotlib.pyplot import figure
#from sklearn.preprocessing import StandardScaler
#from sklearn.preprocessing import MinMaxScaler
#from tensorflow.keras import regularizers
#import h5py
#import joblib

# NOTE(review): presumably the first/last row positions of the 2017 flood
# event in the measurement series -- neither constant is referenced in the
# visible part of this notebook; confirm meaning (and the "FLUD" spelling)
# with the author.
START_FLUD2017 = 480750
END_FLUD2017 = 481550

### Function that computes the "gradient", i.e. the difference between the current level measurement and its predecessor

$\text{Steigung}_t := y_{t} - y_{t-1}$

In [2]:
def add_grad(df):
    """Add a ``grad`` column holding the step-to-step change of ``measured``.

    ``grad[t] = measured[t] - measured[t-1]``. The first row has no
    predecessor and therefore receives NaN.

    The column is written into ``df`` itself; that same object is returned.
    """
    previous_level = df["measured"].shift(1)
    df["grad"] = df["measured"].sub(previous_level)
    return df

### Filter Steigende Werte

In [3]:
def separation_increase_decrease(df):
    """Split ``df`` by the sign of its ``grad`` column.

    Returns:
        ``(df_increase, df_decrease)`` -- the rows with ``grad >= 0`` and the
        rows with ``grad < 0``. Rows whose ``grad`` is NaN satisfy neither
        comparison and therefore appear in neither frame.
    """
    slope = df["grad"]
    rising_or_flat = slope.ge(0)
    falling = slope.lt(0)
    return df.loc[rising_or_flat], df.loc[falling]

## Separation of events in sets:
3. $T_{relevant}$: events with a **rising or equal level** that is at or above the minimal level
7. $T_{notrelevant}$: all events that are not in $T_{relevant}$
4. $T_{OK}$: the prediction is within the tolerance $(\pm b)$ of the actual measurement
5. $T_{over}$: the prediction exceeds the measurement by more than $b$; potential false alert
6. $T_{under}$: the prediction falls below the measurement by more than $b$; underestimated level, potentially a missed genuine alert

In [4]:
def separation_T(b, PEGEL_MIN, df_increase, df_decrease):
    """Partition the events into not-relevant / over / under / ok sets.

    Args:
        b: forecast tolerance; a prediction within ``+/- b`` of the
            measurement counts as correct.
        PEGEL_MIN: minimal measured level for a rising event to be relevant.
        df_increase: rows with non-negative gradient; must provide the
            columns ``measured`` and ``predicted``.
        df_decrease: rows with negative gradient (never relevant).

    Returns:
        ``(T_not_relevant, T_over, T_under, T_ok)`` as DataFrames.
    """
    # Relevant events: rising (or equal) level at or above the minimal level.
    T_relevant = df_increase[df_increase["measured"] >= PEGEL_MIN]

    # Everything else is not relevant: falling levels and low rising levels.
    T_not_relevant = pd.concat(
        [df_decrease, df_increase[df_increase["measured"] < PEGEL_MIN]]
    )

    # Build the masks on T_relevant itself.  Masks built on the larger
    # df_increase only work through implicit index alignment and break as
    # soon as the index contains duplicate labels.
    measured = T_relevant["measured"]
    predicted = T_relevant["predicted"]

    # Prediction above the tolerance band: potential false alert.
    T_over = T_relevant[measured + b < predicted]

    # Prediction below the tolerance band: potentially underestimated event.
    T_under = T_relevant[measured - b > predicted]

    # Prediction inside the tolerance band.
    T_ok = T_relevant[(measured - predicted).abs() <= b]

    return T_not_relevant, T_over, T_under, T_ok

In [5]:
def calc_Metrics(b, PEGEL_MIN, df, events_per_year=24 * 4 * 365):
    """Classify all events of ``df`` and compute the time span they cover.

    Args:
        b: forecast tolerance passed on to ``separation_T``.
        PEGEL_MIN: minimal relevant level passed on to ``separation_T``.
        df: DataFrame with the columns ``measured`` and ``predicted``.
            NOTE: ``add_grad`` writes a ``grad`` column into this frame.
        events_per_year: number of rows that make up one year.  The default
            ``24 * 4 * 365`` corresponds to four rows per hour.

    Returns:
        ``(T_not_relevant, T_over, T_under, T_ok, Amount_Years)`` where
        ``Amount_Years`` is the row count of ``df`` divided by
        ``events_per_year``.
    """
    df_grad = add_grad(df)
    df_increase, df_decrease = separation_increase_decrease(df_grad)

    # Length of the data set expressed in years.
    Amount_Years = df.index.size / events_per_year

    T_not_relevant, T_over, T_under, T_ok = separation_T(
        b, PEGEL_MIN, df_increase, df_decrease
    )
    return T_not_relevant, T_over, T_under, T_ok, Amount_Years

## Auswertung FKT
* Prozentualer Anteil der Ereignisse an der Gesamtheit der Ereignisse <br />
($T_{ok}$/ ($T_{ok}$+ $T_{unter}$+ $T_{ueber}$)) <br />
($T_{unter}$/ ($T_{ok}$+ $T_{unter}$+ $T_{ueber}$)) <br />
($T_{ueber}$/ ($T_{ok}$+ $T_{unter}$+ $T_{ueber}$))
* Ereignisse pro Jahr := Amount_Events ($T_{ok}$+ $T_{unter}$+ $T_{ueber}$) / Anzahl der Jahre im Datensatz

Review: Das haben wir dann ja anders entschieden und umgesetzt:
* Amount_Events = ($T_{ok}$+ $T_{unter}$+ $T_{ueber}$ + $T_{nichtrelevant}$)
Sollten wir dann auch in der Doku nachziehen (nach Absprache).

In [6]:
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN if the denominator is zero."""
    if denominator == 0:
        return float("nan")
    return numerator / denominator


def Auswertung(T_not_relevant, T_over, T_under, T_ok, Amount_Years):
    """Summarise the event sets as counts, shares, annual rates and errors.

    Args:
        T_not_relevant, T_over, T_under, T_ok: event sets as DataFrames;
            ``T_over`` and ``T_under`` must provide the columns ``measured``
            and ``predicted``.
        Amount_Years: time span of the data set in years.

    Returns:
        A 15-tuple, in this order: the sizes of ``T_not_relevant``, ``T_ok``,
        ``T_over``, ``T_under``; the percentage shares of ok / over / under
        relative to *all* events (including the not relevant ones); the
        annual number of relevant, ok, over and under events; and the sum,
        mean, maximum and median of the absolute forecast error over the
        over- and underestimated events.  Ratios with a zero denominator
        (no events at all, or ``Amount_Years == 0``) are NaN instead of
        raising ``ZeroDivisionError``.
    """
    # Cardinalities of the individual sets.
    n_not_relevant = T_not_relevant.index.size
    n_ok = T_ok.index.size
    n_over = T_over.index.size
    n_under = T_under.index.size

    Amount_relevant_events = n_over + n_under + n_ok
    Amount_Events = Amount_relevant_events + n_not_relevant

    # Annual view.
    Amount_relevant_events_anual = _safe_ratio(Amount_relevant_events, Amount_Years)
    Ok_events_anual = _safe_ratio(n_ok, Amount_Years)
    Over_events_anual = _safe_ratio(n_over, Amount_Years)
    Under_evetns_anual = _safe_ratio(n_under, Amount_Years)

    # Shares in percent, relative to all events.
    T_ok_precent = _safe_ratio(n_ok, Amount_Events) * 100
    T_over_percent = _safe_ratio(n_over, Amount_Events) * 100
    T_under_precent = _safe_ratio(n_under, Amount_Events) * 100

    # Absolute error between measurement and prediction; only events outside
    # the tolerance band (over- and underestimated) are taken into account.
    Error_over = (T_over["measured"] - T_over["predicted"]).abs()
    Error_under = (T_under["measured"] - T_under["predicted"]).abs()
    Error_All = pd.concat([Error_over, Error_under])

    Summ_error = Error_All.sum()
    Average_error = Error_All.mean()
    Max_error = Error_All.max()
    Median_error = Error_All.median()

    return (
        # Cardinalities
        n_not_relevant,
        n_ok,
        n_over,
        n_under,
        # Shares in percent
        T_ok_precent,
        T_over_percent,
        T_under_precent,
        # Annual view
        Amount_relevant_events_anual,
        Ok_events_anual,
        Over_events_anual,
        Under_evetns_anual,
        # Further statistics
        Summ_error,
        Average_error,
        Max_error,
        Median_error,
    )

## Daten einlesen

In [7]:
def read_csv(path):
    """Load the comma-separated file at ``path`` into a DataFrame.

    The file is parsed with pandas' ``python`` parser engine.
    """
    frame = pd.read_csv(path, sep=",", engine="python")
    return frame

In [8]:
def framework(foldername, hours=(2, 3, 4, 8, 12), b=10, PEGEL_MIN=40):
    """Evaluate the forecasts of one result folder for several horizons.

    For every horizon ``h`` in ``hours`` the file
    ``../final results/<foldername>/Qualitative/<h>hrs/forecasts_<h>h.csv``
    is read, rows containing NaN are dropped, and the event-based metrics
    are computed.

    Args:
        foldername: name of the result folder below ``../final results``.
        hours: forecast horizons to evaluate.
        b: forecast tolerance.
        PEGEL_MIN: level at which the changes are relevant.

    Returns:
        DataFrame with one row per metric and one column ``"<h>_h"`` per
        evaluated horizon.
    """
    metric_rows = [
        "T_not_relevant",
        "T_ok",
        "T_over",
        "T_under",

        "T_ok_average[%]",
        "T_over_relative_average[%]",
        "T_under_average[%]",

        "anual_events_all",
        "anual_events_ok",
        "anual_events_over",
        "anual_events_under",

        "summ_error",
        "average_error",
        "max_error",
        "median_error",
    ]
    df_Auswertung = pd.DataFrame(index=metric_rows, columns=[])

    for h in hours:
        df_h_origin = read_csv(
            f"../final results/{foldername}/Qualitative/{h}hrs/forecasts_{h}h.csv"
        )
        # dropna() already returns a new frame, so no explicit copy is needed
        # before calc_Metrics adds its helper column.
        df_h = df_h_origin.dropna()
        T_not_relevant_h, T_over_h, T_under_h, T_ok_h, Amount_Years_h = calc_Metrics(
            b, PEGEL_MIN, df_h
        )
        Auswertung_h = Auswertung(
            T_not_relevant_h, T_over_h, T_under_h, T_ok_h, Amount_Years_h
        )
        # The tuple order returned by Auswertung matches metric_rows.
        df_Auswertung[f"{h}_h"] = Auswertung_h

    return df_Auswertung

In [None]:
# Run the framework for every result folder, print the metrics table and
# store it as CSV in that folder's "Quantitative" sub-directory.
folder = ['STRPM', 'Baseline', 'STRPMr']
for i in folder:
    # One table per folder: rows are metrics, columns are forecast horizons.
    framework_result = framework(i)
    print(f"\n~~~~~~~~~~~~ {i} ~~~~~~~~~~~~\n", framework_result)
    framework_result.to_csv(f"../final results/{i}/Quantitative/event_based_framework_{i}.csv", sep =",")


~~~~~~~~~~~~ STRPM ~~~~~~~~~~~~
                                       2_h            3_h            4_h  \
T_not_relevant              205368.000000  205366.000000  205364.000000   
T_ok                             5.000000       8.000000       3.000000   
T_over                           0.000000       0.000000       0.000000   
T_under                        262.000000     259.000000     264.000000   
T_ok_average[%]                  0.002431       0.003890       0.001459   
T_over_relative_average[%]       0.000000       0.000000       0.000000   
T_under_average[%]               0.127410       0.125953       0.128385   
anual_events_all                45.496314      45.496756      45.497199   
anual_events_ok                  0.851991       1.363199       0.511204   
anual_events_over                0.000000       0.000000       0.000000   
anual_events_under              44.644323      44.133558      44.985994   
summ_error                   11877.396867   11348.569684   11936.2