In [213]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from logging import ERROR
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
tf.get_logger().setLevel(ERROR)

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import datetime
import matplotlib.pyplot as plt
from IPython.display import display
import keras.backend as K
from matplotlib.ticker import MultipleLocator,FormatStrFormatter 

from ipywidgets import IntSlider, ToggleButton, Checkbox, interact, HTML


In [333]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [338]:
# df_weekly = pd.read_csv('../data/clean/df_weekly.csv')
df_weekly = pd.read_csv('../data/clean/df_weekly_incidence.csv')

In [189]:
'''
Teile die In-Seqeuenz (Feature) in einzelne Sequenzen der Länge n_steps_in und
die Out-Sequenz (Target) in einzelne Sequenzen der Länge n_steps_out.

Beispiel: n_steps_in=1, n_steps_out=2
           Feature  | Target
    Seq 1:    1         -
        |     2         -
        |     3         6
        |     -         8       

@param overap 
Sollen sich die einzelnen Sequenzen überlappen?
    Standardmäßig überlappen sich die Daten der Sequenzen:
        1. Seq [1,2,3]
        2. Seq [2,3,4]
    mit overlap = False:
        1. Seq [1,2,3]
        2. Seq [4,5,6]

'''
def split_sequence(in_sequence, out_sequence, n_steps_in, n_steps_out, overlap=True, last_feature_as_target=True):
    X, y = list(), list()
    step = 1 if overlap else n_steps_in
    left_over = 0 
    target_offset = 1 if last_feature_as_target else 0

    for i in range(0,len(in_sequence),step):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out - target_offset
        # check if we are beyond the sequence
        if (out_end_ix > len(in_sequence)):
            left_over = len(in_sequence) - i
            break
   
        # gather input and output parts of the pattern     
        seq_x, seq_y = in_sequence[i:end_ix,:],  out_sequence[end_ix-target_offset:out_end_ix,-1] #out_sequence[end_ix-1:out_end_ix,-1]
        X.append(seq_x)
        y.append(seq_y)

       
    return np.array(X), np.array(y), left_over

In [339]:
'''
Prüfe, ob in einer Sequenzen basieren auf den IN und OUT Steps 
LK übergreifenden Daten vorkommen.
Entferne Zeilen, welche nicht mehr für eine volle Sequenz verwendet werden können

BSP:
    1. LK Erfurt
    2. LK Erfurt
    3. LK Erfurt
    4. LK Ahrweiler
    5. LK Ahrweiler

'''
def prepare_df_for_seqeuncing(df,n_steps_in, n_steps_out):
    droped_indices = []
    df_len = len(df)-1
    for i in range(0,df_len):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out -1
        # check if we are beyond the sequence
        if (out_end_ix > df_len):
            break
        start_row = df.iloc[i]
        end_row = df.iloc[out_end_ix]

        lk_of_seq_start = start_row.administrative_area_level_3
        lk_of_seq_end = end_row.administrative_area_level_3

        # Wenn es nicht mehr genug "Zeilen" eines LKs gibt müssen diese entfernt werden
        if (lk_of_seq_start != lk_of_seq_end):
            droped_indices.append(i)          
    
    df = df.drop(df.index[droped_indices])
    return df

In [340]:

TIME_STEPS_IN = 30
TIME_STEPS_OUT = 6
DATA_OVERLAP = True
# TARGET_COLUMN = "incidence"
TARGET_COLUMN = ""
TARGET_COLUMN = "incidence"
TRAIN_DATA_COLUMNS = None
TARGET_COLUMN_INDEX = 0

print("DF Full:",len(df_weekly))

df_weekly_adjusted = prepare_df_for_seqeuncing(df_weekly,TIME_STEPS_IN, TIME_STEPS_OUT)
df_weekly_adjusted.update(df_weekly_adjusted.select_dtypes(include=[np.number]).abs()) ## take absolute of every value
data = df_weekly_adjusted[['confirmed', 'deaths' ,"recovered"
    ,"vaccines","people_vaccinated","people_fully_vaccinated"
    ,"school_closing","workplace_closing","cancel_events"
    ,"gatherings_restrictions","transport_closing"
    ,"stay_home_restrictions","internal_movement_restrictions"
    ,"internal_movement_restrictions","information_campaigns","testing_policy"
    ,"contact_tracing","facial_coverings"
    ,"vaccination_policy","elderly_people_protection","population","cfr","cases_per_population","incidence"]]

TRAIN_DATA_COLUMNS = data.columns
TARGET_COLUMN_INDEX = TRAIN_DATA_COLUMNS.get_loc(TARGET_COLUMN)
DATA_LK_NAMES = df_weekly_adjusted.administrative_area_level_3.unique()
DATA_LK_COUNT = len(DATA_LK_NAMES)
 

DF Full: 58920


In [341]:

data = np.array(data)

print("Data:",data.shape)

# Vorbereitung der Eingangsdaten (Features) und der Zielvariablen
# features = data[:, :] 
# target = data[:, -1]


target = data[:, -1]
features = data[:,:3] ## nur die ersten 3 columns
print("Features:",features.shape,"Target:",target.shape)

N_FEATURES = features.shape[-1]

feat_range = (-1,1) # tanh
# feat_range = (0,1) # relu

# Daten normalisieren
feature_scaler = MinMaxScaler(feature_range=(0, 1))
target_scaler = MinMaxScaler(feature_range=(0, 1))

features_scaled = feature_scaler.fit_transform(features)
target_scaled = target_scaler.fit_transform(target.reshape(-1, 1))
print("Features Scaled:",features_scaled.shape,"Target Scaled:",target_scaled.shape)
## SCALING Muss nach sequenzierung passieren, sonst passt die Normaliseirung nicht, wenn Werte fehlen

feature_seqs, target_seq, left_over =  split_sequence(features_scaled,target_scaled, TIME_STEPS_IN, TIME_STEPS_OUT, overlap=DATA_OVERLAP)
df_weekly_adjusted = df_weekly_adjusted.drop(df_weekly_adjusted.index[-left_over:])
print("Features Seq.:",feature_seqs.shape,"Target Seq.:",target_seq.shape, "DF Adjust.:",len(df_weekly_adjusted))




train_size = int(len(feature_seqs) * 0.8)
test_size = len(feature_seqs) - train_size
print("Train Size: {}, Test Size: {}".format(train_size,test_size))


train_features_lstm, test_features_lstm = feature_seqs[:train_size], feature_seqs[train_size:]
train_target_lstm, test_target_lstm = target_seq[:train_size], target_seq[train_size:]

print("----------------------------------------")
print("TRAIN:",train_features_lstm.shape,"TEST:",test_features_lstm.shape)
print(train_features_lstm[0,:,:2],train_target_lstm[0,:])
# print(train_features_lstm[1,:,:2],train_target_lstm[1,:])





Data: (44535, 24)
Features: (44535, 3) Target: (44535,)
Features Scaled: (44535, 3) Target Scaled: (44535, 1)
Features Seq.: (44501, 30, 3) Target Seq.: (44501, 6) DF Adjust.: 44501
Train Size: 35600, Test Size: 8901
----------------------------------------
TRAIN: (35600, 30, 3) TEST: (8901, 30, 3)
[[1.45751638e-05 0.00000000e+00]
 [6.24649877e-05 0.00000000e+00]
 [1.13856637e-04 2.68283522e-04]
 [1.61841104e-04 3.75596931e-04]
 [2.09021099e-04 3.75596931e-04]
 [2.24590024e-04 3.75596931e-04]
 [2.30552591e-04 3.75596931e-04]
 [2.51222823e-04 3.75596931e-04]
 [2.83818189e-04 3.75596931e-04]
 [3.18666081e-04 3.75596931e-04]
 [3.58889747e-04 3.75596931e-04]
 [3.72329184e-04 3.75596931e-04]
 [3.82266796e-04 3.75596931e-04]
 [3.88560616e-04 3.75596931e-04]
 [3.91541900e-04 3.75596931e-04]
 [4.08932720e-04 3.75596931e-04]
 [4.25329780e-04 3.75596931e-04]
 [4.33942376e-04 5.00795908e-04]
 [4.43217481e-04 7.51193862e-04]
 [4.48186286e-04 7.51193862e-04]
 [4.66499885e-04 7.51193862e-04]
 [4.912

In [334]:
''' 
Encoder-Decoder Architecture für Sequence to Sequence
https://machinelearningmastery.com/encoder-decoder-long-short-term-memory-networks/

'''
K.clear_session()

stateful = False
activ = 'tanh'
activ_dec = 'elu'
# activ = 'relu'
# loss = tf.keras.losses.MeanSquaredError()
loss = "mse"
# loss = tf.keras.losses.CategoricalCrossentropy()
optim = tf.keras.optimizers.Adam()

ndim = 300
ndim2 = 100

model = Sequential([
    LSTM(ndim, activation=activ, batch_input_shape=(1, TIME_STEPS_IN, N_FEATURES),  input_shape=(TIME_STEPS_IN, N_FEATURES), return_sequences=True,stateful=stateful),
    LSTM(ndim, activation=activ,stateful=stateful),
    tf.keras.layers.RepeatVector(TIME_STEPS_OUT), # damit der Output 3d ([samples, time steps, features]) 
    LSTM(ndim, activation=activ, return_sequences=True,stateful=stateful), # return_sequences: Gibt den gesamten Output Eingabesequenz zurück und nicht nur den letzen
    LSTM(ndim, activation=activ, return_sequences=True,stateful=stateful), # return_sequences: Gibt den gesamten Output Eingabesequenz zurück und nicht nur den letzen
    
    # tf.keras.layers.TimeDistributed(Dense(ndim2, activation=activ)),
    
    tf.keras.layers.TimeDistributed(Dense(1)),
])

model.compile(optimizer=optim, loss=loss,metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (1, 30, 300)              364800    
                                                                 
 lstm_1 (LSTM)               (1, 300)                  721200    
                                                                 
 repeat_vector (RepeatVector  (1, 6, 300)              0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (1, 6, 300)               721200    
                                                                 
 lstm_3 (LSTM)               (1, 6, 300)               721200    
                                                                 
 time_distributed (TimeDistr  (1, 6, 1)                301       
 ibuted)                                                

In [118]:
# # returns train, inference_encoder and inference_decoder models
# def define_models(n_input, n_output, n_units):
#     # define training encoder
#     # encoder_inputs = tf.keras.layers.Input(shape=(None, n_input))
#     encoder_inputs = tf.keras.layers.Input(shape=(None, n_input))
#     encoder = LSTM(n_units, return_state=True)
#     encoder_outputs, state_h, state_c = encoder(encoder_inputs)
#     encoder_states = [state_h, state_c]
#     # define training decoder
#     decoder_inputs = tf.keras.layers.Input(shape=(None, n_output))
#     decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
#     decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
#     decoder_dense = Dense(n_output, activation='softmax')
#     decoder_outputs = decoder_dense(decoder_outputs)
#     model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
#     # define inference encoder
#     encoder_model = tf.keras.Model(encoder_inputs, encoder_states)
#     # define inference decoder
#     decoder_state_input_h = tf.keras.layers.Input(shape=(n_units,))
#     decoder_state_input_c = tf.keras.layers.Input(shape=(n_units,))
#     decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
#     decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
#     decoder_states = [state_h, state_c]
#     decoder_outputs = decoder_dense(decoder_outputs)
#     decoder_model = tf.keras.Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
#     # return all models
#     return model, encoder_model, decoder_model

In [119]:
# model, inference_encoder, inference_decoder = define_models(TIME_STEPS_IN,TIME_STEPS_OUT,20)
# model.compile(optimizer='adam', loss='mse',metrics=['accuracy'])
# model.summary()

In [120]:
def get_lk_data(step):
    lk_data = df_weekly_adjusted[df_weekly_adjusted.administrative_area_level_3 == DATA_LK_NAMES[step%int(train_size/DATA_LK_COUNT)]]
    lk_data_start = lk_data.index.values[0]
    lk_len = len(lk_data)
    return (train_features_lstm[lk_data_start:lk_data_start+lk_len,:,:], train_target_lstm[lk_data_start:lk_data_start+lk_len,:]),\
           (test_features_lstm[lk_data_start:lk_data_start+lk_len,:,:], test_target_lstm[lk_data_start:lk_data_start+lk_len,:]), DATA_LK_NAMES[step%DATA_LK_COUNT]

In [342]:
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# REPS = 5
TRAIN_LKS = int(train_size/DATA_LK_COUNT) 
EPOCHS = 25
BATCH_SIZE = 64

CUSTOM_TRAIN = False

loss = None
history = None

if CUSTOM_TRAIN:

    '''
    Custom Training Loop 
    - für Stateful LSTMs 
    - für ein LK pro Epoch
    '''

    print(f"Training for {EPOCHS} epochs!")
    for i in range(EPOCHS):    
        for j in range(TRAIN_LKS):
            (train,test,lk) = get_lk_data(j) 
            # print(f'EPOCH: {i: <4} - {lk: <37}',end=' | ',flush=True)
            train_x, train_y = train
            model.fit(train_x, train_y , epochs=1, batch_size=BATCH_SIZE, verbose=0, shuffle=False,validation_data=(test),)
        # model.reset_states()
        print(f'EPOCH: {i: <4}',end=' | ',flush=True)
        epoch_val_loss = model.evaluate(test_features_lstm, test_target_lstm,batch_size=BATCH_SIZE)
    loss = model.evaluate(test_features_lstm, test_target_lstm,batch_size=1)

else:

    class haltCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs={}):
            stop_at = 0.03
            if(logs.get('loss') <= stop_at):
                print(f"\n\n\nReached {stop_at} loss value so cancelling training!\n\n\n")
                self.model.stop_training = True


    '''
    Normaler Training Loop 
    '''
## Trainiere                                                
    history = model.fit(
                        train_features_lstm,
                        # [train_features_lstm,train_features_lstm_decoder],
                        train_target_lstm, 
                        epochs=EPOCHS, 
                        batch_size=BATCH_SIZE, 
                        shuffle=False,
                        validation_data=(
                            test_features_lstm,
                            # [test_features_lstm,test_features_lstm_decoder],
                            test_target_lstm),
                        callbacks=[
                            tensorboard_callback,
                            # haltCallback()
                            tf.keras.callbacks.EarlyStopping(monitor='loss', patience=200),
                            ],
                        )

    loss = model.evaluate(test_features_lstm, test_target_lstm,batch_size=BATCH_SIZE)


print('Test Loss:', loss)

Epoch 1/25
Epoch 2/25
Epoch 3/25

In [122]:
# model.save("model.h5")

In [336]:

# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    X = X.reshape(1, X.shape[0],X.shape[1])
    yhat = model.predict(X, batch_size=batch_size,verbose=0)
    yhat_2 = target_scaler.inverse_transform(yhat.reshape(-1,1)).reshape(-1)
    return yhat_2[0]

''' 
Walk forward validation.

Vorhersage für einzelne Zeitschritte über ein vorgegeben Zeitraum, mit sich verändernden Input-Daten.
Vorhersage für Zeitschritt t mit den Input-Daten von t bis t-TIME_STEPS_IN. 
Der Zeitschritt für welchen vorher eine Vorhersage gemacht werden sollte, ist jetzt teil der Input-Daten

'''
def walk_forward_validation(lstm_model, test_features, test_target,pred_steps, start_point=0):
    expect_predict = list()
    for i in range(start_point,start_point+pred_steps,1):
        # make one-step forecast
        X, y = test_features[i,:,:], test_target[i,:] # get the whole sequence
        
        yhat = forecast_lstm(lstm_model, 1, X)
        expec = target_scaler.inverse_transform(y[0].reshape(-1,1)).reshape(-1)[0]
        expect_predict.append(np.array([expec,yhat]))
        # print('Week=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expec))
    return np.array(expect_predict).reshape(pred_steps,2)

In [None]:
@interact(INDEX=IntSlider(min=0,max=len(test_features_lstm),step=1,value=0,continuous_update=False),show_input_data=Checkbox(value=False))
def show_validation(INDEX,show_input_data):
    # INDEX = 200
    TEST_INDEX_L = INDEX 
    TEST_INDEX_H = INDEX+1
    val_features = test_features_lstm[TEST_INDEX_L:TEST_INDEX_H]
    val_target = test_target_lstm[TEST_INDEX_L:TEST_INDEX_H]

    ## * IN Sequence  | Time Steps die nur reingegeben werden
    ## * OUT Sequence | Time Steps die vom Netzwerk predicted werden
    ## * TIME_STEPS_OUT, da die Prediction für zwei Zeitschritte aus der Prediction des letzen Val-Wertes und des ersten komplett unbekannten besteht.
    ## -> ÜBerscheiden sich am Ende 
    start_test_data = train_size
    start_val_data = start_test_data +TEST_INDEX_L
    end_val_data = (TEST_INDEX_H-TEST_INDEX_L)*TIME_STEPS_IN
    end_prediciton_vals= TIME_STEPS_OUT -1

    df_weekly_val = df_weekly_adjusted.iloc[start_val_data:start_val_data+end_val_data]

    start_lk = HTML("<b>START LK</b>: {}".format(df_weekly_val.iloc[0].administrative_area_level_3))
    end_lk = HTML("<b>END LK</b>: {}".format(df_weekly_val.iloc[-1].administrative_area_level_3))
    display(start_lk)
    display(end_lk)


    ## zur Kontrolle: de-scalierte Features
    de_time_series_val_fatures = val_features.reshape((TEST_INDEX_H-TEST_INDEX_L)*TIME_STEPS_IN,N_FEATURES)
    de_scaled_val_features = feature_scaler.inverse_transform(de_time_series_val_fatures)
    de_scaled_target_features = target_scaler.inverse_transform(val_target.reshape(-1,1)).reshape(-1)





    expect_target_rows = df_weekly_adjusted.iloc[start_val_data+end_val_data-1:start_val_data+end_val_data+end_prediciton_vals]


    prediction = model.predict(val_features)
    de_scaled_prediction = target_scaler.inverse_transform(prediction.reshape(-1,1)).reshape(-1)
    # prediction

    print(f"Sequence Prediction for: {TARGET_COLUMN}")
    values_is = expect_target_rows[TARGET_COLUMN].to_numpy()

    result_df = pd.DataFrame(data={
        "Row":expect_target_rows.index.values,
        "Week":expect_target_rows.week.values,
        "Predicted":de_scaled_prediction,
        "Expected":values_is,
        "abs(Diff)":abs(de_scaled_prediction -  values_is)})

    display(result_df)

    print("Val Expected Target")
    display(expect_target_rows.iloc[:,[0,1,2,3,4,5,-1]])


    if show_input_data:

        print("Kontrolle: de-skalierte Features:")
        print(" * Zeile {} | confirmed: {:.2f}, recovered: {:.2f}".format(df_weekly_val.index.values[0],de_scaled_val_features[0,0],de_scaled_val_features[0,1]))
        print("\nVal Input Features:")
        display(df_weekly_val)


    walk_distance = TIME_STEPS_OUT
    pred_data = walk_forward_validation(model,test_features_lstm,test_target_lstm,walk_distance,INDEX)
    x_axis = np.arange(0,pred_data.shape[0],1,dtype=np.int32)
    x_axis = expect_target_rows.week.values
    fig, axs = plt.subplots(figsize=(25,6))
    axs.plot(x_axis,pred_data[:,0])
    legend = ["Expected","Predicted (Seq.)","Predicted (Walk)"]
    axs.plot(x_axis,result_df["Predicted"].to_numpy())
    axs.plot(x_axis,pred_data[:,1],'--')
    axs.legend(legend)
    # axs.xaxis.set_major_locator(MultipleLocator(1)) 
    axs.xaxis.set_major_formatter(FormatStrFormatter('%d'))
    axs.set_xlabel("Weeks")
    axs.set_ylabel("Incidence")
    axs.grid("on")



interactive(children=(IntSlider(value=0, continuous_update=False, description='INDEX', max=8901), Checkbox(val…

In [219]:
@interact(INDEX=IntSlider(min=0,max=len(test_features_lstm),step=1,value=0,continuous_update=False),walk_distance=IntSlider(min=0,max=30,step=1,value=20,continuous_update=False))
def walk_foreward_val(INDEX,walk_distance):
    # walk_distance = 20
    # INDEX = 250
    pred_data = walk_forward_validation(model,test_features_lstm,test_target_lstm,walk_distance,INDEX)
    x_axis = np.arange(0,pred_data.shape[0],1,dtype=np.int32)

    fig, axs = plt.subplots(figsize=(20,6))
    axs.plot(x_axis,pred_data)
    legend = ["Expected","Predicted (Walk)"]
    axs.legend(legend)
    # axs.xaxis.set_major_locator(MultipleLocator(1)) 
    axs.xaxis.set_major_formatter(FormatStrFormatter('%d'))
    axs.set_xlabel("Weeks")
    axs.set_ylabel("Incidence")
    axs.grid("on")

interactive(children=(IntSlider(value=0, continuous_update=False, description='INDEX', max=8901), IntSlider(va…

In [330]:
start_test_data = train_size
df_weekly_val = df_weekly_adjusted.loc[start_test_data:]


large_diff_inputs = pd.DataFrame()
df_len = len(df_weekly_val)
for i in range(0,len(test_features_lstm),TIME_STEPS_IN):
        if i+TIME_STEPS_IN >= test_size:
                break
        val_features = test_features_lstm[i,:,:].reshape(1,TIME_STEPS_IN,N_FEATURES)
        val_target = test_target_lstm[i,:].reshape(1,TIME_STEPS_OUT)

        prediction = model.predict(val_features,verbose=0)
        de_scaled_target_features = target_scaler.inverse_transform(val_target.reshape(-1,1)).reshape(-1)
        de_scaled_prediction = target_scaler.inverse_transform(prediction.reshape(-1,1)).reshape(-1)
        abs_diff = abs(de_scaled_prediction-de_scaled_target_features)

        preds = np.append(np.zeros(TIME_STEPS_IN-TIME_STEPS_OUT)-1,de_scaled_prediction)
        for d in abs_diff:
                if d >= 100.0:
                        len_diff = len(large_diff_inputs)
                        large_diff_inputs = pd.concat([large_diff_inputs,df_weekly_val.iloc[i:i+TIME_STEPS_IN]])
                        try:    
                                large_diff_inputs.loc[large_diff_inputs.index[len_diff:len_diff+TIME_STEPS_IN],"Predicted"] = preds
                        except ValueError:
                                print("ERROR: ",i,i+TIME_STEPS_IN,len(large_diff_inputs))
                        break




In [332]:
large_diff_inputs


Unnamed: 0,administrative_area_level_3,year,week,confirmed,deaths,recovered,vaccines,people_vaccinated,people_fully_vaccinated,school_closing,...,facial_coverings,vaccination_policy,elderly_people_protection,latitude,longitude,population,cfr,cases_per_population,incidence,Predicted
35600,LK Stormarn,2021,51,11961.500000,346.000000,11615.500000,485966.500000,198702.833333,195663.500000,1.0,...,2.0,5.0,2.0,53.707825,10.305331,243196.0,0.028930,0.049185,214.504625,-1.000000
35601,LK Stormarn,2021,52,12490.400000,348.400000,12142.000000,494748.400000,199418.600000,196242.400000,1.0,...,2.0,5.0,2.0,53.707825,10.305331,243196.0,0.027899,0.051359,229.609040,-1.000000
35602,LK Stormarn,2021,53,3259.500000,182.500000,3077.000000,1683.000000,1683.000000,0.000000,3.0,...,2.0,2.0,2.0,53.707825,10.305331,243196.0,0.055989,0.013403,133.637066,-1.000000
35603,LK Stormarn,2022,1,13574.571429,350.857143,13223.714286,507396.000000,200202.428571,197494.142857,1.0,...,2.0,5.0,2.0,53.707825,10.305331,243196.0,0.025861,0.055817,408.253895,-1.000000
35604,LK Stormarn,2022,2,14512.285714,352.142857,14160.142857,524800.000000,201148.571429,199551.571429,1.0,...,2.0,5.0,2.0,53.707825,10.305331,243196.0,0.024275,0.059673,385.579650,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47380,SK Emden,2021,21,1259.400000,13.600000,1245.800000,25072.600000,18482.200000,6590.400000,3.0,...,2.0,3.0,2.0,53.363747,7.196228,50195.0,0.010796,0.025090,121.924494,948.843018
47381,SK Emden,2021,22,1307.500000,14.000000,1293.500000,27286.500000,19412.666667,7873.833333,1.0,...,2.0,3.0,2.0,53.363747,7.196228,50195.0,0.010708,0.026048,105.256168,890.517395
47382,SK Emden,2021,23,1318.000000,14.000000,1304.000000,30256.666667,20433.666667,9823.000000,1.0,...,2.0,5.0,2.0,53.363747,7.196228,50195.0,0.010622,0.026258,20.918418,687.630005
47383,SK Emden,2021,24,1323.333333,14.000000,1309.333333,33691.333333,21406.500000,12284.833333,1.0,...,2.0,5.0,2.0,53.363747,7.196228,50195.0,0.010579,0.026364,10.625228,543.169373


In [126]:



# def coloring(string,color="normal",style="normal"):
#     col = "0"
#     sty = "2"
#     if color == "blink":
#         col = "5"
#     elif   color == "red":
#         col = "31"
#     elif color == "green":
#         col = "32"
#     elif color == "yellow":
#         col = "33"
#     elif color == "blue":
#         col = "34"
#     elif color == "purple":
#         col = "35"

#     if style == "bold":
#         sty = "1"
#     elif style == "italic":
#         sty = "3"
#     elif style == "curl":
#         sty = "4"
#     elif style == "blink1":
#         sty = "5"
#     elif style == "blink2":
#         sty = "6"
#     elif style == "selected":
#         sty = "7"

#     return "\x1b[{}m\x1b[{}m{}\x1b[0m".format(col,sty,string)




# print("Prediction for: {}".format(coloring(TARGET_COLUMN,"yellow")))

# column_string = "    {}    | {}  | {} |  {}".format(coloring("Row","blue","bold"),coloring("Expected","green","bold"),coloring("Predicted","purple","bold"),coloring("abs(Diff)","red","bold"))
# print("{:#<50s}\n{}\n{:-<50s}".format("",column_string,""))
# for i, row in enumerate(except_taget_rows.index.values):
#     values_predict = de_scaled_prediction[i]
#     values_is = except_taget_rows.loc[row][TARGET_COLUMN]
#     # print("Zeile {} | {} (Predicted): {:.1f} , {} (Is): {:.1f}, |Diff|: {:.2f} ".format(row,TARGET_COLUMN,values_predict, TARGET_COLUMN ,values_is, abs(values_predict-values_is))) 
#     diff = abs(values_predict-values_is)
#     print(" {: ^22} | {: ^22} | {: ^22} | {: ^22}".format(coloring(row,"blue"),coloring(f"{values_predict:.1f}","green"),coloring(f"{values_is:.1f}","purple"),coloring(f"{diff:.1f}","red"))) 




Prediction for: [33m[2mincidence[0m
##################################################
    [34m[1mRow[0m    | [32m[1mExpected[0m  | [35m[1mPredicted[0m |  [31m[1mabs(Diff)[0m
--------------------------------------------------
   [34m[2m47390[0m   |   [32m[2m32.4[0m    |   [35m[2m33.9[0m    |    [31m[2m1.5[0m   
   [34m[2m47391[0m   |   [32m[2m22.0[0m    |   [35m[2m14.3[0m    |    [31m[2m7.7[0m   
   [34m[2m47392[0m   |   [32m[2m33.7[0m    |   [35m[2m27.9[0m    |    [31m[2m5.8[0m   
   [34m[2m47393[0m   |   [32m[2m47.5[0m    |   [35m[2m59.8[0m    |   [31m[2m12.2[0m   
   [34m[2m47394[0m   |   [32m[2m61.7[0m    |   [35m[2m79.7[0m    |   [31m[2m18.0[0m   
   [34m[2m47395[0m   |   [32m[2m66.7[0m    |   [35m[2m68.1[0m    |    [31m[2m1.4[0m   
