# Einlesen der Daten

In [58]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from Code.S01_Experimente.Monte_Carlo.process_hdf5 import plot_from_exp_id

plt.rcParams['figure.figsize'] = [13, 10] # Größe der Figure festsetzen

In [59]:
path = r"C:\Users\timbr\OneDrive\Masterarbeit\Daten\2020_08_13_Monte_Carlo_neue_Metriken\\"

# ersten Teil der Monte-Carlo-Studie einlesen
h5_filename1 = "2020_08_13__10_47_27_Monte_Carlo_Data.hdf5"
filename1 = "2020_08_13__10_47_27_Monte_Carlo_DataFrame.pkl"
df_raw1 = pd.read_pickle(path+filename1)

# DTW Daten einlesen
h5_filename2 = "2020_08_17__19_22_16_Monte_Carlo_Data.hdf5" 
filename2 = "2020_08_17__19_22_16_Monte_Carlo_DataFrame.pkl"
df_raw2 = pd.read_pickle(path+filename2)

# change exp_id so that there are no doubles
df_raw2.exp_id = df_raw2.exp_id + df_raw1.exp_id.max()

# DUMMY Daten einlesen
h5_filename3 = "2020_08_20__10_41_43_Monte_Carlo_Data.hdf5" 
filename3 = "2020_08_20__10_41_43_Monte_Carlo_DataFrame.pkl"
df_raw3 = pd.read_pickle(path+filename3)

# change exp_id so that there are no doubles
df_raw3.exp_id = df_raw3.exp_id + df_raw2.exp_id.max()


df_raw = pd.concat([df_raw1, df_raw2, df_raw3])
df_raw['signal_power'] = df_raw['signal_power1'] + df_raw['signal_power2']

In [60]:
numeric_columns = ['dist_id', 'resloss_sum', 'yexceed+5%', 'resloss_dist', 'signal_power', 'resloss_new1', 'resloss_new2']

for i in numeric_columns:
    df_raw[i] = df_raw[i].apply(pd.to_numeric, errors='coerce')

print(df_raw.shape)

(3174, 43)


In [31]:
for i in df_raw2.exp_id:
    plot_from_exp_id(path, h5_filename2, i)

KeyError: "Unable to open object (object 'exp2117' doesn't exist)"

# Auswertung der Experimente:
- Güte der Störerkennung
- Filterung ungültiger Experimente
- Störitendsität für Erkennung
- Messrauschen
- Modelqualität
- Totzeit


# Güte der Störerkennung
Auswertung hinsichtlich Anzahl an ``False Positive`` und ``False Negative``.

In [38]:
def determine_detection_quality(df):
    """ determine if two disturbances lead to fp or fn"""
    
    dist_types = ['cosinus', 'noisycos', 'sq_cos', 'chirp', 'ampcos', 'triag', 'random'] # verschiedene Störarten
    
    # init dataframe
    data = {'dist_types': dist_types, 'fp1': [0]*len(dist_types), 'fp2': [0]*len(dist_types),'fn1': [0]*len(dist_types),'fn2': [0]*len(dist_types), 'tp1': [0]*len(dist_types),'tp2': [0]*len(dist_types),}
    df_disttype = pd.DataFrame(data=data)
    
    for dist_type in dist_types:
        for idx, row in df.iterrows():

            # check if current disturbance is disturbance in simulation
            if (dist_type in row.dist_name1):
                if row.fp_dist1 >= 1:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['fp1']] += 1 #row.fp_dist1

            if dist_type in row.dist_name2:
                if row.fp_dist2 >= 1:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['fp2']] += 1 #row.fp_dist2

            if (dist_type in row.dist_name1):
                if row.fn_dist1 >= 1:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['fn1']] += 1 #row.fp_dist1

            if dist_type in row.dist_name2:
                if row.fn_dist2 >= 1:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['fn2']] += 1 #row.fp_dist2

            if dist_type in row.dist_name1:
                if row.fp_dist1 == 0:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['tp1']] += 1

            if dist_type in row.dist_name2:
                if row.fp_dist2 == 0:
                    df_disttype.loc[df_disttype['dist_types'] == dist_type, ['tp2']] += 1
                    
    return df_disttype

In [39]:
df_disttype = determine_detection_quality(df_raw)
df_disttype_gradient = determine_detection_quality(df_raw[df_raw['detect_mode']=='gradient algo'])
df_disttype_dtw = determine_detection_quality(df_raw[df_raw['detect_mode']=='dtw mean algo'])
df_disttype_model_algo = determine_detection_quality(df_raw[df_raw['detect_mode']=='model algo full'])
df_disttype_ar = determine_detection_quality(df_raw[df_raw['detect_mode']=='model ar l'])
df_disttype_arima = determine_detection_quality(df_raw[df_raw['detect_mode']=='model arima l'])

## Güte der Störerkennung mit Unterscheidung in verschiedene Störklassen

In [40]:
df_disttype

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,73,93,0,0,272,252
1,noisycos,80,99,0,1,265,246
2,sq_cos,229,223,1,1,1,7
3,chirp,104,96,0,0,241,249
4,ampcos,81,91,78,39,264,254
5,triag,73,63,98,110,272,282
6,random,147,152,296,286,543,538


## Aufgeschlüsselt nach Methoden

In [41]:
df_disttype_gradient

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,0,13,0,0,69,56
1,noisycos,1,15,0,0,68,54
2,sq_cos,46,40,0,0,0,6
3,chirp,24,34,0,0,45,35
4,ampcos,0,12,67,32,69,57
5,triag,45,47,23,21,24,22
6,random,59,74,114,89,79,64


In [42]:
df_disttype_dtw

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,69,69,0,0,0,0
1,noisycos,69,69,0,0,0,0
2,sq_cos,45,45,1,1,1,1
3,chirp,59,46,0,0,10,23
4,ampcos,60,66,10,7,9,3
5,triag,6,5,45,54,63,64
6,random,47,51,117,122,91,87


In [43]:
df_disttype_model_algo

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,4,3,0,0,65,66
1,noisycos,4,2,0,0,65,67
2,sq_cos,46,46,0,0,0,0
3,chirp,4,2,0,0,65,67
4,ampcos,4,2,0,0,65,67
5,triag,6,2,9,11,63,67
6,random,14,4,20,29,124,134


In [44]:
df_disttype_ar

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,0,3,0,0,69,66
1,noisycos,2,8,0,0,67,61
2,sq_cos,46,46,0,0,0,0
3,chirp,7,11,0,0,62,58
4,ampcos,13,4,0,0,56,65
5,triag,10,6,5,15,59,63
6,random,13,13,24,23,125,125


In [45]:
df_disttype_arima

Unnamed: 0,dist_types,fp1,fp2,fn1,fn2,tp1,tp2
0,cosinus,0,5,0,0,69,64
1,noisycos,4,5,0,1,65,64
2,sq_cos,46,46,0,0,0,0
3,chirp,10,3,0,0,59,66
4,ampcos,4,7,1,0,65,62
5,triag,6,3,16,9,63,66
6,random,14,10,21,23,124,128


In [21]:
df_raw2.signal_power

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
524    NaN
525    NaN
526    NaN
527    NaN
528    NaN
Name: signal_power, Length: 529, dtype: object

# False Negative eher bei schwachen Störungen

In [7]:
df_raw[df_raw['false_neg_count']>0].signal_power.mean()

0.4445430662258047

In [8]:
df_raw[df_raw['false_neg_count']>0].signal_power.median()

0.42837333998999955

In [9]:
df_raw.signal_power.mean()

0.5586152040163811

In [10]:
df_raw.signal_power.median()

0.5694656890706353

Fand eine Anpassung der Sollgröße aufgrund eines ``False Positive`` kurz vor der tatsächlichen Störung statt, so sind die Metriken, wie ``res loss``, nicht mehr aussagekräftigt. Im Folgenden werden diese aussortiert.

# Auswahl der zubetrachteten Experimente

In [61]:
df = df_raw[df_raw['disq']==0]

## Vergleich über alle Experimente gemittelt

In [62]:
df['r/s'] = df['resloss_sum'] / df['signal_power']
df_dmod = df[['detect_mode', 'resloss_sum', 'yexceed+5%', 'signal_power', 'r/s']].groupby(['detect_mode']).mean()
df_dmod['count'] = df[['detect_mode', 'resloss_sum']].groupby(['detect_mode']).count()

df_dmod

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,resloss_sum,yexceed+5%,signal_power,r/s,count
detect_mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
detect_dummy,784.912148,0.0,0.558705,1500.800749,529
dtw mean algo,521.950953,122.545089,0.558432,1056.505876,507
gradient algo,422.77042,185.724051,0.559891,894.3698,436
model algo full,409.457839,81.257526,0.560912,829.40314,501
model ar l,414.475827,340.567941,0.558539,842.393695,515
model arima l,368.395982,466.84855,0.559354,752.043405,493


# Vergleich der Methoden bei gleicher Störung

Im folgenden Teil wird verglichen, wie die Methoden auf die gleiche Störung reagieren.

In [63]:
# Liste mit allen Störungen
dist_list = [i for i in range(1, df['dist_id'].max())]

# DatFrame der zählt wie oft eine Methode bei einer bestimmten Störung als beste abgeschnitten hat

df_best = pd.DataFrame(columns=['resloss_sum', 'max_loss1', 'max_loss2', 'time_to_recover1', 'time_to_recover2'])

for idx in df.detect_mode.unique():
    for col in df_best.columns:
        df_best.loc[idx, col] = 0

In [64]:
for i in dist_list:
    
    df_dist = df[df['dist_id'] == i]
    
    for m in df_best.columns:
            
        df_dist.sort_values(by=[m], inplace = True)
        
        df_best.loc[df_dist.iloc[0].detect_mode, m] += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [65]:
df_best['max_loss'] = df_best['max_loss1'] + df_best['max_loss2']
df_best['time_to_recover'] = df_best['time_to_recover1'] + df_best['time_to_recover2'] 

In [66]:
df_best[['resloss_sum', 'max_loss', 'time_to_recover']]

Unnamed: 0,resloss_sum,max_loss,time_to_recover
gradient algo,145,96,110
model algo full,50,205,191
model arima l,247,387,380
model ar l,55,237,242
dtw mean algo,31,110,110
detect_dummy,0,21,23


# Zusammenhang zwischen Signalstärke und Resloss

# Lerneffekt

In [67]:
unique_dist = []

for idx, row in df.iterrows():
    for dist in row.dist_name1 + row.dist_name1:
        if dist not in unique_dist:
            unique_dist.append(dist)

In [68]:
df.columns

Index(['deadtime', 'detect_mode', 'dist_id', 'dist_name', 'exp_id',
       'model_qual', 'noise_sigma', 'react_mode', 'false_neg_count',
       'false_pos_count', 'fn_dist', 'fp_dist', 'fp_new', 'max_loss',
       'overlap', 'resloss_dist', 'resloss_new1', 'resloss_new2',
       'resloss_sum', 'time_to_detect', 'time_to_recover', 'yexceed+5%',
       'yexceed_dist', 'signal_power', 'dist_name1', 'dist_name2', 'fn_dist1',
       'fn_dist2', 'fp_dist1', 'fp_dist2', 'max_loss1', 'max_loss2',
       'resloss_dist1', 'resloss_dist2', 'time_to_detect1', 'time_to_detect2',
       'time_to_recover1', 'time_to_recover2', 'yexceed_dist1',
       'yexceed_dist2', 'signal_power1', 'signal_power2', 'disq', 'r/s'],
      dtype='object')

In [69]:
df['r/ex1'] = df['resloss_dist1'] / df['yexceed_dist1']
df['r/ex2'] = df['resloss_dist2'] / df['yexceed_dist2']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [70]:
df[df['resloss_new1'] >= df['resloss_new2']]

Unnamed: 0,deadtime,detect_mode,dist_id,dist_name,exp_id,model_qual,noise_sigma,react_mode,false_neg_count,false_pos_count,...,time_to_recover1,time_to_recover2,yexceed_dist1,yexceed_dist2,signal_power1,signal_power2,disq,r/s,r/ex1,r/ex2
1,0.5,model algo full,1,,10,1,0.05,move setpoint lim,0,0,...,0.00,0.0,0.000000,0.054859,0.375000,0.375000,0.0,345.547711,inf,3648.716530
2,0.5,model algo full,55,,100,1,0.05,move setpoint lim,0,0,...,6.68,0.0,82.655836,8.079401,0.375000,0.369584,0.0,562.896111,0.675993,44.959833
4,0.5,model algo full,342,,1001,1,0.05,move setpoint lim,1,0,...,0.00,0.0,0.000000,0.000000,0.053373,0.026990,0.0,5057.494827,inf,inf
6,0.5,model ar l,342,,1003,1,0.05,move setpoint lim,1,2,...,0.00,0.0,0.000000,0.000000,0.053373,0.026990,0.0,4881.899150,inf,inf
8,0.5,model arima l,342,,1005,1,0.05,move setpoint lim,2,0,...,0.00,0.0,0.000000,0.000000,0.053373,0.026990,0.0,5469.683996,inf,inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,0.5,detect_dummy,48,,2740,1,0.05,react_dummy,2,0,...,6.70,0.0,0.000000,0.000000,0.375000,0.375000,0.0,1239.455907,inf,inf
525,0.5,detect_dummy,116,,2741,1,0.05,react_dummy,2,0,...,6.69,0.0,0.000000,0.000000,0.374491,0.375000,0.0,1116.539878,inf,inf
526,0.5,detect_dummy,14,,2742,1,0.05,react_dummy,2,0,...,0.00,0.0,0.000000,0.000000,0.375000,0.244941,0.0,1018.930356,inf,inf
527,0.5,detect_dummy,83,,2743,1,0.05,react_dummy,2,0,...,0.00,0.0,0.000000,0.000000,0.374864,0.244941,0.0,1016.998967,inf,inf


In [71]:
df[df['resloss_new1'] < df['resloss_new2']]

Unnamed: 0,deadtime,detect_mode,dist_id,dist_name,exp_id,model_qual,noise_sigma,react_mode,false_neg_count,false_pos_count,...,time_to_recover1,time_to_recover2,yexceed_dist1,yexceed_dist2,signal_power1,signal_power2,disq,r/s,r/ex1,r/ex2
0,0.5,gradient algo,18,,1,1,0.05,move setpoint lim,1,0,...,0.0,0.00,5.583336,0.000000,0.375000,0.171189,0.0,742.100262,10.636265,inf
3,0.5,model arima l,291,,1000,1,0.05,move setpoint lim,0,0,...,0.0,0.00,0.124175,0.000000,0.194466,0.053373,0.0,1275.443641,250.504404,inf
5,0.5,gradient algo,292,,1002,1,0.05,move setpoint lim,1,8,...,0.0,0.00,13.721414,33.678143,0.194466,0.163456,0.0,1131.274254,2.193997,11.128959
7,0.5,model arima l,240,,1004,1,0.05,move setpoint lim,0,0,...,6.8,11.38,992.303881,333.142569,0.363573,0.368016,0.0,756.306687,0.045869,1.524239
10,0.5,gradient algo,241,,1007,1,0.05,move setpoint lim,0,1,...,0.0,7.68,17.954596,41.396204,0.363573,0.363573,0.0,561.712348,2.454576,8.802149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
508,0.5,detect_dummy,29,,2725,1,0.05,react_dummy,2,0,...,0.0,6.77,0.000000,0.000000,0.375000,0.374491,0.0,1233.248556,inf,inf
512,0.5,detect_dummy,63,,2729,1,0.05,react_dummy,2,0,...,6.5,28.88,0.000000,0.000000,0.375000,0.333583,0.0,2200.480369,inf,inf
513,0.5,detect_dummy,98,,2730,1,0.05,react_dummy,2,0,...,0.0,7.10,0.000000,0.000000,0.374549,0.374491,0.0,1241.756415,inf,inf
519,0.5,detect_dummy,13,,2735,1,0.05,react_dummy,2,0,...,0.0,0.00,0.000000,0.000000,0.375000,0.194466,0.0,1211.629324,inf,inf


In [72]:
df['lerneffekt'] = df['resloss_new1'] - df['resloss_new2']
df.head(50)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,deadtime,detect_mode,dist_id,dist_name,exp_id,model_qual,noise_sigma,react_mode,false_neg_count,false_pos_count,...,time_to_recover2,yexceed_dist1,yexceed_dist2,signal_power1,signal_power2,disq,r/s,r/ex1,r/ex2,lerneffekt
0,0.5,gradient algo,18,,1,1,0.05,move setpoint lim,1,0,...,0.0,5.583336,0.0,0.375,0.171189,0.0,742.100262,10.63626,inf,-143.805882
1,0.5,model algo full,1,,10,1,0.05,move setpoint lim,0,0,...,0.0,0.0,0.054859,0.375,0.375,0.0,345.547711,inf,3648.71653,0.576328
2,0.5,model algo full,55,,100,1,0.05,move setpoint lim,0,0,...,0.0,82.655836,8.079401,0.375,0.369584,0.0,562.896111,0.675993,44.959833,32.444311
3,0.5,model arima l,291,,1000,1,0.05,move setpoint lim,0,0,...,0.0,0.124175,0.0,0.194466,0.053373,0.0,1275.443641,250.5044,inf,-51.989901
4,0.5,model algo full,342,,1001,1,0.05,move setpoint lim,1,0,...,0.0,0.0,0.0,0.053373,0.02699,0.0,5057.494827,inf,inf,20.865296
5,0.5,gradient algo,292,,1002,1,0.05,move setpoint lim,1,8,...,0.0,13.721414,33.678143,0.194466,0.163456,0.0,1131.274254,2.193997,11.128959,-13.717881
6,0.5,model ar l,342,,1003,1,0.05,move setpoint lim,1,2,...,0.0,0.0,0.0,0.053373,0.02699,0.0,4881.89915,inf,inf,13.165454
7,0.5,model arima l,240,,1004,1,0.05,move setpoint lim,0,0,...,11.38,992.303881,333.142569,0.363573,0.368016,0.0,756.306687,0.04586917,1.524239,-128.467201
8,0.5,model arima l,342,,1005,1,0.05,move setpoint lim,2,0,...,0.0,0.0,0.0,0.053373,0.02699,0.0,5469.683996,inf,inf,25.676626
9,0.5,model algo full,292,,1006,1,0.05,move setpoint lim,0,0,...,0.0,0.0,0.0,0.194466,0.163456,0.0,888.340475,inf,inf,40.257037


In [73]:
df['lerneffekt'] = df['lerneffekt'].apply(pd.to_numeric, errors='coerce')
df[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,-6.778526
dtw mean algo,-13.035121
gradient algo,-36.675712
model algo full,4.590207
model ar l,17.488861
model arima l,13.348329


# gleiche Gruppe von Störungen

In [74]:
df_cos = df[df['dist_name1'].str.contains('cosinus') & df['dist_name2'].str.contains('cosinus')]
df_cos[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,1.256219
dtw mean algo,1.748979
gradient algo,2.927322
model algo full,2.327643
model ar l,3.795893
model arima l,14.747852


In [75]:
df_noisy = df[df['dist_name1'].str.contains('noisycos') & df['dist_name2'].str.contains('noisycos')]
df_noisy[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,1.461774
dtw mean algo,-4.079874
gradient algo,2.846909
model algo full,3.780092
model ar l,3.852124
model arima l,10.686893


In [76]:
df_chirp = df[df['dist_name1'].str.contains('chirp') & df['dist_name2'].str.contains('chirp')]
df_chirp[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,-60.541298
dtw mean algo,-25.04325
gradient algo,-53.41078
model algo full,-32.87829
model ar l,-22.392768
model arima l,-45.709593


In [77]:
df_rand = df[df['dist_name1'].str.contains('random') & df['dist_name2'].str.contains('random')]
df_rand[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,1.87408
dtw mean algo,-4.184426
gradient algo,-43.868084
model algo full,0.854617
model ar l,14.272757
model arima l,-0.612008


In [78]:
df_triag = df[df['dist_name1'].str.contains('triag') & df['dist_name2'].str.contains('triag')]
df_triag[['detect_mode', 'lerneffekt']].groupby(['detect_mode']).mean()

Unnamed: 0_level_0,lerneffekt
detect_mode,Unnamed: 1_level_1
detect_dummy,0.608849
dtw mean algo,-17.131606
gradient algo,-256.063032
model algo full,75.359608
model ar l,74.127593
model arima l,0.109996
