# Anomaly Modelling

### Imports

In [17]:
import os
import arrow
import pandas as pd
import time 
import numpy as np
import matplotlib.pyplot as plt

# Select matplotlib's 'tableau-colorblind10' style sheet for this notebook session.
plt.style.use('tableau-colorblind10')

In [18]:
from AnomalyGenerator import AnomalyGenerator

### Set configs

In [19]:
# All input/output locations are resolved relative to the directory the
# notebook kernel was started in.
cwd = os.getcwd()
data_fn, fig_fn = (os.path.join(cwd, subdir) for subdir in ('data_modelling', 'figs'))

# Echo the working directory and the data directory so a wrong launch
# location is visible immediately.
print(cwd, data_fn, sep='\n')

/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator
/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator/data_modelling


## Import Data

In [22]:
# Read the reduced simulation results (stored under HDF5 key 'df') from the
# data directory into a DataFrame.
h5_fn = os.path.join(data_fn, 'simulation_data_reduced.h5')
simulation_data = pd.read_hdf(path_or_buf=h5_fn, key='df')

In [23]:
# Preview the first five rows of the loaded frame.
simulation_data.head(n=5)

Unnamed: 0,line_1_1_loading,line_2_3_loading,line_3_4_loading,line_4_5_loading,line_5_6_loading,line_7_8_loading,line_8_9_loading,line_9_10_loading,line_10_11_loading,line_3_8_loading,line_12_13_loading,line_13_14_loading,line_6_7_loading,line_11_14_loading,line_14_8_loading,trafo_0_loading,trafo_1_loading
2020-01-01 00:00:00,24.644473,20.131101,8.404872,7.058014,2.865729,4.337964,7.136024,2.829874,2.875793,12.656436,1.386398,0.480776,0.092008,0.187712,0.038735,5.497073,0.374605
2020-01-01 00:15:00,21.7904,17.948475,7.670718,6.389197,2.648196,4.053554,6.380672,2.52829,2.573173,11.468529,1.804625,0.619204,0.092209,0.188139,0.038747,4.855954,0.48761
2020-01-01 00:30:00,19.27863,16.028356,7.015811,5.801115,2.44964,3.786643,5.728396,2.268028,2.311765,10.422091,2.156886,0.736068,0.092387,0.188516,0.038757,4.291608,0.58279
2020-01-01 00:45:00,17.169135,14.410755,6.460969,5.303823,2.280746,3.558796,5.178226,2.048673,2.09119,9.537306,2.447429,0.832563,0.092537,0.188835,0.038765,3.818051,0.661295
2020-01-01 01:00:00,15.598922,13.203294,6.040075,4.931252,2.150115,3.378654,4.772639,1.887093,1.928513,8.873382,2.652278,0.900637,0.092649,0.189073,0.038771,3.465591,0.716645


## Add Column for labels

In [24]:
# Start every row with a float label of 0.0.
# NOTE(review): presumably 0 means "normal" and AnomalyGenerator overwrites the
# label for injected anomalies -- confirm against AnomalyGenerator.
simulation_data['label'] = 0.0

## Select year 2022 to be anomalous

In [25]:
# Take the rows from 2022-01-01 00:00 through 2022-12-31 23:45 (label-based,
# inclusive on both ends) as the frame that will receive the anomalies.
# The explicit .copy() makes this frame independent of `simulation_data`:
# without it the .loc slice may share memory with the source frame, so later
# in-place writes could trigger SettingWithCopyWarning or leak back into
# `simulation_data`.
anomalous_data = simulation_data.loc['2022-01-01 00:00:00':'2022-12-31 23:45:00'].copy()

## Make Anomalies Great Again

### Anomaly Configs

In [26]:
# Number of rows in the selected slice.
anomalous_data.shape[0]

35040

In [27]:
# Anomaly injection plan handed to AnomalyGenerator, grouped by anomaly type.
# Each entry names one feature column plus a 'timestamps' list and a 'factors'
# list of equal length.
# NOTE(review): timestamps and factors are presumably paired by list position,
# and the timestamps look like positional row offsets (single ints for point
# anomalies, (start, end) tuples otherwise) -- confirm against AnomalyGenerator.

# Anomalies that span a range of consecutive rows.
collective_specs = [
    {
        'feature': 'line_1_1_loading',
        'timestamps': [(1000, 1222), (4560, 4580), (30001, 30050), (15000, 15024)],
        'factors': [400, 1000, 1200, 300],
    },
    {
        'feature': 'line_5_6_loading',
        'factors': [100, 200, 100, 450],
        'timestamps': [(3, 10), (24000, 24020), (11111, 11222), (30000, 32000)],
    },
    {
        'feature': 'trafo_0_loading',
        'factors': [110, 120, 100, 450],
        'timestamps': [(1000, 1009), (2200, 2210), (11133, 11144), (32000, 32021)],
    },
]

# Anomalies that hit a single row each.
point_specs = [
    {
        'feature': 'line_2_3_loading',
        'timestamps': [10, 30, 10000, 14420, 17010, 28021, 34340],
        'factors': [400, 100, 120, 200, 300, 110, 120],
    },
    {
        'feature': 'line_11_14_loading',
        'timestamps': [100, 3000, 11200, 15520, 18020, 26041, 33333],
        'factors': [400, 1000, 300, 200, 101, 90000, 400],
    },
    {
        'feature': 'trafo_1_loading',
        'timestamps': [1200, 5000, 13200, 17520, 17820, 24041, 32333],
        'factors': [30, 100, 420, 220, 100, 800, 400],
    },
]

# Noise applied over short row ranges.
noise_specs = [
    {
        'feature': 'line_7_8_loading',
        'timestamps': [(10, 30), (4000, 4020), (26666, 26670)],
        'factors': [10, 3, 5],
    },
]

outliers = {
    'collective': collective_specs,
    'point': point_specs,
    'noise': noise_specs,
}

In [28]:
# Build the generator from the 2022 slice and the anomaly configuration above.
adGenerator = AnomalyGenerator(
    anomalous_data,
    outliers,
)

In [29]:
# Silence pandas' chained-assignment (SettingWithCopy) warning only while the
# anomalies are being injected. option_context restores the previous setting
# on exit, even if make_anomalous() raises, instead of unconditionally
# resetting the option to 'warn' afterwards.
with pd.option_context('mode.chained_assignment', None):
    anomalous_data = adGenerator.make_anomalous()

In [31]:
# Spot-check 20 random rows of the result. The fixed random_state makes the
# same rows appear on every Restart & Run All, so the displayed output is
# reproducible.
anomalous_data.sample(n=20, random_state=42)

Unnamed: 0,line_1_1_loading,line_2_3_loading,line_3_4_loading,line_4_5_loading,line_5_6_loading,line_7_8_loading,line_8_9_loading,line_9_10_loading,line_10_11_loading,line_3_8_loading,line_12_13_loading,line_13_14_loading,line_6_7_loading,line_11_14_loading,line_14_8_loading,trafo_0_loading,trafo_1_loading,label
2022-08-07 14:15:00,10.442989,8.811188,8.532482,10.005602,4.71515,6.37802,15.531523,6.186547,6.237922,12.862343,7.295317,2.430222,0.093069,0.18946,0.038488,3.554836,1.960482,0.0
2022-02-20 18:00:00,58.876332,46.15759,16.852316,14.905904,5.28511,7.371939,16.220921,6.463444,6.513284,26.579244,8.418566,2.80503,0.089616,0.182618,0.038456,13.167748,2.263932,0.0
2022-09-02 04:30:00,20.090884,15.589364,6.455949,5.059269,2.399997,3.919602,4.571869,1.807204,1.84791,9.169415,0.834831,0.276147,0.092375,0.188522,0.038673,4.607067,0.21821,0.0
2022-01-18 18:30:00,77.34578,59.850503,22.486283,18.930462,7.50378,11.220514,19.249448,7.676015,7.725754,34.060281,10.247547,3.415456,0.08831,0.179899,0.038404,17.386183,2.758065,0.0
2022-09-27 23:15:00,44.087966,34.059076,12.435875,10.761502,4.041795,5.848706,11.364566,4.52026,4.568997,19.232806,4.702058,1.565138,0.090689,0.184921,0.038562,9.989374,1.260005,0.0
2022-02-20 01:00:00,22.880584,18.599548,7.585475,6.461122,2.54769,3.794567,6.668018,2.642993,2.688306,11.555193,0.843252,0.302247,0.092146,0.188002,0.038719,5.073744,0.227847,0.0
2022-09-15 02:00:00,18.938584,14.884897,6.075673,4.923329,2.18091,3.453266,4.708127,1.861435,1.902599,8.870471,0.844371,0.279319,0.092451,0.188672,0.038673,4.33988,0.220743,0.0
2022-04-27 02:30:00,17.849902,14.122946,5.838098,4.729548,8.637991,3.326534,4.520609,1.786783,1.82736,8.520578,0.483341,0.159503,0.092528,0.188833,0.038683,4.035261,0.126199,2.0
2022-09-10 19:30:00,82.105675,63.466109,21.608764,19.705528,6.503516,8.554664,22.551033,8.997918,9.047922,35.258295,14.178136,4.727831,0.088003,0.179171,0.038291,18.485116,3.820041,0.0
2022-09-24 23:00:00,53.757286,41.58613,14.706478,13.104192,4.574756,6.312297,14.392798,5.731807,5.781368,23.327808,5.835759,1.943286,0.090008,0.183458,0.03853,12.142869,1.566209,0.0


## Export Data

In [32]:
# Write the frame returned by make_anomalous() to a fresh HDF5 file
# (mode='w' replaces any existing file) under key 'df', and echo the target
# path.
h5_fn = os.path.join(data_fn, 'anomalous_data_y_2022_reduced.h5')
print(h5_fn)
anomalous_data.to_hdf(h5_fn, key='df', mode='w')

/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator/data_modelling/anomalous_data_y_2022_reduced.h5
