# Create anomalies on drifted data

In [1]:
import os
import arrow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

plt.style.use('tableau-colorblind10')

In [2]:
from AnomalyGenerator import AnomalyGenerator

In [3]:
# Derive the working folders used by this notebook from the current directory.
cwd = os.getcwd()
print(cwd)
data_fn, grid_fn, fig_fn = (
    os.path.join(cwd, folder)
    for folder in ('data_modelling', 'grid_modelling', 'figs')
)
print(data_fn)
print(grid_fn)

/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator
/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator/data_modelling
/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator/grid_modelling


## Import data

In [6]:
# Read the drifted data set (stored under key 'df') from the data folder.
h5_fn = os.path.join(data_fn, 'drifted_data_y_2023_reduced_more_cos_phi.h5')
drifted_data = pd.read_hdf(path_or_buf=h5_fn, key='df')

In [8]:
# Start with an all-zero float label column, one entry per row of the frame.
drifted_data['anomaly_labels'] = np.zeros(drifted_data.shape[0])

In [9]:
def get_ano_intervalls(cnt_anos, n_rows=None, seed=None):
    """Draw ``cnt_anos`` random, non-overlapping index intervals.

    ``2 * cnt_anos`` distinct positions are sampled without replacement from
    ``0 .. n_rows - 1``, sorted, and paired up consecutively. Every interval is
    therefore a ``(start, end)`` tuple with ``start < end`` and no two
    intervals share a position.

    Parameters
    ----------
    cnt_anos : int
        Number of intervals to generate.
    n_rows : int, optional
        Number of positions to sample from. Defaults to ``len(drifted_data)``,
        the notebook-level frame, which keeps the original call
        ``get_ano_intervalls(cnt_anos)`` working unchanged.
    seed : int, optional
        Seed for a private ``random.Random`` instance so the draw is
        reproducible. When omitted, the global ``random`` module state is used
        exactly as before.

    Returns
    -------
    list[tuple[int, int]]
        The sorted intervals; also printed for inspection in the notebook.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``2 * cnt_anos`` exceeds ``n_rows``
        or ``cnt_anos`` is negative.
    """
    if n_rows is None:
        n_rows = len(drifted_data)
    rng = random if seed is None else random.Random(seed)

    # Sampling from range() avoids materialising every index as a list while
    # selecting exactly the same values for a given RNG state.
    sampled = sorted(rng.sample(range(n_rows), 2 * cnt_anos))

    # Consecutive pairs (0,1), (2,3), ... of the sorted sample form the intervals.
    intervalls = list(zip(sampled[0::2], sampled[1::2]))

    print(intervalls)
    return intervalls

In [10]:
def get_single_ano_idx(cnt_anos, n_rows=None, seed=None):
    """Draw ``cnt_anos`` distinct random positions, returned in ascending order.

    Parameters
    ----------
    cnt_anos : int
        Number of positions to sample.
    n_rows : int, optional
        Number of positions ``0 .. n_rows - 1`` to sample from. Defaults to
        ``len(drifted_data)``, the notebook-level frame, which keeps the
        original call ``get_single_ano_idx(cnt_anos)`` working unchanged.
    seed : int, optional
        Seed for a private ``random.Random`` instance so the draw is
        reproducible. When omitted, the global ``random`` module state is used
        exactly as before.

    Returns
    -------
    list[int]
        The sorted sampled positions; also printed for inspection.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``cnt_anos`` exceeds ``n_rows`` or is
        negative.
    """
    if n_rows is None:
        n_rows = len(drifted_data)
    rng = random if seed is None else random.Random(seed)

    # range() yields the same picks as an explicit index list without
    # building that list first.
    sampled_l = sorted(rng.sample(range(n_rows), cnt_anos))
    print(sampled_l)
    return sampled_l

## Inject anomalies

### Define anomalies

In [11]:
# Anomaly specification, grouped by anomaly kind ('collective', 'point', 'noise').
# Every entry names one feature column and carries two parallel lists:
# `timestamps` and `factors`, with one factor per timestamp.
# 'collective' and 'noise' entries use (start, end) tuples, 'point' entries
# use single integers.
# NOTE(review): the integers are presumably row positions in `drifted_data`,
# and the meaning of `factors` is defined by AnomalyGenerator - confirm there.
outliers = dict(
    collective=[
        dict(
            feature='line_1_1_loading',
            timestamps=[(1000, 1222), (4560, 4580), (30001, 30050), (15000, 15024)],
            factors=[400, 1000, 1200, 300],
        ),
        dict(
            feature='line_5_6_loading',
            factors=[100, 200, 100, 450],
            timestamps=[(3, 10), (24000, 24020), (11111, 11222), (30000, 32000)],
        ),
        dict(
            feature='trafo_0_loading',
            factors=[110, 120, 100, 450],
            timestamps=[(1000, 1009), (2200, 2210), (11133, 11144), (32000, 32021)],
        ),
    ],
    point=[
        dict(
            feature='line_2_3_loading',
            timestamps=[10, 30, 10000, 14420, 17010, 28021, 34340],
            factors=[400, 100, 120, 200, 300, 110, 120],
        ),
        dict(
            feature='line_11_14_loading',
            timestamps=[100, 3000, 11200, 15520, 18020, 26041, 33333],
            factors=[400, 1000, 300, 200, 101, 90000, 400],
        ),
        dict(
            feature='trafo_1_loading',
            timestamps=[1200, 5000, 13200, 17520, 17820, 24041, 32333],
            factors=[30, 100, 420, 220, 100, 800, 400],
        ),
    ],
    noise=[
        dict(
            feature='line_7_8_loading',
            timestamps=[(10, 30), (4000, 4020), (26666, 26670)],
            factors=[10, 3, 5],
        ),
    ],
)

In [12]:
# Build the anomaly generator from the drifted frame and the `outliers` spec.
# NOTE(review): AnomalyGenerator is a project-local class; whether it copies
# or mutates `drifted_data` is not visible from this notebook - confirm.
adGenerator = AnomalyGenerator(drifted_data,outliers)

In [13]:
# Produce the data set with anomalies applied.
# The two commented-out lines would silence pandas' chained-assignment warning
# around the call and restore it afterwards; they are currently disabled.
#pd.options.mode.chained_assignment = None
anomalous_data = adGenerator.make_anomalous()
#pd.options.mode.chained_assignment = 'warn'

In [14]:
# Run the generator's overlap check. The recorded output prints one
# "Anomaly and Concept Drift at IDX: <n> (<timestamp>)" line per reported row.
# NOTE(review): presumably these are rows where an injected anomaly coincides
# with concept drift - confirm against the AnomalyGenerator implementation.
adGenerator.validate_drifted_anomalie_idx()

Anomaly and Concept Drift at IDX: 20 (2023-01-01 05:00:00)
Anomaly and Concept Drift at IDX: 21 (2023-01-01 05:15:00)
Anomaly and Concept Drift at IDX: 22 (2023-01-01 05:30:00)
Anomaly and Concept Drift at IDX: 23 (2023-01-01 05:45:00)
Anomaly and Concept Drift at IDX: 24 (2023-01-01 06:00:00)
Anomaly and Concept Drift at IDX: 25 (2023-01-01 06:15:00)
Anomaly and Concept Drift at IDX: 26 (2023-01-01 06:30:00)
Anomaly and Concept Drift at IDX: 27 (2023-01-01 06:45:00)
Anomaly and Concept Drift at IDX: 28 (2023-01-01 07:00:00)
Anomaly and Concept Drift at IDX: 29 (2023-01-01 07:15:00)
Anomaly and Concept Drift at IDX: 30 (2023-01-01 07:30:00)
Anomaly and Concept Drift at IDX: 1000 (2023-01-11 10:00:00)
Anomaly and Concept Drift at IDX: 1001 (2023-01-11 10:15:00)
Anomaly and Concept Drift at IDX: 1002 (2023-01-11 10:30:00)
Anomaly and Concept Drift at IDX: 1003 (2023-01-11 10:45:00)
Anomaly and Concept Drift at IDX: 1004 (2023-01-11 11:00:00)
Anomaly and Concept Drift at IDX: 1005 (2023-0

## Export data

In [15]:
# Write the anomalous frame to a fresh HDF5 file (mode 'w' overwrites any
# existing file) under key 'df' in the data folder.
h5_fn = os.path.join(data_fn, 'anomalous_drifted_data_y_2023_reduced_more_cos_phi.h5')
print(h5_fn)
anomalous_data.to_hdf(path_or_buf=h5_fn, key='df', mode='w')

/Users/torge/Development/master/masterthesis_code/02_Experimente/DataGenerator/data_modelling/anomalous_drifted_data_y_2023_reduced_more_cos_phi.h5
