In [53]:
import pandas as pd
import numpy as np
import json
import os

# Config

In [54]:
# Seed NumPy's global RNG so repeated runs draw the same random numbers
np.random.seed(1)

# JSON file holding the noise descriptor
noise_file = "../Python/noise_description.json"

# Training data: source CSV and sample count
training_file = "../TrainingData/neodata/fault_all_1000.csv"
training_samples = 67

# Validation data: source CSV and sample count
validation_file = "../ValidationData/neodata/fault_all_1000.csv"
validation_samples = 67

# Columns removed from every data set
features_for_removal = ['Psuc', 'CprPower', 'Density']

----
# Add noise and sample training data

In [55]:
# Build the noisy training set: load the raw CSV, add per-feature noise,
# drop unwanted columns, sample a fixed number of rows per target value,
# and write the result to CSV.

# Re-seed here so this cell gives the same output when re-run on its own
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(training_file)
with open(noise_file) as noise_fh:
    noise_desc = json.load(noise_fh)

# For each noise descriptor, add normally distributed noise to that column.
# NOTE(review): the key is named 'var', but np.random.normal takes the
# standard deviation as its second argument (scale) -- confirm the descriptor
# stores a standard deviation and not a variance.
for feature, spec in noise_desc.items():
    if spec['var'] > 0:
        all_data[feature] += np.random.normal(spec['mean'], spec['var'], len(all_data))

# Remove the unwanted features in one call (raises KeyError if one is missing,
# same as the per-column pop it replaces)
all_data = all_data.drop(columns=features_for_removal)

# Sample the noisy data separately for each target value 0..20 and concatenate.
# Sampling without replacement raises ValueError if a target value has fewer
# than `training_samples` rows.
for_export = pd.concat(
    [all_data[all_data['target'] == i].sample(training_samples) for i in range(21)]
)

# Export data. Create the output directory up front instead of retrying after
# a failed write: a blanket `except OSError` would also catch unrelated errors
# (e.g. permissions) and then mask them with a FileExistsError from os.mkdir.
out_path = f"./../TrainingData/neodata/fault_all_noise_{training_samples}.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
for_export.to_csv(out_path, index=False, header=True)

----
# Add noise and sample validation data

In [56]:
# Build the noisy validation set: load the raw CSV, add per-feature noise,
# drop unwanted columns, sample a fixed number of rows per target value,
# and write the result to CSV.

# Re-seed here so this cell gives the same output when re-run on its own
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(validation_file)
with open(noise_file) as noise_fh:
    noise_desc = json.load(noise_fh)

# For each noise descriptor, add normally distributed noise to that column.
# NOTE(review): the key is named 'var', but np.random.normal takes the
# standard deviation as its second argument (scale) -- confirm the descriptor
# stores a standard deviation and not a variance.
for feature, spec in noise_desc.items():
    if spec['var'] > 0:
        all_data[feature] += np.random.normal(spec['mean'], spec['var'], len(all_data))

# Remove the unwanted features in one call (raises KeyError if one is missing,
# same as the per-column pop it replaces)
all_data = all_data.drop(columns=features_for_removal)

# Sample the noisy data separately for each target value 0..20 and concatenate.
# Uses `validation_samples` (not `training_samples`) so the validation set size
# follows its own config value. Sampling without replacement raises ValueError
# if a target value has fewer than `validation_samples` rows.
for_export = pd.concat(
    [all_data[all_data['target'] == i].sample(validation_samples) for i in range(21)]
)

# Export data. The file name also uses `validation_samples` so it matches the
# number of rows actually sampled. Create the output directory up front instead
# of retrying after a failed write: a blanket `except OSError` would also catch
# unrelated errors and then mask them with a FileExistsError from os.mkdir.
out_path = f"./../ValidationData/neodata/fault_all_noise_{validation_samples}.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
for_export.to_csv(out_path, index=False, header=True)