In [2]:
import pandas as pd
import numpy as np
import json
import os

# Config

In [3]:
# Seed numpy's global RNG so every run of the notebook is repeatable
np.random.seed(1)

# JSON file describing the noise to add to each feature
noise_file = "../Python/noise_description.json"

# --- Training data: rows drawn per sampled group, and source CSV ---
training_samples = 67
training_file = "../TrainingData/neodata/fault_all_1000.csv"

# --- Validation data: rows drawn per sampled group, and source CSV ---
# Same source CSV as the training data. NOTE(review): in the cells shown
# here the validation rows are drawn from `training_file`; `validation_file`
# itself is not read - confirm whether it is used elsewhere.
validation_samples = 67
validation_file = "../TrainingData/neodata/fault_all_1000.csv"

# --- Test data: rows drawn per class, and source CSV ---
test_samples = 67
test_file = "../TestData/neodata/fault_all_1000.csv"

# Columns dropped from every data set before export
features_for_removal = [
    "Psuc",
    "CprPower",
    "Density",
]

----
# Add noise and sample training and validation data

In [4]:
# Build the noisy training and validation sets from `training_file`.
#
# Steps: load the raw CSV, add Gaussian noise to each feature listed in the
# noise descriptor, drop `features_for_removal`, then for every
# (class, setpoint) chunk draw `training_samples + validation_samples` rows
# and split that draw into a training part and a validation part.

# Lock random seed so the generated noise is reproducible
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(training_file)
with open(noise_file) as file:
    noise_desc = json.load(file)

# For each noise descriptor, add noise accordingly.
# NOTE(review): np.random.normal takes the standard deviation as its second
# argument, while the descriptor key is named 'var' - confirm that the JSON
# stores a standard deviation rather than a variance.
for feature, desc in noise_desc.items():
    if desc['var'] > 0:
        noise = np.random.normal(desc['mean'], desc['var'], len(all_data))
        all_data[feature] += noise

# Drop the columns that must not appear in the exported data
for feature in features_for_removal:
    all_data.pop(feature)

n_classes = 21    # values 0..20 of the 'target' column are sampled
n_setpoints = 18  # each class is cut into this many consecutive chunks

# Rows per setpoint chunk, derived from class 0 only.
# NOTE(review): this assumes every class has the same number of rows and that
# rows are ordered by setpoint within a class - verify against the CSV.
length = len(all_data[all_data['target'] == 0]) // n_setpoints

# Sample noisy data chunk by chunk; the pieces are collected in lists and
# concatenated once at the end instead of re-copying a growing frame.
train_parts = []
valid_parts = []
for f in range(n_classes): # Loop over classes
    class_data = all_data[all_data['target'] == f]
    for i in range(n_setpoints): # Loop over setpoints
        fault_data = class_data[length*i : length*(i+1)]
        # Fixed random_state keeps the draw reproducible; raises ValueError
        # if a chunk holds fewer rows than requested.
        samples = fault_data.sample(training_samples + validation_samples, random_state=1)
        # First part of the draw is training data, the remainder validation
        train_parts.append(samples[:training_samples])
        valid_parts.append(samples[training_samples:training_samples + validation_samples])

for_export_train = pd.concat(train_parts)
for_export_valid = pd.concat(valid_parts)

# Export training data. The target directory is created up front (including
# missing parents) so that a genuine write error is not mistaken for a
# missing directory.
train_out = f"./../TrainingData/neodata/fault_all_noise_{training_samples}.csv"
os.makedirs(os.path.dirname(train_out), exist_ok=True)
for_export_train.to_csv(train_out, index=False, header=True)

# Export validation data
valid_out = f"./../ValidationData/neodata/fault_all_noise_{validation_samples}.csv"
os.makedirs(os.path.dirname(valid_out), exist_ok=True)
for_export_valid.to_csv(valid_out, index=False, header=True)

----
# Add noise and sample test data

In [6]:
# Build the noisy test set from `test_file`.
#
# Steps: load the raw CSV, add Gaussian noise to each feature listed in the
# noise descriptor, drop `features_for_removal`, then draw `test_samples`
# rows from every class and export the result.

# Lock random seed so both the noise and the sampling below are reproducible
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(test_file)
with open(noise_file) as file:
    noise_desc = json.load(file)

# For each noise descriptor, add noise accordingly.
# NOTE(review): np.random.normal takes the standard deviation as its second
# argument, while the descriptor key is named 'var' - confirm that the JSON
# stores a standard deviation rather than a variance.
for feature, desc in noise_desc.items():
    if desc['var'] > 0:
        noise = np.random.normal(desc['mean'], desc['var'], len(all_data))
        all_data[feature] += noise

# Drop the columns that must not appear in the exported data
for feature in features_for_removal:
    all_data.pop(feature)

# Rows per chunk when class 0 is cut into 18 parts. Not used in this cell;
# kept because it is a notebook-level variable that other cells may read.
length = len(all_data[all_data['target'] == 0]) // 18

# Sample noisy data per class (targets 0..20) and concatenate it.
# No random_state is passed, so pandas draws from numpy's global RNG that was
# seeded above; the rows picked therefore also depend on how many noise
# values were generated before this point.
for_export = pd.concat(
    [all_data[all_data['target'] == f].sample(test_samples) for f in range(21)]
)

# Export data. The target directory is created up front (including missing
# parents) so that a genuine write error is not mistaken for a missing
# directory.
test_out = f"./../TestData/neodata/fault_all_noise_{test_samples}.csv"
os.makedirs(os.path.dirname(test_out), exist_ok=True)
for_export.to_csv(test_out, index=False, header=True)