In [1]:
import sklearn.utils as sku
import pandas as pd
import numpy as np
import json
import os

# Config

In [2]:
# Seed NumPy's global RNG so the notebook starts from a known random state
np.random.seed(1)

# JSON file holding the per-feature noise description
noise_file = "../Python/noise_description.json"

# Training / validation settings: both splits are drawn from the same source file
training_file = "../TrainingData/neodata/all_setpoints_500.csv"
training_samples = 1200    # rows drawn per class for the training set
validation_samples = 1200  # rows drawn per class for the validation set

# Test settings
test_file = "../TestData/neodata/all_setpoints_500.csv"
test_samples = 100         # rows drawn per class for the test set

# Columns dropped from every data set after noise has been applied (none by default)
features_for_removal = []

----
# Add noise and sample training and validation data

In [3]:
# Build the noisy training and validation sets from the training source file.

# Re-seed here so this cell draws the same noise regardless of what ran before it
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(training_file)
with open(noise_file, encoding="utf-8") as file:
    noise_desc = json.load(file)

# For each noise descriptor with a positive 'var' entry, add Gaussian noise to that column.
# NOTE(review): np.random.normal interprets its second argument as a standard deviation;
# the 'var' value is passed unchanged -- confirm the JSON stores a std dev, not a variance.
for feature, desc in noise_desc.items():
    if desc['var'] > 0:
        all_data[feature] += np.random.normal(desc['mean'], desc['var'], len(all_data))

# Remove unwanted features
for feature in features_for_removal:
    all_data.pop(feature)

n_classes = 21  # the loop below selects rows whose 'target' equals 0..20
samples_per_class = training_samples + validation_samples

# Collect the per-class slices in lists and concatenate once at the end,
# instead of re-copying the growing frame on every iteration.
train_parts = []
valid_parts = []
for f in range(n_classes):  # Loop over classes

    # Draw training_samples + validation_samples rows of this class without replacement.
    # resample raises ValueError if the class has fewer rows than requested.
    fault_data = sku.resample(
        all_data[all_data['target'] == f],
        replace=False,
        n_samples=samples_per_class,
        random_state=42,
    )

    # The first training_samples rows go to training, the remainder to validation,
    # so the two splits never share a row.
    train_parts.append(fault_data[:training_samples])
    valid_parts.append(fault_data[training_samples:samples_per_class])

for_export_train = pd.concat(train_parts)
for_export_valid = pd.concat(valid_parts)

train_out = f"./../TrainingData/neodata/14d_setpoints_{training_samples}.csv"
valid_out = f"./../ValidationData/neodata/14d_setpoints_{validation_samples}.csv"

# to_csv does not create missing directories, so make sure the targets exist
for out_path in (train_out, valid_out):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Export training data
for_export_train.to_csv(train_out, index=False, header=True)

# Export validation data
for_export_valid.to_csv(valid_out, index=False, header=True)

----
# Add noise and sample test data

In [4]:
# Build the noisy test set from the test source file.

# Re-seed here so this cell draws the same noise regardless of what ran before it.
# NOTE(review): this is the same seed the training cell uses, so both cells start
# from the same random stream -- confirm that is intended.
np.random.seed(1)

# Load data and noise description
all_data = pd.read_csv(test_file)
with open(noise_file, encoding="utf-8") as file:
    noise_desc = json.load(file)

# For each noise descriptor with a positive 'var' entry, add Gaussian noise to that column.
# NOTE(review): np.random.normal interprets its second argument as a standard deviation;
# the 'var' value is passed unchanged -- confirm the JSON stores a std dev, not a variance.
for feature, desc in noise_desc.items():
    if desc['var'] > 0:
        all_data[feature] += np.random.normal(desc['mean'], desc['var'], len(all_data))

# Remove unwanted features
for feature in features_for_removal:
    all_data.pop(feature)

n_classes = 21  # the loop below selects rows whose 'target' equals 0..20

# Sample test_samples rows per class without replacement; collect the pieces in a
# list and concatenate once instead of re-copying the growing frame each iteration.
# resample raises ValueError if a class has fewer rows than requested.
test_parts = []
for f in range(n_classes):  # Loop over classes
    class_rows = all_data[all_data['target'] == f]
    test_parts.append(
        sku.resample(class_rows, replace=False, n_samples=test_samples, random_state=42)
    )
for_export_test = pd.concat(test_parts)

# Export test data
for_export_test.to_csv(f"./../TestData/neodata/14d_setpoints_{test_samples}.csv", index=False, header=True)