In [7]:
from io import StringIO
import pandas as pd
import os
import numpy as np

In [8]:
# Number of rows to extract from each run.
# Rows are taken from the end of each sample session (the trailing window).

training_samples = 67               # 1200 samples / 18 setpoints = 66.7, rounded up to 67
validation_samples = 67
steadyState_samples = 1000          # Number of trailing time slices treated as steady state. It has been verified that 1000 is steady state for fault 9

In [9]:
# Build a noise matrix with one row per training sample.
# Standard-normal noise (mean 0, std 1) is drawn for the five noisy sensors,
# then all-zero columns are inserted at the given positions of that 5-column
# array so that columns which must stay noise-free receive no perturbation.
# The result has 5 + 7 = 12 columns.
noise = np.insert(
    np.random.normal(loc=0, scale=1, size=(training_samples, 5)),
    obj=(0, 0, 0, 3, 4, 5, 5),
    values=0,
    axis=1,
)

----
# Extracting training data

In [10]:
# Extract training data
#
# For every training file (n == 0 is the non-faulty run, n == 1..20 are the
# fault runs) the file is split into its individual runs, the trailing
# `steadyState_samples` rows of each run are kept, `training_samples` rows are
# drawn at random from that window, three features are dropped and the rows are
# labelled with `target = n`. All sampled rows are written to a single CSV.
#
# NOTE: the sampling is unseeded, so each execution selects different rows.

output_dir = "./../TrainingData/neodata"

# Collect every sampled section here and concatenate once at the end
# (avoids re-copying the growing frame on every iteration).
sampled_sections = []
for n in range(0,21):

    # Handle special case of non-faulty data
    if n == 0:
        file_name = "../TrainingData/Ntrain1.csv"
    else:
        file_name = f"../TrainingData/Ftrain{n}.csv"

    # The context manager guarantees the file handle is closed, even on error.
    with open(file_name) as f:
        labels_str = f.readline()       # Header line (column labels)
        read_str = f.read()             # Everything after the first header

    # The header line is repeated in front of every run, so splitting on it
    # yields one chunk of CSV rows per run.
    splits = read_str.split(labels_str)
    lengths = [len(section.split("\n")) for section in splits]

    print(f"Found {len(splits)} sets in file number {n} of lengths : {lengths}")

    # Sample the steady-state tail of each run
    for section in splits:
        sec = pd.read_csv(StringIO(labels_str + section)).assign(target=n)
        # Keep the steady-state area (repetitive behaviour). `tail` stays correct
        # when a run is shorter than the window, where a computed negative start
        # index would silently select the wrong rows.
        sec = sec.tail(steadyState_samples)
        sec = sec.sample(training_samples)                           # Take a random sample
        sec = sec.drop(columns=['Psuc','CprPower','Density'])        # Drop features
        # sec = sec + noise                                          # Noise injection is currently disabled
        sampled_sections.append(sec)

# Concatenate the samples of all files in one pass
concat = pd.concat(sampled_sections)

# Create the output directory up front instead of treating any write error as
# "directory missing".
os.makedirs(output_dir, exist_ok=True)
concat.to_csv(os.path.join(output_dir, f"fault_all_dropFeatures_{training_samples}.csv"), index=False, header=True)

Found 18 sets in file number 0 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 1 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 2 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 3 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 4 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 5 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001, 6001]
Found 18 sets in file number 6 of lengths : [6001, 6001, 6001, 6001, 6001, 6001, 6

In [13]:
# Build a noise matrix with one row per validation sample.
# Standard-normal noise (mean 0, std 1) is drawn for the five noisy sensors,
# then all-zero columns are inserted at the given positions of that 5-column
# array so that columns which must stay noise-free receive no perturbation.
# The result has 5 + 7 = 12 columns.
noise = np.insert(
    np.random.normal(loc=0, scale=1, size=(validation_samples, 5)),
    obj=(0, 0, 0, 3, 4, 5, 5),
    values=0,
    axis=1,
)


In [14]:

# Extract validation data
#
# For every validation file (n == 0 is the non-faulty run, n == 1..20 are the
# fault runs) the file is split into its individual runs, the trailing
# `validation_samples` rows of each run are kept and shuffled, three features
# are dropped and the rows are labelled with `target = n`. All rows are written
# to a single CSV.

output_dir = "./../ValidationData/neodata"

# Collect every sampled section here and concatenate once at the end
# (avoids re-copying the growing frame on every iteration).
sampled_sections = []
for n in range(0,21):

    # Handle special case of non-faulty data
    if n == 0:
        file_name = "../ValidationData/NewValid_n1.csv"
    else:
        file_name = f"../ValidationData/NewValid_f{n}.csv"

    # The context manager guarantees the file handle is closed, even on error.
    with open(file_name) as f:
        labels_str = f.readline()       # Header line (column labels)
        read_str = f.read()             # Everything after the first header

    # Splitting on the header line yields one chunk of CSV rows per run
    # (a file with a single header produces a single chunk).
    splits = read_str.split(labels_str)
    lengths = [len(section.split("\n")) for section in splits]

    print(f"Found {len(splits)} sets in file number {n} of lengths : {lengths}")

    # Take the tail of each run
    for section in splits:
        sec = pd.read_csv(StringIO(labels_str + section)).assign(target=n)
        # NOTE(review): the window here is `validation_samples` rows, not
        # `steadyState_samples` as in the training extraction, so the sample
        # below only shuffles the last rows. Confirm this is intended.
        # `tail` avoids a negative start index when a run is shorter than the window.
        sec = sec.tail(validation_samples)
        sec = sec.sample(validation_samples)                         # Take a random sample
        sec = sec.drop(columns=['Psuc','CprPower','Density'])        # Drop features
        # sec = sec + noise                                          # Noise injection is currently disabled
        sampled_sections.append(sec)

# Concatenate the samples of all files in one pass
concat = pd.concat(sampled_sections)

# Create the output directory up front instead of treating any write error as
# "directory missing".
os.makedirs(output_dir, exist_ok=True)
concat.to_csv(os.path.join(output_dir, f"fault_all_dropfeatures_{validation_samples}.csv"), index=False, header=True)

Found 1 sets in file number 0 of lengths : [6001]
Found 1 sets in file number 1 of lengths : [10001]
Found 1 sets in file number 2 of lengths : [10001]
Found 1 sets in file number 3 of lengths : [10001]
Found 1 sets in file number 4 of lengths : [10001]
Found 1 sets in file number 5 of lengths : [10001]
Found 1 sets in file number 6 of lengths : [10001]
Found 1 sets in file number 7 of lengths : [10001]
Found 1 sets in file number 8 of lengths : [10001]
Found 1 sets in file number 9 of lengths : [10001]
Found 1 sets in file number 10 of lengths : [10001]
Found 1 sets in file number 11 of lengths : [10001]
Found 1 sets in file number 12 of lengths : [10001]
Found 1 sets in file number 13 of lengths : [10001]
Found 1 sets in file number 14 of lengths : [10001]
Found 1 sets in file number 15 of lengths : [10001]
Found 1 sets in file number 16 of lengths : [10001]
Found 1 sets in file number 17 of lengths : [10001]
Found 1 sets in file number 18 of lengths : [10001]
Found 1 sets in file nu