#All imports


In [1]:
import numpy as np
import json
import os
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings("ignore")

cwd = os.getcwd()

# Training and testing for "known-noise"

In [13]:
# Mean accuracy (in percent) per dataset, keyed by file name without extension.
values = {}


def classify_ten_fold_known_noise(Xdata, ydata, name=None):
    """Estimate logistic-regression accuracy over 10 stratified random splits.

    For every split, the training portion's global mean and standard
    deviation are used to normalise both the training and the test portion,
    a fresh LogisticRegression is fitted, and the test accuracy is recorded.

    Note: StratifiedShuffleSplit draws independent random splits, so the test
    sets of different splits may overlap; this is repeated hold-out rather
    than a disjoint 10-fold partition.

    Args:
        Xdata: 2-D array of samples (rows) by features (columns).
        ydata: 1-D array of class labels, one per row of ``Xdata``.
        name: Optional dataset label (typically the file name) used as the
            key in the module-level ``values`` dict. When omitted, the
            module-level variable ``file`` is used if it exists, which keeps
            existing ``for file in ...`` driver loops working.

    Returns:
        A string of the form ``"<mean>%±<std>%"`` where both numbers are the
        mean and standard deviation of the per-split accuracies, in percent,
        rounded to two decimals.
    """
    #Xdata= Xdata[:, :500] #Uncomment for 1s long trace
    splitter = StratifiedShuffleSplit(n_splits=10, random_state=6)  # random_state may vary
    partial_accuracies = []
    for train_index, test_index in splitter.split(Xdata, ydata):
        X_train, X_test = Xdata[train_index], Xdata[test_index]
        y_train, y_test = ydata[train_index], ydata[test_index]

        # Statistics come from the training portion only, so no information
        # from the held-out samples leaks into the normalisation.
        train_mean = X_train.mean()
        train_std = X_train.std()
        # The epsilon belongs in the denominator: it protects against a zero
        # standard deviation (constant data) instead of shifting the values.
        X_train_norm = (X_train - train_mean) / (train_std + 1e-10)
        X_test_norm = (X_test - train_mean) / (train_std + 1e-10)

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train_norm, y_train)
        prd = model.predict(X_test_norm)

        partial_accuracies.append(
            classification_report(y_test, prd, output_dict=True)['accuracy']
        )

    mean_accuracy = round(np.mean(partial_accuracies) * 100, 2)
    deviation = round(np.std(partial_accuracies) * 100, 2)

    # Record the result under the dataset name. Falling back to the global
    # ``file`` preserves the historical behaviour, but a missing global no
    # longer raises NameError when the function is called on its own.
    label = name if name is not None else globals().get("file")
    if label is not None:
        values[os.path.splitext(label)[0]] = mean_accuracy

    return str(mean_accuracy) + "%±" + str(deviation) + "%"

# Call on all files in current directory

In [15]:
# NOTE: the loop variable must stay named `file`;
# classify_ten_fold_known_noise reads it as a global to label its result.
for file in os.listdir(cwd):
    path = os.path.join(cwd, file)
    # Only regular *.json files are datasets; anything else in the directory
    # (notebooks, scripts, sub-directories) would fail to parse or lack the
    # 'X'/'y' keys, so skip it.
    if not (os.path.isfile(path) and file.endswith(".json")):
        continue

    # Context manager guarantees the handle is closed after parsing.
    with open(path) as f:
        data = json.load(f)

    X = np.array(data['X'], dtype=np.float32)
    print(X.shape)
    y = np.array(data['y'])
    # Returns "<mean>%±<std>%" over the 10 stratified splits.
    p_lr = classify_ten_fold_known_noise(X, y)
    print(file[:-5] + " " + str(p_lr))

(200, 5000)
flipStress_p6_t0.5s 54.5%±7.23%
(200, 5000)
flipStress_p2_t0.5s 64.0%±9.43%
(200, 5000)
flipStress_p10_t0.5s 51.5%±6.34%
(200, 5000)
flipStress_p8_t0.5s 52.5%±8.44%
(200, 5000)
flipStress_p1_t0.5s 78.5%±8.38%
(200, 5000)
flipStress_p4_t0.5s 64.0%±11.58%
(200, 5000)
flipStress_p12_t0.5s 43.5%±6.73%


# Training without noise, testing on the noisy datasets

In [16]:
# Train on the noise-free dataset (point `filename` at another file to change
# the training set).
filename = "NoNoise_10s.json"
# Context manager guarantees the handle is closed after parsing.
with open(filename) as f:
    data = json.load(f)
X_train = np.array(data['X'], dtype=np.float32)
y_train = np.array(data['y'])

# Global statistics of the training set. `mean`, `std` and `model` are
# module-level on purpose: classify() reads them to normalise and score
# the test files.
mean = X_train.mean()
std = X_train.std()
# Epsilon in the denominator guards against a zero standard deviation.
X_train_norm = (X_train - mean) / (std + 1e-10)

model = LogisticRegression(max_iter=1000)
model.fit(X_train_norm, y_train)
# Predictions on the training data itself; not used further in the visible
# part of the file.
prd = model.predict(X_train_norm)

# Function to test the stressor data
def classify(filename):
    """Score the already-trained global ``model`` on one JSON dataset file.

    The file must contain the keys ``'X'`` (samples) and ``'y'`` (labels).
    The samples are normalised with the module-level ``mean`` and ``std``
    computed from the training data, then classified with the module-level
    ``model``.

    Args:
        filename: Path of the JSON file to evaluate.

    Returns:
        None. Prints ``"<file name without extension> <accuracy>%"`` with the
        accuracy in percent, rounded to two decimals.
    """
    # Context manager guarantees the handle is closed after parsing.
    with open(filename) as f:
        data = json.load(f)
    X_test = np.array(data['X'], dtype=np.float32)
    #X_test= X_test[:, :500] #KEEP WHEN CONVERTING 10s data to 1s data
    y_test = np.array(data['y'])
    # Use the training statistics so test data is scaled like the data the
    # model was fitted on; epsilon in the denominator guards against std == 0.
    X_test_norm = (X_test - mean) / (std + 1e-10)
    prd = model.predict(X_test_norm)
    p_lr = classification_report(y_test, prd, output_dict=True)['accuracy']
    # splitext removes the extension without assuming it is 5 characters long.
    label = os.path.splitext(filename)[0]
    print(label + " " + str(round(p_lr * 100, 2)) + "%")

# Iterate over all JSON dataset files in the current directory.
# NOTE: this also scores the training file itself (NoNoise_10s.json) if it
# lives in the same directory; that figure is training accuracy, not a test
# result.
for file in os.listdir(cwd):
    # Only regular *.json files are datasets; skip directories and any other
    # files, which would fail to parse or lack the 'X'/'y' keys.
    if os.path.isfile(os.path.join(cwd, file)) and file.endswith(".json"):
        classify(file)

flipStress_p6_t0.5s 50.0%
flipStress_p2_t0.5s 50.0%
flipStress_p10_t0.5s 50.0%
flipStress_p8_t0.5s 50.0%
NoNoise_10s 100.0%
flipStress_p1_t0.5s 50.0%
flipStress_p4_t0.5s 50.0%
flipStress_p12_t0.5s 50.0%
