In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys
logging.basicConfig(format='%(asctime)s %(levelname)-7s %(message)s',
                    stream=sys.stderr, level=logging.INFO)

import os
sys.path.append(os.path.join(os.environ['VIRTUAL_ENV'],f"lib/python{'.'.join([str(x) for x in sys.version_info[0:2]])}/site-packages")) 
from pathlib import Path
import pandas as pd
import numpy as np
import joblib
import json

from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from tsfresh.feature_extraction import settings
from sklearn.model_selection import train_test_split

from data_pipeline import DataPipeline
from vae import VAE

2023-06-23 12:59:36,045 INFO    Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.


In [3]:
repeat_nums = [0,1,2] #You can give a list of integers between 0 - 10
expConfig_nums = [0,1,2,3,4,5]

In [4]:
#You can either use one of the feature extraction methods or use the features determined beforehand
use_pre_selected_features = True
extracted_data = False

In [5]:
data_dir = f"/projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset"
selected_features_filename = "/projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/synthetic_dataset_experiments/fe_eclipse_tsfresh_raw_CHI_2000.json"

In [6]:
# "paper_vae_best" -> this one has all applications and healthy and memleak runs.
model_folder_output_name = "ae_experiments_vae"
output_dir = f"/projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/sc_ae_experiments/models/{model_folder_output_name}"

In [7]:
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    print("Created outputs directory")   
else:
    print("Output directory already exists")
    
if not os.path.exists(output_dir + "/results"):
    os.makedirs(output_dir + "/results")
    print("Created results directory")   
else:
    print("Results directory already exists")       

Output directory already exists
Results directory already exists


In [8]:
for repeat_num in repeat_nums:
    
    print(f"######### Repeat Num: {repeat_num}  #########")
    
    for expConfig_num in expConfig_nums:
        
        print(f"######### Experimental Configuration: {expConfig_num}  #########")
                        
        if use_pre_selected_features: 
            with open(selected_features_filename, "r") as fp:
                selected_features_json = json.load(fp)

            fe_selected_features = selected_features_json['tsfresh_column_names']
                        
        if expConfig_num == 0:
            healthy_test_data_percentage = 0.2
        elif expConfig_num == 1:
            healthy_test_data_percentage = 0.4
        elif expConfig_num == 2:
            healthy_test_data_percentage = 0.6
        elif expConfig_num == 3:
            healthy_test_data_percentage = 0.8
        elif expConfig_num == 4:
            healthy_test_data_percentage = 0.9        
        elif expConfig_num == 5:
            healthy_test_data_percentage = 0.95                    
            
            
        if not os.path.exists(output_dir):
            logging.info(f"Creating: {output_dir}")
            os.makedirs(output_dir)            
            
            
        pipeline = DataPipeline()
        x_train, y_train, x_test, y_test, x_val, y_val = pipeline.load_HPC_data(data_dir)
        
        
        all_y = pd.concat([y_train, y_test])
        print(all_y.shape)

        all_x = pd.concat([x_train, x_test])
        all_x.set_index(['job_id', 'component_id'],inplace=True)
        print(all_x.shape)        
        
        
        selected_apps = ['exa', 'lammps', 'sw4', 'sw4lite']
        #selected_apps = ['exa', 'lammps']
        selected_labels = ['none', 'memleak']
        
        
        exp_config_dict = {
            'selected_apps': selected_apps,
            'selected_labels': selected_labels,
            'dataset_stats' : {
                                'train': {
                                            '0': 0,
                                            '1': 0
                                         },
                                'test': {
                                            '0': 0,
                                            '1': 0                        
                                }
                                }
        }        
        
        
        curr_all_y = all_y[(all_y['app_name'].isin(selected_apps)) & (all_y['anom_name'].isin(selected_labels))]        
        
        
        healthy_labels = curr_all_y[curr_all_y['binary_anom'] == 0]
        anom_labels = curr_all_y[curr_all_y['binary_anom'] != 0]
        
        #Train test split on the healthy node_ids
        train_label_healthy, test_label_healthy = train_test_split(healthy_labels, 
                                                                   test_size=healthy_test_data_percentage)#, random_state=0)

        print(train_label_healthy.shape)
        print(test_label_healthy.shape)

        exp_config_dict['dataset_stats']['train']['0'] = train_label_healthy.shape[0]
        exp_config_dict['dataset_stats']['test']['0'] = test_label_healthy.shape[0]       

        #Train data only have healthy node_ids
        x_train = all_x.loc[train_label_healthy.index]
        y_train = train_label_healthy.copy()

        assert set(x_train.index.get_level_values('component_id')) == set(y_train.index.get_level_values('component_id'))

        print(f"Train data shape: {x_train.shape} with {len(set(x_train.index.get_level_values('component_id')))} unique jobid compid combos")
        print(f"Train label dist\n{y_train['binary_anom'].value_counts()}")

        assert len(x_train.index.unique()) == len(y_train)   
        
        #Test data will have some healthy and anomalous node_ids
        test_data_healthy = all_x.loc[test_label_healthy.index]
        assert set(test_data_healthy.index.get_level_values('component_id')) == set(test_label_healthy.index.get_level_values('component_id'))

        anom_data = all_x.loc[anom_labels.index]
        exp_config_dict['dataset_stats']['test']['1'] = anom_labels.shape[0]

        assert set(anom_data.index.get_level_values('component_id')) == set(anom_labels.index.get_level_values('component_id'))

        x_test = pd.concat([test_data_healthy, anom_data])
        y_test = pd.concat([test_label_healthy, anom_labels])

        assert set(x_test.index.get_level_values('component_id')) == set(y_test.index.get_level_values('component_id'))

        print(f"Test data shape: {x_test.shape}")
        print(f"Test label dist\n{y_test['binary_anom'].value_counts()}")


        assert len(x_test.index.unique()) == len(y_test)      
        
        x_train.reset_index(inplace=True)
        x_test.reset_index(inplace=True)
        
        #Generate features for the train data
        if use_pre_selected_features:
            x_train_fe = pipeline.tsfresh_generate_features(x_train, fe_config=None, kind_to_fc_parameters=fe_selected_features)
        elif not extracted_data:
            x_train_fe = pipeline.tsfresh_generate_features(x_train, fe_config="minimal")
        else:
            x_train_fe = x_train.copy(deep=True)
            logging.info(f"Features are already extracted")

        if not (y_train is None):
            y_train = y_train.loc[x_train_fe.index]        

        assert all(y_train.index == x_train_fe.index)            
        
        
        #Generate features for the test data
        if not (x_test) is None: 
            if use_pre_selected_features:
                x_test_fe = pipeline.tsfresh_generate_features(x_test, fe_config=None, kind_to_fc_parameters=fe_selected_features)    
            elif not (extracted_data):
                x_test_fe = pipeline.tsfresh_generate_features(x_test, fe_config="minimal")
            else:
                x_test_fe = x_test.copy(deep=True)
                logging.info(f"Features are already extracted")

        if not (y_test is None):
            y_test = y_test.loc[x_test_fe.index]

        assert all(y_test.index == x_test_fe.index)            
        
        
        #Make the number of columns and the order equal
        if not (x_test) is None:

            if len(x_test_fe.columns) < len(x_train_fe.columns):
                x_train_fe = x_train_fe[x_test_fe.columns]
                print(x_train_fe.shape)

            elif len(x_test_fe.columns) > len(x_train_fe.columns):
                x_test_fe = x_test_fe[x_train_fe.columns]
                print(x_test_fe.shape)

            x_train_fe = x_train_fe[x_test_fe.columns]    
            assert all(x_train_fe.columns == x_test_fe.columns)    
            x_test_fe = x_test_fe.loc[y_test.index]


        #Generate features for the val data
        if not (x_val) is None: 
            if not (extracted_data):
                x_val_fe = pipeline.tsfresh_generate_features(x_val, fe_config="minimal")
            else:
                x_val_fe = x_val.copy(deep=True)
                logging.info(f"Features are already extracted")

        if not (y_val is None):
            y_val = y_val.loc[x_val_fe.index]    


        #Apply scaler to train and test data(if it exists)
        if not (x_test is None):
            x_train_scaled, x_test_scaled = pipeline.scale_data(x_train_fe, x_test_fe, save_dir=output_dir)
        else:
            x_train_scaled, x_test_scaled = pipeline.scale_data(x_train_fe, None, save_dir=output_dir)

        logging.info(f"X_train scaled shape: {x_train_scaled.shape}")      
            
            
        input_dim = x_train_scaled.shape[1]
        intermediate_dim = int(input_dim / 2)
        latent_dim = int(input_dim / 3)
        
        
        if 'vae' in locals():
            print("Vae exists; will delete to be safe")
            del vae
        else:
            print("Vae is not defined")

        vae = VAE(
                    name="model",
                    input_dim=input_dim,
                    intermediate_dim=intermediate_dim,
                    latent_dim=latent_dim,
                    learning_rate=1e-4
        )
        
        
        # train the VAE model
        vae.fit(
                x_train=x_train_scaled,
                epochs=1000,
                batch_size=32, 
                validation_split=0.1,
                save_dir=output_dir,
                verbose=0
        )        
        
        deployment_metadata = {
            'threshold': vae.threshold,
            #Column order is important when feeding the data
            'raw_column_names': list(x_train_scaled.columns),
            'fe_column_names': settings.from_columns(list(x_train_scaled.columns))
        }

        with open(Path(output_dir) / 'deployment_metadata.json', 'w') as fp:
            json.dump(deployment_metadata, fp)
            
            
        y_pred_train, x_train_recon_errors = vae.predict_anomaly(x_train_scaled)

        if not (y_train is None):
            print("Classification Report in Training Data\n")
            print(classification_report(y_train['binary_anom'].values, y_pred_train))        
            
            
        if not (x_test is None):

            assert all(x_test_scaled.columns == x_train_scaled.columns)
            assert all(x_test_scaled.index == y_test.index)

            y_pred_test, x_test_recon_errors = vae.predict_anomaly(x_test_scaled)    
            logging.info(f"Test data prediction results: {y_pred_test}")
            logging.info(f"Selected threshold value\n: {vae.threshold}")
            #logging.info(f"Test data reconstruction errors\n: {x_test_recon_errors}")          
            
        #If the labeled test data exists, generate results
        if not (y_test is None):
            y_test['binary_pred'] = y_pred_test
            y_test['recon_errors'] = x_test_recon_errors
            print("Classification Report in Test Data\n")
            print(classification_report(y_test['binary_anom'].values, y_pred_test))  

            result_dict = classification_report(y_test['binary_anom'].values, y_pred_test, output_dict= True)                        

            
        # Writing to sample.json     
        with open( Path(output_dir) / "results" / f"expConfig_{expConfig_num}_repeatNum_{repeat_num}_testResults.json", "w") as outfile:
            json.dump(result_dict, outfile)

        # Writing to sample.json
        with open( Path(output_dir) / "results" / f"expConfig_{expConfig_num}_repeatNum_{repeat_num}_dataStats.json", "w") as outfile:
            json.dump(exp_config_dict, outfile)                
            
            
        print(f"################################################################################################################################################")            

######### Repeat Num: 0  #########
######### Experimental Configuration: 0  #########


2023-06-23 12:59:41,246 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 12:59:41,247 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 12:59:43,667 INFO    Data read successfully
2023-06-23 12:59:43,668 INFO    Shape of x_train: (145826, 159)
2023-06-23 12:59:43,669 INFO    Shape of y_train: (64, 4)
2023-06-23 12:59:43,669 INFO    Shape of x_test: (375239, 159)
2023-06-23 12:59:43,670 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(64, 4)
(16, 4)
Train data shape: (138230, 157) with 57 unique jobid compid combos
Train label dist
0    64
Name: binary_anom, dtype: int64
Test data shape: (210147, 157)
Test label dist
1    80
0    16
Name: binary_anom, dtype: int64


2023-06-23 12:59:46,207 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:43<00:00,  1.09s/it]
2023-06-23 13:00:30,668 INFO    Feature extraction: Before dropping NaNs: (64, 2000)
2023-06-23 13:00:30,678 INFO    Feature extraction: Dropped NaNs: (64, 2000)
2023-06-23 13:00:31,139 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:24<00:00,  2.11s/it]
2023-06-23 13:01:56,257 INFO    Feature extraction: Before dropping NaNs: (96, 2000)
2023-06-23 13:01:56,269 INFO    Feature extraction: Dropped NaNs: (96, 2000)
2023-06-23 13:01:56,363 INFO    x_test is not None, scaling
2023-06-23 13:01:56,395 INFO    Scaler is saved
2023-06-23 13:01:56,395 INFO    X_train scaled shape: (64, 2000)


Vae is not defined


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:02:29,920 INFO    Test data prediction results: [1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:02:29,921 INFO    Selected threshold value
: 0.0749679223306885


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        64
           1       0.00      0.00      0.00         0

    accuracy                           0.98        64
   macro avg       0.50      0.49      0.50        64
weighted avg       1.00      0.98      0.99        64

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.88      0.93        16
           1       0.98      1.00      0.99        80

    accuracy                           0.98        96
   macro avg       0.99      0.94      0.96        96
weighted avg       0.98      0.98      0.98        96

################################################################################################################################################
######### Experimental Configuration: 1  #########


2023-06-23 13:02:30,916 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:02:30,918 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:02:34,000 INFO    Data read successfully
2023-06-23 13:02:34,001 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:02:34,002 INFO    Shape of y_train: (64, 4)
2023-06-23 13:02:34,003 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:02:34,003 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(48, 4)
(32, 4)
Train data shape: (112693, 157) with 44 unique jobid compid combos
Train label dist
0    48
Name: binary_anom, dtype: int64
Test data shape: (235684, 157)
Test label dist
1    80
0    32
Name: binary_anom, dtype: int64


2023-06-23 13:02:36,844 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:34<00:00,  1.14it/s]
2023-06-23 13:03:12,639 INFO    Feature extraction: Before dropping NaNs: (48, 2000)
2023-06-23 13:03:12,649 INFO    Feature extraction: Dropped NaNs: (48, 2000)
2023-06-23 13:03:13,019 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:30<00:00,  2.27s/it]
2023-06-23 13:04:45,090 INFO    Feature extraction: Before dropping NaNs: (112, 2000)
2023-06-23 13:04:45,102 INFO    Feature extraction: Dropped NaNs: (112, 2000)
2023-06-23 13:04:45,212 INFO    x_test is not None, scaling
2023-06-23 13:04:45,244 INFO    Scaler is saved
2023-06-23 13:04:45,246 INFO    X_train scaled shape: (48, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:05:16,444 INFO    Test data prediction results: [0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1]
2023-06-23 13:05:16,446 INFO    Selected threshold value
: 0.08341139545489218


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.96      0.98        48
           1       0.00      0.00      0.00         0

    accuracy                           0.96        48
   macro avg       0.50      0.48      0.49        48
weighted avg       1.00      0.96      0.98        48

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32
           1       1.00      1.00      1.00        80

    accuracy                           1.00       112
   macro avg       1.00      1.00      1.00       112
weighted avg       1.00      1.00      1.00       112

################################################################################################################################################
######### Experimental Configuration: 2  #########


2023-06-23 13:05:17,288 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:05:17,289 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:05:19,808 INFO    Data read successfully
2023-06-23 13:05:19,809 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:05:19,809 INFO    Shape of y_train: (64, 4)
2023-06-23 13:05:19,810 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:05:19,811 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(32, 4)
(48, 4)
Train data shape: (68702, 157) with 28 unique jobid compid combos
Train label dist
0    32
Name: binary_anom, dtype: int64
Test data shape: (279675, 157)
Test label dist
1    80
0    48
Name: binary_anom, dtype: int64


2023-06-23 13:05:22,347 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:21<00:00,  1.90it/s]
2023-06-23 13:05:44,207 INFO    Feature extraction: Before dropping NaNs: (32, 2000)
2023-06-23 13:05:44,217 INFO    Feature extraction: Dropped NaNs: (32, 2000)
2023-06-23 13:05:44,779 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:48<00:00,  2.72s/it]
2023-06-23 13:07:34,654 INFO    Feature extraction: Before dropping NaNs: (128, 2000)
2023-06-23 13:07:34,676 INFO    Feature extraction: Dropped NaNs: (128, 2000)
2023-06-23 13:07:34,772 INFO    x_test is not None, scaling
2023-06-23 13:07:34,803 INFO    Scaler is saved
2023-06-23 13:07:34,804 INFO    X_train scaled shape: (32, 2000)


Vae exists; will delete to be safe


2023-06-23 13:07:54,032 INFO    Test data prediction results: [1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
2023-06-23 13:07:54,033 INFO    Selected threshold value
: 0.08053754215306132


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.73      0.84        48
           1       0.86      1.00      0.92        80

    accuracy                           0.90       128
   macro avg       0.93      0.86      0.88       128
weighted avg       0.91      0.90      0.89       128

################################################################################################################################################
######### Experimental Configuration: 3  #########


2023-06-23 13:07:54,970 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:07:54,971 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:07:57,975 INFO    Data read successfully
2023-06-23 13:07:57,977 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:07:57,978 INFO    Shape of y_train: (64, 4)
2023-06-23 13:07:57,978 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:07:57,979 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(16, 4)
(64, 4)
Train data shape: (35577, 157) with 16 unique jobid compid combos
Train label dist
0    16
Name: binary_anom, dtype: int64
Test data shape: (312800, 157)
Test label dist
1    80
0    64
Name: binary_anom, dtype: int64


2023-06-23 13:08:00,779 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:12<00:00,  3.30it/s]
2023-06-23 13:08:13,644 INFO    Feature extraction: Before dropping NaNs: (16, 2000)
2023-06-23 13:08:13,654 INFO    Feature extraction: Dropped NaNs: (16, 2000)
2023-06-23 13:08:14,272 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:55<00:00,  2.89s/it]
2023-06-23 13:10:10,879 INFO    Feature extraction: Before dropping NaNs: (144, 2000)
2023-06-23 13:10:10,890 INFO    Feature extraction: Dropped NaNs: (144, 2000)
2023-06-23 13:10:10,984 INFO    x_test is not None, scaling
2023-06-23 13:10:11,017 INFO    Scaler is saved
2023-06-23 13:10:11,018 INFO    X_train scaled shape: (16, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:10:28,196 INFO    Test data prediction results: [0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1]
2023-06-23 13:10:28,198 INFO    Selected threshold value
: 0.11605887441945942


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        16
           1       0.00      0.00      0.00         0

    accuracy                           0.94        16
   macro avg       0.50      0.47      0.48        16
weighted avg       1.00      0.94      0.97        16

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.81      0.90        64
           1       0.87      1.00      0.93        80

    accuracy                           0.92       144
   macro avg       0.93      0.91      0.91       144
weighted avg       0.93      0.92      0.92       144

################################################################################################################################################
######### Experimental Configuration: 4  #########


2023-06-23 13:10:29,012 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:10:29,013 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:10:31,520 INFO    Data read successfully
2023-06-23 13:10:31,521 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:10:31,522 INFO    Shape of y_train: (64, 4)
2023-06-23 13:10:31,523 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:10:31,524 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(8, 4)
(72, 4)
Train data shape: (17889, 157) with 8 unique jobid compid combos
Train label dist
0    8
Name: binary_anom, dtype: int64
Test data shape: (330488, 157)
Test label dist
1    80
0    72
Name: binary_anom, dtype: int64


2023-06-23 13:10:34,125 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:05<00:00,  6.53it/s]
2023-06-23 13:10:40,870 INFO    Feature extraction: Before dropping NaNs: (8, 2000)
2023-06-23 13:10:40,881 INFO    Feature extraction: Dropped NaNs: (8, 2000)
2023-06-23 13:10:41,626 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:56<00:00,  2.91s/it]
2023-06-23 13:12:39,218 INFO    Feature extraction: Before dropping NaNs: (152, 2000)
2023-06-23 13:12:39,230 INFO    Feature extraction: Dropped NaNs: (152, 2000)
2023-06-23 13:12:39,332 INFO    x_test is not None, scaling
2023-06-23 13:12:39,365 INFO    Scaler is saved
2023-06-23 13:12:39,366 INFO    X_train scaled shape: (8, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:12:57,651 INFO    Test data prediction results: [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:12:57,652 INFO    Selected threshold value
: 0.14588171772533726


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.88      0.93         8
           1       0.00      0.00      0.00         0

    accuracy                           0.88         8
   macro avg       0.50      0.44      0.47         8
weighted avg       1.00      0.88      0.93         8

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.57      0.73        72
           1       0.72      1.00      0.84        80

    accuracy                           0.80       152
   macro avg       0.86      0.78      0.78       152
weighted avg       0.85      0.80      0.78       152

################################################################################################################################################
######### Experimental Configuration: 5  #########


2023-06-23 13:12:58,649 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:12:58,650 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:13:01,739 INFO    Data read successfully
2023-06-23 13:13:01,741 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:13:01,742 INFO    Shape of y_train: (64, 4)
2023-06-23 13:13:01,743 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:13:01,743 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(4, 4)
(76, 4)
Train data shape: (6965, 157) with 4 unique jobid compid combos
Train label dist
0    4
Name: binary_anom, dtype: int64
Test data shape: (341412, 157)
Test label dist
1    80
0    76
Name: binary_anom, dtype: int64


2023-06-23 13:13:04,666 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:01<00:00, 19.78it/s]
2023-06-23 13:13:07,414 INFO    Feature extraction: Before dropping NaNs: (4, 2000)
2023-06-23 13:13:07,424 INFO    Feature extraction: Dropped NaNs: (4, 2000)
2023-06-23 13:13:08,153 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [02:11<00:00,  3.28s/it]
2023-06-23 13:15:20,579 INFO    Feature extraction: Before dropping NaNs: (156, 2000)
2023-06-23 13:15:20,590 INFO    Feature extraction: Dropped NaNs: (156, 2000)
2023-06-23 13:15:20,683 INFO    x_test is not None, scaling
2023-06-23 13:15:20,714 INFO    Scaler is saved
2023-06-23 13:15:20,715 INFO    X_train scaled shape: (4, 2000)


Vae exists; will delete to be safe


2023-06-23 13:15:38,634 INFO    Test data prediction results: [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
2023-06-23 13:15:38,636 INFO    Selected threshold value
: 0.22605065426628518


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4

Classification Report in Test Data

              precision    recall  f1-score   support

           0       0.62      0.84      0.71        76
           1       0.77      0.50      0.61        80

    accuracy                           0.67       156
   macro avg       0.69      0.67      0.66       156
weighted avg       0.69      0.67      0.66       156

################################################################################################################################################
######### Repeat Num: 1  #########
######### Experimental Configuration: 0  #########


2023-06-23 13:15:39,409 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:15:39,410 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:15:42,103 INFO    Data read successfully
2023-06-23 13:15:42,104 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:15:42,105 INFO    Shape of y_train: (64, 4)
2023-06-23 13:15:42,105 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:15:42,106 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(64, 4)
(16, 4)
Train data shape: (138186, 157) with 58 unique jobid compid combos
Train label dist
0    64
Name: binary_anom, dtype: int64
Test data shape: (210191, 157)
Test label dist
1    80
0    16
Name: binary_anom, dtype: int64


2023-06-23 13:15:44,811 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:42<00:00,  1.06s/it]
2023-06-23 13:16:28,140 INFO    Feature extraction: Before dropping NaNs: (64, 2000)
2023-06-23 13:16:28,151 INFO    Feature extraction: Dropped NaNs: (64, 2000)
2023-06-23 13:16:28,481 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:28<00:00,  2.21s/it]
2023-06-23 13:17:58,202 INFO    Feature extraction: Before dropping NaNs: (96, 2000)
2023-06-23 13:17:58,218 INFO    Feature extraction: Dropped NaNs: (96, 2000)
2023-06-23 13:17:58,317 INFO    x_test is not None, scaling
2023-06-23 13:17:58,348 INFO    Scaler is saved
2023-06-23 13:17:58,350 INFO    X_train scaled shape: (64, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:18:33,258 INFO    Test data prediction results: [0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:18:33,259 INFO    Selected threshold value
: 0.07057513256677446


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        64
           1       0.00      0.00      0.00         0

    accuracy                           0.98        64
   macro avg       0.50      0.49      0.50        64
weighted avg       1.00      0.98      0.99        64

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        16
           1       0.99      1.00      0.99        80

    accuracy                           0.99        96
   macro avg       0.99      0.97      0.98        96
weighted avg       0.99      0.99      0.99        96

################################################################################################################################################
######### Experimental Configuration: 1  #########


2023-06-23 13:18:34,009 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:18:34,010 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:18:36,716 INFO    Data read successfully
2023-06-23 13:18:36,717 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:18:36,718 INFO    Shape of y_train: (64, 4)
2023-06-23 13:18:36,719 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:18:36,719 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(48, 4)
(32, 4)
Train data shape: (111549, 157) with 45 unique jobid compid combos
Train label dist
0    48
Name: binary_anom, dtype: int64
Test data shape: (236828, 157)
Test label dist
1    80
0    32
Name: binary_anom, dtype: int64


2023-06-23 13:18:39,638 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:34<00:00,  1.15it/s]
2023-06-23 13:19:15,308 INFO    Feature extraction: Before dropping NaNs: (48, 2000)
2023-06-23 13:19:15,319 INFO    Feature extraction: Dropped NaNs: (48, 2000)
2023-06-23 13:19:15,862 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:28<00:00,  2.22s/it]
2023-06-23 13:20:45,617 INFO    Feature extraction: Before dropping NaNs: (112, 2000)
2023-06-23 13:20:45,629 INFO    Feature extraction: Dropped NaNs: (112, 2000)
2023-06-23 13:20:45,733 INFO    x_test is not None, scaling
2023-06-23 13:20:45,764 INFO    Scaler is saved
2023-06-23 13:20:45,766 INFO    X_train scaled shape: (48, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:21:18,585 INFO    Test data prediction results: [0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:21:18,586 INFO    Selected threshold value
: 0.06763546091842296


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.98      0.99        48
           1       0.00      0.00      0.00         0

    accuracy                           0.98        48
   macro avg       0.50      0.49      0.49        48
weighted avg       1.00      0.98      0.99        48

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.66      0.79        32
           1       0.88      1.00      0.94        80

    accuracy                           0.90       112
   macro avg       0.94      0.83      0.86       112
weighted avg       0.91      0.90      0.89       112

################################################################################################################################################
######### Experimental Configuration: 2  #########


2023-06-23 13:21:19,573 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:21:19,574 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:21:22,678 INFO    Data read successfully
2023-06-23 13:21:22,679 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:21:22,680 INFO    Shape of y_train: (64, 4)
2023-06-23 13:21:22,682 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:21:22,682 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(32, 4)
(48, 4)
Train data shape: (73850, 157) with 31 unique jobid compid combos
Train label dist
0    32
Name: binary_anom, dtype: int64
Test data shape: (274527, 157)
Test label dist
1    80
0    48
Name: binary_anom, dtype: int64


2023-06-23 13:21:25,631 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:23<00:00,  1.71it/s]
2023-06-23 13:21:49,897 INFO    Feature extraction: Before dropping NaNs: (32, 2000)
2023-06-23 13:21:49,908 INFO    Feature extraction: Dropped NaNs: (32, 2000)
2023-06-23 13:21:50,332 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:36<00:00,  2.42s/it]
2023-06-23 13:23:28,274 INFO    Feature extraction: Before dropping NaNs: (128, 2000)
2023-06-23 13:23:28,287 INFO    Feature extraction: Dropped NaNs: (128, 2000)
2023-06-23 13:23:28,383 INFO    x_test is not None, scaling
2023-06-23 13:23:28,415 INFO    Scaler is saved
2023-06-23 13:23:28,416 INFO    X_train scaled shape: (32, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:23:49,916 INFO    Test data prediction results: [0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]
2023-06-23 13:23:49,918 INFO    Selected threshold value
: 0.08159495465744544


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        32
           1       0.00      0.00      0.00         0

    accuracy                           0.94        32
   macro avg       0.50      0.47      0.48        32
weighted avg       1.00      0.94      0.97        32

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.65      0.78        48
           1       0.82      1.00      0.90        80

    accuracy                           0.87       128
   macro avg       0.91      0.82      0.84       128
weighted avg       0.89      0.87      0.86       128

################################################################################################################################################
######### Experimental Configuration: 3  #########


2023-06-23 13:23:50,791 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:23:50,792 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:23:53,535 INFO    Data read successfully
2023-06-23 13:23:53,536 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:23:53,537 INFO    Shape of y_train: (64, 4)
2023-06-23 13:23:53,538 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:23:53,539 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(16, 4)
(64, 4)
Train data shape: (33914, 157) with 16 unique jobid compid combos
Train label dist
0    16
Name: binary_anom, dtype: int64
Test data shape: (314463, 157)
Test label dist
1    80
0    64
Name: binary_anom, dtype: int64


2023-06-23 13:23:56,781 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:12<00:00,  3.08it/s]
2023-06-23 13:24:10,658 INFO    Feature extraction: Before dropping NaNs: (16, 2000)
2023-06-23 13:24:10,669 INFO    Feature extraction: Dropped NaNs: (16, 2000)
2023-06-23 13:24:11,209 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:55<00:00,  2.89s/it]
2023-06-23 13:26:08,211 INFO    Feature extraction: Before dropping NaNs: (144, 2000)
2023-06-23 13:26:08,224 INFO    Feature extraction: Dropped NaNs: (144, 2000)
2023-06-23 13:26:08,319 INFO    x_test is not None, scaling
2023-06-23 13:26:08,352 INFO    Scaler is saved
2023-06-23 13:26:08,359 INFO    X_train scaled shape: (16, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:26:27,951 INFO    Test data prediction results: [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1]
2023-06-23 13:26:27,954 INFO    Selected threshold value
: 0.12214605906012664


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.94      0.97        16
           1       0.00      0.00      0.00         0

    accuracy                           0.94        16
   macro avg       0.50      0.47      0.48        16
weighted avg       1.00      0.94      0.97        16

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.89      0.94        64
           1       0.92      1.00      0.96        80

    accuracy                           0.95       144
   macro avg       0.96      0.95      0.95       144
weighted avg       0.96      0.95      0.95       144

################################################################################################################################################
######### Experimental Configuration: 4  #########


2023-06-23 13:26:28,846 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:26:28,847 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:26:31,569 INFO    Data read successfully
2023-06-23 13:26:31,570 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:26:31,571 INFO    Shape of y_train: (64, 4)
2023-06-23 13:26:31,571 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:26:31,572 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(8, 4)
(72, 4)
Train data shape: (15513, 157) with 8 unique jobid compid combos
Train label dist
0    8
Name: binary_anom, dtype: int64
Test data shape: (332864, 157)
Test label dist
1    80
0    72
Name: binary_anom, dtype: int64


2023-06-23 13:26:34,032 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:06<00:00,  5.87it/s]
2023-06-23 13:26:41,581 INFO    Feature extraction: Before dropping NaNs: (8, 2000)
2023-06-23 13:26:41,592 INFO    Feature extraction: Dropped NaNs: (8, 2000)
2023-06-23 13:26:42,154 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:59<00:00,  3.00s/it]
2023-06-23 13:28:43,518 INFO    Feature extraction: Before dropping NaNs: (152, 2000)
2023-06-23 13:28:43,530 INFO    Feature extraction: Dropped NaNs: (152, 2000)
2023-06-23 13:28:43,627 INFO    x_test is not None, scaling
2023-06-23 13:28:43,659 INFO    Scaler is saved
2023-06-23 13:28:43,661 INFO    X_train scaled shape: (8, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:29:03,479 INFO    Test data prediction results: [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1]
2023-06-23 13:29:03,480 INFO    Selected threshold value
: 0.10649098226469916


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.88      0.93         8
           1       0.00      0.00      0.00         0

    accuracy                           0.88         8
   macro avg       0.50      0.44      0.47         8
weighted avg       1.00      0.88      0.93         8

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.17      0.29        72
           1       0.57      1.00      0.73        80

    accuracy                           0.61       152
   macro avg       0.79      0.58      0.51       152
weighted avg       0.77      0.61      0.52       152

################################################################################################################################################
######### Experimental Configuration: 5  #########


2023-06-23 13:29:04,465 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:29:04,466 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:29:07,490 INFO    Data read successfully
2023-06-23 13:29:07,491 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:29:07,492 INFO    Shape of y_train: (64, 4)
2023-06-23 13:29:07,493 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:29:07,493 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(4, 4)
(76, 4)
Train data shape: (6823, 157) with 4 unique jobid compid combos
Train label dist
0    4
Name: binary_anom, dtype: int64
Test data shape: (341554, 157)
Test label dist
1    80
0    76
Name: binary_anom, dtype: int64


2023-06-23 13:29:10,254 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:02<00:00, 17.67it/s]
2023-06-23 13:29:13,386 INFO    Feature extraction: Before dropping NaNs: (4, 2000)
2023-06-23 13:29:13,396 INFO    Feature extraction: Dropped NaNs: (4, 2000)
2023-06-23 13:29:13,943 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [02:07<00:00,  3.19s/it]
2023-06-23 13:31:22,858 INFO    Feature extraction: Before dropping NaNs: (156, 2000)
2023-06-23 13:31:22,869 INFO    Feature extraction: Dropped NaNs: (156, 2000)
2023-06-23 13:31:22,963 INFO    x_test is not None, scaling
2023-06-23 13:31:22,995 INFO    Scaler is saved
2023-06-23 13:31:22,997 INFO    X_train scaled shape: (4, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:31:43,259 INFO    Test data prediction results: [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1]
2023-06-23 13:31:43,261 INFO    Selected threshold value
: 0.14336617774221788


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.00      0.00      0.00         0

    accuracy                           0.75         4
   macro avg       0.50      0.38      0.43         4
weighted avg       1.00      0.75      0.86         4

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.28      0.43        76
           1       0.59      1.00      0.74        80

    accuracy                           0.65       156
   macro avg       0.80      0.64      0.59       156
weighted avg       0.79      0.65      0.59       156

################################################################################################################################################
######### Repeat Num: 2  #########
######### Experimental Configuration: 0  #########


2023-06-23 13:31:44,081 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:31:44,082 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:31:46,872 INFO    Data read successfully
2023-06-23 13:31:46,874 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:31:46,874 INFO    Shape of y_train: (64, 4)
2023-06-23 13:31:46,875 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:31:46,876 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(64, 4)
(16, 4)
Train data shape: (140468, 157) with 59 unique jobid compid combos
Train label dist
0    64
Name: binary_anom, dtype: int64
Test data shape: (207909, 157)
Test label dist
1    80
0    16
Name: binary_anom, dtype: int64


2023-06-23 13:31:49,780 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
2023-06-23 13:32:35,180 INFO    Feature extraction: Before dropping NaNs: (64, 2000)
2023-06-23 13:32:35,191 INFO    Feature extraction: Dropped NaNs: (64, 2000)
2023-06-23 13:32:35,696 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:18<00:00,  1.97s/it]
2023-06-23 13:33:55,767 INFO    Feature extraction: Before dropping NaNs: (96, 2000)
2023-06-23 13:33:55,779 INFO    Feature extraction: Dropped NaNs: (96, 2000)
2023-06-23 13:33:55,875 INFO    x_test is not None, scaling
2023-06-23 13:33:55,905 INFO    Scaler is saved
2023-06-23 13:33:55,906 INFO    X_train scaled shape: (64, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:34:33,323 INFO    Test data prediction results: [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:34:33,325 INFO    Selected threshold value
: 0.06747601172808862


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.95      0.98        64
           1       0.00      0.00      0.00         0

    accuracy                           0.95        64
   macro avg       0.50      0.48      0.49        64
weighted avg       1.00      0.95      0.98        64

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.75      0.86        16
           1       0.95      1.00      0.98        80

    accuracy                           0.96        96
   macro avg       0.98      0.88      0.92        96
weighted avg       0.96      0.96      0.96        96

################################################################################################################################################
######### Experimental Configuration: 1  #########


2023-06-23 13:34:34,308 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:34:34,309 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:34:37,332 INFO    Data read successfully
2023-06-23 13:34:37,332 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:34:37,333 INFO    Shape of y_train: (64, 4)
2023-06-23 13:34:37,334 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:34:37,334 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(48, 4)
(32, 4)
Train data shape: (101122, 157) with 46 unique jobid compid combos
Train label dist
0    48
Name: binary_anom, dtype: int64
Test data shape: (247255, 157)
Test label dist
1    80
0    32
Name: binary_anom, dtype: int64


2023-06-23 13:34:40,340 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:31<00:00,  1.28it/s]
2023-06-23 13:35:12,577 INFO    Feature extraction: Before dropping NaNs: (48, 2000)
2023-06-23 13:35:12,587 INFO    Feature extraction: Dropped NaNs: (48, 2000)
2023-06-23 13:35:12,982 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:37<00:00,  2.45s/it]
2023-06-23 13:36:52,102 INFO    Feature extraction: Before dropping NaNs: (112, 2000)
2023-06-23 13:36:52,115 INFO    Feature extraction: Dropped NaNs: (112, 2000)
2023-06-23 13:36:52,213 INFO    x_test is not None, scaling
2023-06-23 13:36:52,247 INFO    Scaler is saved
2023-06-23 13:36:52,248 INFO    X_train scaled shape: (48, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:37:28,074 INFO    Test data prediction results: [0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:37:28,075 INFO    Selected threshold value
: 0.07441205238694212


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.96      0.98        48
           1       0.00      0.00      0.00         0

    accuracy                           0.96        48
   macro avg       0.50      0.48      0.49        48
weighted avg       1.00      0.96      0.98        48

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.88      0.93        32
           1       0.95      1.00      0.98        80

    accuracy                           0.96       112
   macro avg       0.98      0.94      0.95       112
weighted avg       0.97      0.96      0.96       112

################################################################################################################################################
######### Experimental Configuration: 2  #########


2023-06-23 13:37:29,001 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:37:29,002 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:37:31,856 INFO    Data read successfully
2023-06-23 13:37:31,858 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:37:31,858 INFO    Shape of y_train: (64, 4)
2023-06-23 13:37:31,859 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:37:31,860 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(32, 4)
(48, 4)
Train data shape: (72836, 157) with 28 unique jobid compid combos
Train label dist
0    32
Name: binary_anom, dtype: int64
Test data shape: (275541, 157)
Test label dist
1    80
0    48
Name: binary_anom, dtype: int64


2023-06-23 13:37:34,913 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:22<00:00,  1.76it/s]
2023-06-23 13:37:58,688 INFO    Feature extraction: Before dropping NaNs: (32, 2000)
2023-06-23 13:37:58,698 INFO    Feature extraction: Dropped NaNs: (32, 2000)
2023-06-23 13:37:59,217 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:48<00:00,  2.72s/it]
2023-06-23 13:39:49,479 INFO    Feature extraction: Before dropping NaNs: (128, 2000)
2023-06-23 13:39:49,490 INFO    Feature extraction: Dropped NaNs: (128, 2000)
2023-06-23 13:39:49,587 INFO    x_test is not None, scaling
2023-06-23 13:39:49,619 INFO    Scaler is saved
2023-06-23 13:39:49,620 INFO    X_train scaled shape: (32, 2000)


Vae exists; will delete to be safe


2023-06-23 13:40:12,448 INFO    Test data prediction results: [0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1]
2023-06-23 13:40:12,450 INFO    Selected threshold value
: 0.08679477427283824


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        32

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.90      0.95        48
           1       0.94      1.00      0.97        80

    accuracy                           0.96       128
   macro avg       0.97      0.95      0.96       128
weighted avg       0.96      0.96      0.96       128

################################################################################################################################################
######### Experimental Configuration: 3  #########


2023-06-23 13:40:13,224 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:40:13,225 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:40:15,847 INFO    Data read successfully
2023-06-23 13:40:15,848 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:40:15,849 INFO    Shape of y_train: (64, 4)
2023-06-23 13:40:15,849 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:40:15,850 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(16, 4)
(64, 4)
Train data shape: (34155, 157) with 15 unique jobid compid combos
Train label dist
0    16
Name: binary_anom, dtype: int64
Test data shape: (314222, 157)
Test label dist
1    80
0    64
Name: binary_anom, dtype: int64


2023-06-23 13:40:18,845 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [00:11<00:00,  3.56it/s]
2023-06-23 13:40:31,085 INFO    Feature extraction: Before dropping NaNs: (16, 2000)
2023-06-23 13:40:31,096 INFO    Feature extraction: Dropped NaNs: (16, 2000)
2023-06-23 13:40:31,758 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:56<00:00,  2.91s/it]
2023-06-23 13:42:29,378 INFO    Feature extraction: Before dropping NaNs: (144, 2000)
2023-06-23 13:42:29,391 INFO    Feature extraction: Dropped NaNs: (144, 2000)
2023-06-23 13:42:29,486 INFO    x_test is not None, scaling
2023-06-23 13:42:29,523 INFO    Scaler is saved
2023-06-23 13:42:29,524 INFO    X_train scaled shape: (16, 2000)


Vae exists; will delete to be safe


2023-06-23 13:42:50,523 INFO    Test data prediction results: [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1]
2023-06-23 13:42:50,524 INFO    Selected threshold value
: 0.11609704890173354


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16

    accuracy                           1.00        16
   macro avg       1.00      1.00      1.00        16
weighted avg       1.00      1.00      1.00        16

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.81      0.90        64
           1       0.87      1.00      0.93        80

    accuracy                           0.92       144
   macro avg       0.93      0.91      0.91       144
weighted avg       0.93      0.92      0.92       144

################################################################################################################################################
######### Experimental Configuration: 4  #########


2023-06-23 13:42:51,363 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:42:51,364 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:42:54,125 INFO    Data read successfully
2023-06-23 13:42:54,126 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:42:54,127 INFO    Shape of y_train: (64, 4)
2023-06-23 13:42:54,128 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:42:54,129 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(8, 4)
(72, 4)
Train data shape: (17733, 157) with 8 unique jobid compid combos
Train label dist
0    8
Name: binary_anom, dtype: int64
Test data shape: (330644, 157)
Test label dist
1    80
0    72
Name: binary_anom, dtype: int64


2023-06-23 13:42:56,971 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:05<00:00,  6.67it/s]
2023-06-23 13:43:03,796 INFO    Feature extraction: Before dropping NaNs: (8, 2000)
2023-06-23 13:43:03,807 INFO    Feature extraction: Dropped NaNs: (8, 2000)
2023-06-23 13:43:04,353 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:58<00:00,  2.95s/it]
2023-06-23 13:45:03,750 INFO    Feature extraction: Before dropping NaNs: (152, 2000)
2023-06-23 13:45:03,763 INFO    Feature extraction: Dropped NaNs: (152, 2000)
2023-06-23 13:45:03,859 INFO    x_test is not None, scaling
2023-06-23 13:45:03,890 INFO    Scaler is saved
2023-06-23 13:45:03,892 INFO    X_train scaled shape: (8, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:45:26,461 INFO    Test data prediction results: [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
2023-06-23 13:45:26,464 INFO    Selected threshold value
: 0.09070379320427488


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.88      0.93         8
           1       0.00      0.00      0.00         0

    accuracy                           0.88         8
   macro avg       0.50      0.44      0.47         8
weighted avg       1.00      0.88      0.93         8

Classification Report in Test Data

              precision    recall  f1-score   support

           0       1.00      0.08      0.15        72
           1       0.55      1.00      0.71        80

    accuracy                           0.57       152
   macro avg       0.77      0.54      0.43       152
weighted avg       0.76      0.57      0.45       152

################################################################################################################################################
######### Experimental Configuration: 5  #########


2023-06-23 13:45:27,335 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_data.hdf
2023-06-23 13:45:27,336 ERROR   File not found!: /projectnb/peaclab-mon/aksar/deployment_experiments/eclipse/new_dataset_experiments/paper_dataset/prod_val_label.csv
2023-06-23 13:45:30,255 INFO    Data read successfully
2023-06-23 13:45:30,256 INFO    Shape of x_train: (145826, 159)
2023-06-23 13:45:30,257 INFO    Shape of y_train: (64, 4)
2023-06-23 13:45:30,258 INFO    Shape of x_test: (375239, 159)
2023-06-23 13:45:30,259 INFO    Shape of y_test: (176, 4)


(240, 4)
(521065, 157)
(4, 4)
(76, 4)
Train data shape: (11391, 157) with 4 unique jobid compid combos
Train label dist
0    4
Name: binary_anom, dtype: int64
Test data shape: (336986, 157)
Test label dist
1    80
0    76
Name: binary_anom, dtype: int64


2023-06-23 13:45:33,375 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 39/39 [00:04<00:00,  8.95it/s]
2023-06-23 13:45:38,743 INFO    Feature extraction: Before dropping NaNs: (4, 2000)
2023-06-23 13:45:38,754 INFO    Feature extraction: Dropped NaNs: (4, 2000)
2023-06-23 13:45:39,330 INFO    TSFRESH will use kind_to_fc_parameters
Feature Extraction: 100%|██████████| 40/40 [01:59<00:00,  2.99s/it]
2023-06-23 13:47:40,443 INFO    Feature extraction: Before dropping NaNs: (156, 2000)
2023-06-23 13:47:40,455 INFO    Feature extraction: Dropped NaNs: (156, 2000)
2023-06-23 13:47:40,554 INFO    x_test is not None, scaling
2023-06-23 13:47:40,588 INFO    Scaler is saved
2023-06-23 13:47:40,589 INFO    X_train scaled shape: (4, 2000)


Vae exists; will delete to be safe


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2023-06-23 13:48:02,389 INFO    Test data prediction results: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
2023-06-23 13:48:02,391 INFO    Selected threshold value
: 0.4324283274770996


Classification Report in Training Data

              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.00      0.00      0.00         0

    accuracy                           0.75         4
   macro avg       0.50      0.38      0.43         4
weighted avg       1.00      0.75      0.86         4

Classification Report in Test Data

              precision    recall  f1-score   support

           0       0.48      0.99      0.65        76
           1       0.00      0.00      0.00        80

    accuracy                           0.48       156
   macro avg       0.24      0.49      0.32       156
weighted avg       0.24      0.48      0.32       156

################################################################################################################################################
