In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import os
from os import listdir
from os.path import isfile, join
import re
import sys
import logging
from pathlib import Path
import argparse

from functools import reduce
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from csv import writer

#ML
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold

#UTILS
import sys
sys.path.insert(1,'/usr3/graduate/baksar/projectx/E2EWatch/utils/')

from utils import *
from config import Configuration
from datasets import EclipseDeploymentDataset

logging.basicConfig(format='%(asctime)s %(levelname)-7s %(message)s',
                    stream=sys.stderr, level=logging.DEBUG)
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)

In [3]:
class RuntimePredictor():
    
    
    def __init__(self, pickle_dir, 
                 pickle_name="eclipse_rf.pickle", 
                 feature_select = True, 
                 window_size = 60, 
                 granularity = 0,
                 DEBUG=True):
        
        """Init method
        
        - **parameters**, **types**, **return** and **return types**::
        
            :param pickle_dir: Directory that has the pickle model
            :param pickle_name: Pickle model name
            :param granularity: If the collected data is not collected per second granularity, specify this                
            :param window_size: If granularity is 0, it represents seconds, if granularity is 60, it represents minutes
        
        """
        
        self.pickle_dir = pickle_dir
        self.pickle_name = pickle_name
        self.window_size = window_size
        self.granularity = granularity
        self._load_model()
        self._load_scaler()
        self._load_anom_dict()
        self.feature_select = feature_select
        if self.feature_select:
            self._select_features()
        self.DEBUG = DEBUG
                
    def _select_features(self):
        
        #self.selected_features = open(self.pickle_dir / "selected_features.txt").read().splitlines()
        self.selected_features = pd.read_csv(self.pickle_dir / 'selected_features.csv')
        self.selected_features = self.selected_features['0'].values

        trial = []
        #When you use generate rolling features it doesn't add ::vmstat
        if self.feature_select:
            for feature in self.selected_features:
                trial.append(feature.split('::')[0])
            self.selected_features = trial
            
        logging.info('Loaded selected %d features', len(self.selected_features)) 
        
    def _load_model(self):
        '''Read the pickled model'''
        
        try:
            with open(self.pickle_dir / self.pickle_name, 'rb') as file:  
                self.model = pickle.load(file) 
                logging.info("Model loaded")
        except FileNotFoundError:
            logging.info("Model pickle doesn't exist")
            raise

    def _load_scaler(self):
        '''Read the pickled scaler'''
        
        try:
            with open(self.pickle_dir / 'scaler.pkl', 'rb') as file:  
                self.scaler = pickle.load(file) 
                logging.info("Scaler loaded")                
        except FileNotFoundError:
            logging.info("Scaler pickle doesn't exist")
            raise
            
            
    def _load_anom_dict(self):
        '''Read anom_dict to reverse encoding'''
        try:
            with open(self.pickle_dir / 'anom_dict.json') as f:
                self.anom_dict = json.load(f)              
        except FileNotFoundError:
            logging.info("Anomaly encoding dictionary doesn't exist")
            
    def _granularityAdjust(self,data,granularity=60):

        result = pd.DataFrame()
        for nid in data.index.get_level_values('node_id').unique():
            temp_data = data[data.index.get_level_values('node_id') == nid]
            temp_data = temp_data.iloc[ \
                (temp_data.index.get_level_values('timestamp').astype(int) -
                 int(temp_data.index.get_level_values('timestamp')[0])) \
                % granularity == 0]
            result = pd.concat([result,temp_data])

        return result    
            
    
    def predict_from_DF(self,runtime_data):
        
        """Process runtime monitoring data and make predictions with the existing model 

        Args:
            runtime_data: Dataframe that contains runtime HPC monitoring data

        Returns:
            Node by node runtime prediction results along with classifier confidence         
        """ 
        if not isinstance(runtime_data, pd.DataFrame):
            raise ValueError("should provide a pandas dataframe")
        
        #Drop NaN
        runtime_data.dropna(inplace=True)

        runtime_data['component_id'] = runtime_data['component_id'].astype(int)
        runtime_data = runtime_data.rename(columns={'component_id':'node_id'})
    
        round_factor = 1000 #Currently runtime data is collected every 60 seconds
        runtime_data['timestamp'] = round(runtime_data['timestamp'].astype(int) / round_factor)
        runtime_data['timestamp'] = runtime_data['timestamp'].astype(int) 
        runtime_data = runtime_data.set_index(['node_id','timestamp'])        
                
        #Per minute granularity data    
        if self.granularity != 0:
            runtime_data = self._granularityAdjust(runtime_data,granularity=60)
                
        ###Results will be stored in here
        node_results = pd.DataFrame()
        temp_feature_data = pd.DataFrame()
        
        logging.info("Preparing results for each node")
        for nid in runtime_data.index.get_level_values('node_id').unique():

            node_data = runtime_data.loc[nid,:,:]
                                    
            features = ['max', 'min', 'mean', 'std', 'skew', 'kurt','perc05', 'perc25', 'perc50', 'perc75', 'perc95'] 
            feature_train_data = pd.DataFrame()
            
            #return node_data

            if self.granularity != 0:    
                feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=0)
            else:
                feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=60,skip=15)
            
            return feature_data
            if self.feature_select:
                feature_data = feature_data[self.selected_features]                
                
            feature_data = pd.DataFrame(self.scaler.transform(feature_data),columns=feature_data.columns,index=feature_data.index)
                    
            #Testing pipeline
            preds_encoded = self.model.predict(feature_data)
            preds_prob = self.model.predict_proba(feature_data)

            preds = []
            for pred in preds_encoded:
                for key,value in self.anom_dict.items():
                    if value == pred:
                        preds.append(key)
                    
            node_data = feature_data

            timestamps = feature_data.index.get_level_values('timestamp').values            
            multiindex = list(zip(np.repeat(nid,len(timestamps)),timestamps))
            index = pd.MultiIndex.from_tuples(multiindex, names=['node_id', 'timestamp'])
            temp_results = pd.DataFrame(index=index)

            temp_results['preds'] = preds
            temp_results['prob'] = np.max(preds_prob,axis=1)
            node_results = pd.concat([node_results,temp_results])    

        return node_results                                                    
                               
    def TEST_predict_from_DF(self,runtime_data):
        
        """This is a TEST function for offline testing of the job data. 
        This function can be used to test job data inside the training/test set
        Format is not as same as runtime data returns by the RuntimeFramework so do NOT
        use for runtime results

        Args:
            runtime_data: Dataframe that contains runtime HPC monitoring data
            Dataframe can contain either one node data or multiple node data

        Returns:
            Node by node runtime prediction results along with classifier confidence         
        """ 
        if not isinstance(runtime_data, pd.DataFrame):
            raise ValueError("should provide a pandas dataframe")
                    
        new_columns = [column.split("::")[0] for column in runtime_data.columns]
        runtime_data.columns = new_columns
        
        if self.granularity != 0:                    
            runtime_data = self._granularityAdjust(runtime_data,granularity=60)
                
        node_results = pd.DataFrame()
        temp_feature_data = pd.DataFrame()
        
        logging.info("Preparing results for each node")
        for nid in runtime_data.index.get_level_values('node_id').unique():

            node_data = runtime_data.loc[nid,:,:]
                                    
            features = ['max', 'min', 'mean', 'std', 'skew', 'kurt','perc05', 'perc25', 'perc50', 'perc75', 'perc95'] 
            feature_train_data = pd.DataFrame()

            if self.granularity != 0:    
                feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=0)
#                 feature_data = generate_rolling_features(node_data,features=features,window_size=3,trim=0)                
            else:
                feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=60,skip=15)

            if self.feature_select:
                feature_data = feature_data[self.selected_features]                
                
            feature_data = pd.DataFrame(self.scaler.transform(feature_data),columns=feature_data.columns,index=feature_data.index)
                    
            #Testing pipeline
            preds_encoded = self.model.predict(feature_data)
            preds_prob = self.model.predict_proba(feature_data)

            preds = []
            for pred in preds_encoded:
                for key,value in self.anom_dict.items():
                    if value == pred:
                        preds.append(key)
                    
            node_data = feature_data

            timestamps = feature_data.index.get_level_values('timestamp').values            
            multiindex = list(zip(np.repeat(nid,len(timestamps)),timestamps))
            index = pd.MultiIndex.from_tuples(multiindex, names=['node_id', 'timestamp'])
            temp_results = pd.DataFrame(index=index)

            temp_results['preds'] = preds
            temp_results['prob'] = np.max(preds_prob,axis=1)
            node_results = pd.concat([node_results,temp_results])    

        return node_results                                                    


## Testing the Saved Model


In [4]:
MODEL = 'lgbm'
MODEL_FOLDER = '{}_final_60sec'.format(MODEL)
FS = 1
MODEL_NAME = 'eclipse_{}'.format(MODEL)

if FS:
    MODEL_NAME = MODEL_NAME + '-fs'
    
WINDOW_SIZE = 60
GRANULARITY = 0

PICKLE_DIR = Path('/usr3/graduate/baksar/projectx/E2EWatch/Models/{}'.format(MODEL_FOLDER))

In [5]:
conf = Configuration(ipython=True,
                     overrides={
                         'system' : 'eclipse',
                         'operation':'label_generate',                                                                            
                         'exp_name':'final_window_{}sec'.format(WINDOW_SIZE), 
                         'hdf_data_path': Path('/projectnb/peaclab-mon/aksar/datasets/eclipse_final_hdfs'),                         
#Model trained with minute granularity data no windowing                         
#                          'exp_name':'eclipse_final_window_0min', 
#                          'hdf_data_path': Path('/projectnb/peaclab-mon/aksar/datasets/eclipse_final_minute_hdfs'),                         
                         'model_config': 'random_forest',                         
                         #Label Generation
                         'num_split': 5,
                         #Data Generation                          
                         'cv_fold':0, #Required only for data_generate and pipeline options
                         'granularity': GRANULARITY,
                         'windowing': True,                         
                         'window_size' : WINDOW_SIZE,                         
                         'feature_extract': True,
                         'feature_select': [True if FS else False], 
                     })

2021-02-17 17:45:08,057 INFO    Setting directory names
2021-02-17 17:45:08,073 INFO    Model config folder already exists, be careful, otherwise it will overwrite!
2021-02-17 17:45:08,078 INFO    Saving configuration


# The configuration used for this run:
# {'cv_fold': 0,
#  'exp_name': 'final_window_60sec',
#  'experiment_dir': PosixPath('/projectnb/peaclab-mon/aksar/models/DeploymentModels/eclipse/final_window_60sec'),
#  'feature_extract': True,
#  'feature_select': [True],
#  'granularity': 0,
#  'hdf_data_path': PosixPath('/projectnb/peaclab-mon/aksar/datasets/eclipse_final_hdfs'),
#  'metadata_path': None,
#  'model_config': 'random_forest',
#  'model_config_dir': PosixPath('/projectnb/peaclab-mon/aksar/models/DeploymentModels/eclipse/final_window_60sec/CV_0/random_forest'),
#  'model_dir': PosixPath('/projectnb/peaclab-mon/aksar/models/DeploymentModels/eclipse/final_window_60sec/CV_0/random_forest/model'),
#  'num_split': 5,
#  'operation': 'label_generate',
#  'output_dir': PosixPath('/projectnb/peaclab-mon/aksar/models/DeploymentModels/eclipse'),
#  'plots_dir': PosixPath('/projectnb/peaclab-mon/aksar/models/DeploymentModels/eclipse/final_window_60sec/CV_0/random_forest/model/plots'),
#  '

In [6]:
eclipseDataset = EclipseDeploymentDataset(conf)

2021-02-17 17:45:08,105 INFO    BaseDataset Class Initialization
2021-02-17 17:45:08,106 INFO    HPCDataset Class Initialization
2021-02-17 17:45:08,107 INFO    EclipseDeploymentDataset Class Initialization


In [7]:
X_train, y_train, X_test, y_test = eclipseDataset.load_dataset(scalerSave=False)

2021-02-17 17:49:51,268 INFO    Train data shape (1597793, 699)
2021-02-17 17:49:51,273 INFO    Train label shape (1597793, 2)
2021-02-17 17:49:51,274 INFO    Test data shape (401853, 699)
2021-02-17 17:49:51,274 INFO    Test label shape (401853, 2)


In [8]:
predictor = RuntimePredictor(pickle_dir=PICKLE_DIR, 
                             feature_select=FS, 
                             window_size = WINDOW_SIZE,
                             granularity = GRANULARITY,
                             pickle_name="{}.pickle".format(MODEL_NAME))

2021-02-17 17:50:27,042 INFO    Model loaded
2021-02-17 17:50:27,056 INFO    Scaler loaded
2021-02-17 17:50:27,087 INFO    Loaded selected 699 features


In [9]:
deployment_model_columns = X_test.columns
converted_model_columns = [column.split("::")[0] for column in deployment_model_columns]

#Features are the same but the order is mixed need to fix
converted_model_columns == predictor.selected_features

True

In [10]:
#Test the loaded model with all test data 
logging.info("Testing pipeline!")
preds = predictor.model.predict(X_test)

logging.info("Generating report!")
### Saves classification report where all apps are combined
report_dict = classification_report(y_true=y_test['anom'].astype('int'), y_pred=preds, labels=y_test['anom'].unique())
print(report_dict)

2021-02-17 17:50:28,826 INFO    Testing pipeline!
2021-02-17 17:51:22,535 INFO    Generating report!


              precision    recall  f1-score   support

           0       1.00      1.00      1.00    106828
           1       0.95      0.94      0.94     60766
           2       0.91      0.64      0.75     60626
           3       0.79      0.95      0.86     88491
           4       1.00      1.00      1.00     85142

    accuracy                           0.93    401853
   macro avg       0.93      0.91      0.91    401853
weighted avg       0.93      0.93      0.92    401853



### Select a node_id that exist in the test data and inside the 8 16 node HDF list

In [11]:
final_metadata = pd.read_csv(conf['hdf_data_path'] / 'metadata.csv')

In [12]:
final_test_metadata = final_metadata[final_metadata['node_id'].isin(y_test.index)]
final_test_metadata

Unnamed: 0,appname,jobid,time,node_id,anomaly,anomaly_input,problem_name
0,ExaMiniMD,7363487,1497,468e64844c5443fb8f1234dd67ae5e9e,cpuoccupy,1,cosineEclipse4
7,ExaMiniMD,7363488,1492,215a9e29f2af419bbeb0c61d90fca532,cpuoccupy,2,cosineEclipse4
13,ExaMiniMD,7363491,1486,04a535c71c2749e9812afb754f5775be,memleak,1,cosineEclipse4
18,ExaMiniMD,7363494,1502,5fb833ae363b4e4d8efdf9c786b9c029,cachecopy,1,cosineEclipse4
23,ExaMiniMD,7363495,1497,687c9317923041f9ade7a7a70451e719,cachecopy,2,cosineEclipse4
...,...,...,...,...,...,...,...
24787,HACC,7497045,745,6bd8f05f10114c77aef779fb9b2893f3,,e,cosineEclipse16
24796,HACC,7497046,1038,c61d40b71f804862b2079fe7e65f978e,,e,cosineEclipse8
24797,HACC,7497046,1038,fa22e4920419438d8dfd3b0ba7340cd3,,e,cosineEclipse8
24810,HACC,7497048,749,f08e3e7c5fcd47fbbe726d909c9dc340,,e,cosineEclipse16


In [99]:
final_test_metadata[(final_test_metadata['problem_name'] == 'cosineEclipse16') & (final_test_metadata['anomaly'] == 'memleak')]

Unnamed: 0,appname,jobid,time,node_id,anomaly,anomaly_input,problem_name
3941,ExaMiniMD,7487827,1736,366cd34af5764ba5ae57cfb807ae7d32,memleak,1,cosineEclipse16
3942,ExaMiniMD,7487827,1736,0c4e9c2acae44fda907752b3fe68c9b8,memleak,1,cosineEclipse16
3943,ExaMiniMD,7487827,1736,333ddca07a4f4091a4d83cb6b0bd179f,memleak,1,cosineEclipse16
4052,ExaMiniMD,7488037,1728,9183d8710f08475692eecc9377fdf83a,memleak,k,cosineEclipse16
4062,ExaMiniMD,7488040,1727,8bd05bd653a849fa9d23f0992fd6afb9,memleak,1,cosineEclipse16
...,...,...,...,...,...,...,...
24624,HACC,7497023,739,f1cf58de489c46c4b5f9b8462255d4eb,memleak,2,cosineEclipse16
24625,HACC,7497023,739,b0d847b845384af2a65c235a0ff8a8dc,memleak,2,cosineEclipse16
24632,HACC,7497023,739,f0f94081b6a94d61b6e85cb231f59e57,memleak,2,cosineEclipse16
24634,HACC,7497023,739,24b917badfe44714a26ac11a1ea00b21,memleak,2,cosineEclipse16


In [100]:
NODE_ID = 'f1cf58de489c46c4b5f9b8462255d4eb'

In [101]:
y_test.loc[NODE_ID]

Unnamed: 0_level_0,app,anom
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4
f1cf58de489c46c4b5f9b8462255d4eb,5,4


In [102]:
DATA_INDEX = np.where(y_test.index == NODE_ID)[0][0]
ORIG_VALUES = X_test.iloc[DATA_INDEX].values

In [103]:
#Testing pipeline
preds_encoded = predictor.model.predict(ORIG_VALUES.reshape(1,-1))
preds_prob = predictor.model.predict_proba(ORIG_VALUES.reshape(1,-1))

preds = []
for pred in preds_encoded:
    for key,value in predictor.anom_dict.items():
        if value == pred:
            preds.append(key)



In [104]:
preds,np.argmax(preds_prob)

(['memleak'], 4)

## Testing Runtime Module with Data from Test Set - 4 8 16 Node Runs

In [13]:
CSV_PATH = Path('/projectnb/peaclab-mon/aksar/datasets/eclipse_multi_out')
metadata = pd.read_csv(CSV_PATH / 'label_metadata.csv')
metadata

Unnamed: 0,appname,jobid,time,node_id,anomaly,anomaly_input,problem_name
0,ExaMiniMD,7487818,909,3feefe7932784ac48c71abdc60ba82c3,cpuoccupy,1,cosineEclipse8
1,ExaMiniMD,7487818,909,cdff2838bb654aecb60b43314aa29f1c,cpuoccupy,1,cosineEclipse8
2,ExaMiniMD,7487818,909,bda66573e669477bb461e2dcf8728af9,cpuoccupy,1,cosineEclipse8
3,ExaMiniMD,7487818,909,4abc9a2b42b04648a6c899fb220baf5d,cpuoccupy,1,cosineEclipse8
4,ExaMiniMD,7487818,909,3b78f8c694ec4e279bedcdb96b12200e,cpuoccupy,1,cosineEclipse8
...,...,...,...,...,...,...,...
36144,HACC,7497048,749,9f1ac5cb38964092bf5d596ad046c43e,,e,cosineEclipse16
36145,HACC,7497048,749,c1c9f11321434a93b08b7a5c760ee900,,e,cosineEclipse16
36146,HACC,7497048,749,d5735cdfc75c4e0994a9d704cca1cb33,,e,cosineEclipse16
36147,HACC,7497048,749,db2dee42c2ff488a908374e4fb437a94,,e,cosineEclipse16


In [15]:
#JOB_ID = '7497048' #-> HACC None
#JOB_ID = '7487818' #-> ExaMiniMD cpuoccupy - Predicts generally wrong
#JOB_ID = '7497123' #-> Lammps None
JOB_ID = '7487764' #-> Lammps None
#JOB_ID = '7496394' #-> Have in sample data
metadata[metadata['jobid'] == int(JOB_ID)]

Unnamed: 0,appname,jobid,time,node_id,anomaly,anomaly_input,problem_name
23086,SWFFT,7487764,1059,2f1365c027ed48b0a3e33be70090f50f,membw,1,cosineEclipse8
23087,SWFFT,7487764,1059,68da92c652d346c9b1a8eebbeb072378,membw,1,cosineEclipse8
23088,SWFFT,7487764,1059,0c2aeb47a37145719c29c84bede6a303,membw,1,cosineEclipse8
23089,SWFFT,7487764,1059,6c2f19c0eed24fe3a53ba33dc6404248,membw,1,cosineEclipse8
23090,SWFFT,7487764,1059,121b64b5a00b4afc804ce44ebc058bbe,membw,1,cosineEclipse8
23091,SWFFT,7487764,1059,6825da3c8068484aaf4848e8366d6840,membw,1,cosineEclipse8
23092,SWFFT,7487764,1059,907c884ae5904d60824218561979e383,membw,1,cosineEclipse8
23093,SWFFT,7487764,1059,bb2c900d72654b628850f66c085c2876,membw,1,cosineEclipse8


In [16]:
#JOB_ID = metadata[metadata['node_id'] == NODE_ID]['jobid'].values[0]
CSV_FILE = str(JOB_ID) + '.csv'
job_data = pd.read_csv(CSV_PATH / CSV_FILE)
job_data.set_index(['node_id','timestamp'],inplace=True)
job_data = job_data[[x for x in job_data.columns if 'per_core' not in x]]
job_data

Unnamed: 0_level_0,Unnamed: 1_level_0,MemFree::meminfo,MemAvailable::meminfo,Buffers::meminfo,Cached::meminfo,SwapCached::meminfo,Active::meminfo,Inactive::meminfo,Active(anon)::meminfo,Inactive(anon)::meminfo,Active(file)::meminfo,...,compact_success::vmstat,htlb_buddy_alloc_success::vmstat,htlb_buddy_alloc_fail::vmstat,unevictable_pgs_culled::vmstat,unevictable_pgs_scanned::vmstat,unevictable_pgs_rescued::vmstat,unevictable_pgs_mlocked::vmstat,unevictable_pgs_munlocked::vmstat,unevictable_pgs_cleared::vmstat,unevictable_pgs_stranded::vmstat
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0c2aeb47a37145719c29c84bede6a303,1606134603,128400884,128073592,0,284112,0,325280,265544,309248,214436,16032,...,0,0,0,0,0,0,0,0,0,0
0c2aeb47a37145719c29c84bede6a303,1606134604,128400852,128073560,0,284112,0,325280,265544,309248,214436,16032,...,0,0,0,0,0,0,0,0,0,0
0c2aeb47a37145719c29c84bede6a303,1606134605,128400892,128073600,0,284112,0,325280,265544,309248,214436,16032,...,0,0,0,0,0,0,0,0,0,0
0c2aeb47a37145719c29c84bede6a303,1606134606,128400892,128073600,0,284112,0,325280,265544,309248,214436,16032,...,0,0,0,0,0,0,0,0,0,0
0c2aeb47a37145719c29c84bede6a303,1606134607,128401016,128073724,0,284116,0,325280,265544,309248,214436,16032,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
bb2c900d72654b628850f66c085c2876,1606135655,127986632,127712436,0,278132,0,733124,245424,701296,109792,31828,...,0,0,0,0,0,0,0,0,0,0
bb2c900d72654b628850f66c085c2876,1606135656,127986632,127712436,0,278132,0,733124,245424,701296,109792,31828,...,0,0,0,0,0,0,0,0,0,0
bb2c900d72654b628850f66c085c2876,1606135657,127986632,127712436,0,278132,0,733124,245424,701296,109792,31828,...,0,0,0,0,0,0,0,0,0,0
bb2c900d72654b628850f66c085c2876,1606135658,127986796,127712600,0,278132,0,733124,245424,701296,109792,31828,...,0,0,0,0,0,0,0,0,0,0


In [17]:
predictor = RuntimePredictor(pickle_dir=PICKLE_DIR, 
                             feature_select=FS, 
                             window_size = WINDOW_SIZE,
                             granularity = GRANULARITY,
                             pickle_name="{}.pickle".format(MODEL_NAME))

job_results = predictor.TEST_predict_from_DF(job_data)
#node_data, temp_results, preds,preds_prob = predictor.TEST_predict_from_DF(job_data)
#temp_results, preds,preds_prob = predictor.TEST_predict_from_DF(job_data)
#node_results = predictor.TEST_predict_from_DF(job_data)
#node_data, feature_data, preds,preds_prob = predictor.TEST_predict_from_DF(job_data)

2021-02-17 17:58:03,695 INFO    Model loaded
2021-02-17 17:58:03,696 INFO    Scaler loaded
2021-02-17 17:58:03,701 INFO    Loaded selected 699 features
2021-02-17 17:58:03,707 INFO    Preparing results for each node


In [18]:
job_results

Unnamed: 0_level_0,Unnamed: 1_level_0,preds,prob
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
0c2aeb47a37145719c29c84bede6a303,1606134722,membw,0.803635
0c2aeb47a37145719c29c84bede6a303,1606134737,membw,0.818833
0c2aeb47a37145719c29c84bede6a303,1606134752,membw,0.866815
0c2aeb47a37145719c29c84bede6a303,1606134767,membw,0.852409
0c2aeb47a37145719c29c84bede6a303,1606134782,membw,0.803402
...,...,...,...
bb2c900d72654b628850f66c085c2876,1606135532,membw,0.827707
bb2c900d72654b628850f66c085c2876,1606135547,membw,0.814494
bb2c900d72654b628850f66c085c2876,1606135562,membw,0.879346
bb2c900d72654b628850f66c085c2876,1606135577,membw,0.848138


In [21]:
job_results.index.get_level_values('node_id').unique()

Index(['0c2aeb47a37145719c29c84bede6a303', '121b64b5a00b4afc804ce44ebc058bbe',
       '2f1365c027ed48b0a3e33be70090f50f', '6825da3c8068484aaf4848e8366d6840',
       '68da92c652d346c9b1a8eebbeb072378', '6c2f19c0eed24fe3a53ba33dc6404248',
       '907c884ae5904d60824218561979e383', 'bb2c900d72654b628850f66c085c2876'],
      dtype='object', name='node_id')

In [33]:
job_results[job_results.index.get_level_values('node_id') == '0c2aeb47a37145719c29c84bede6a303']

Unnamed: 0_level_0,Unnamed: 1_level_0,preds,prob
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
0c2aeb47a37145719c29c84bede6a303,1606134722,membw,0.803635
0c2aeb47a37145719c29c84bede6a303,1606134737,membw,0.818833
0c2aeb47a37145719c29c84bede6a303,1606134752,membw,0.866815
0c2aeb47a37145719c29c84bede6a303,1606134767,membw,0.852409
0c2aeb47a37145719c29c84bede6a303,1606134782,membw,0.803402
0c2aeb47a37145719c29c84bede6a303,1606134797,membw,0.829239
0c2aeb47a37145719c29c84bede6a303,1606134812,membw,0.810719
0c2aeb47a37145719c29c84bede6a303,1606134827,membw,0.831618
0c2aeb47a37145719c29c84bede6a303,1606134842,membw,0.823329
0c2aeb47a37145719c29c84bede6a303,1606134857,membw,0.87716


In [None]:
job_results[job_results.index.get_level_values('node_id') == '0c2aeb47a37145719c29c84bede6a303']

## Test the Model with Random Job Data in Eclipse

In [16]:
### To get results from runtime data saved as CSV
MODEL = 'lgbm'
MODEL_FOLDER = '{}_final_60sec'.format(MODEL)
FS = 1
MODEL_NAME = 'eclipse_{}'.format(MODEL)

if FS:
    MODEL_NAME = MODEL_NAME + '-fs'
    
WINDOW_SIZE = 60
GRANULARITY = 0

PICKLE_DIR = Path('/usr3/graduate/baksar/projectx/E2EWatch/Models/{}'.format(MODEL_FOLDER))
RUNTIME_DATA = Path('/usr3/graduate/baksar/projectx/E2EWatch/Runtime/sample_data')
RUNTIME_OUT = RUNTIME_DATA / 'OUT'
CSV_DATA = RUNTIME_DATA / '7496394.csv'

In [17]:
runtime_data = pd.read_csv(CSV_DATA)
try:
    runtime_data.drop(columns=['Unnamed: 0'],inplace=True)
except:
    print("Go ahead")
runtime_data.head()

Unnamed: 0,timestamp,component_id,MemFree,MemAvailable,Buffers,Cached,SwapCached,Active,Inactive,Active(anon),...,compact_success,htlb_buddy_alloc_success,htlb_buddy_alloc_fail,unevictable_pgs_culled,unevictable_pgs_scanned,unevictable_pgs_rescued,unevictable_pgs_mlocked,unevictable_pgs_munlocked,unevictable_pgs_cleared,unevictable_pgs_stranded
0,1606474000000.0,5.0,128481764.0,128149516.0,0.0,112944.0,0.0,398268.0,101604.0,386660.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1,1606474000000.0,0.0,128962204.0,128636192.0,0.0,137304.0,0.0,212812.0,120956.0,197668.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
2,1606474000000.0,7.0,128751988.0,128412980.0,0.0,112132.0,0.0,354812.0,95416.0,338920.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
3,1606474000000.0,2.0,128858872.0,128518380.0,0.0,111860.0,0.0,344836.0,95248.0,330532.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
4,1606474000000.0,4.0,129023032.0,128685404.0,0.0,112212.0,0.0,211344.0,95464.0,195388.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0


In [18]:
predictor = RuntimePredictor(pickle_dir=PICKLE_DIR, 
                             feature_select=FS, 
                             window_size = WINDOW_SIZE,
                             granularity = GRANULARITY,
                             pickle_name="{}.pickle".format(MODEL_NAME))

2021-02-15 11:31:57,334 INFO    Model loaded
2021-02-15 11:31:57,335 INFO    Scaler loaded
2021-02-15 11:31:57,340 INFO    Loaded selected 699 features


In [19]:
node_data = predictor.predict_from_DF(runtime_data)
node_data

2021-02-15 11:32:00,080 INFO    Preparing results for each node


Unnamed: 0_level_0,max_MemFree,max_MemAvailable,max_Buffers,max_Cached,max_SwapCached,max_Active,max_Inactive,max_Active(anon),max_Inactive(anon),max_Active(file),...,perc95_compact_success,perc95_htlb_buddy_alloc_success,perc95_htlb_buddy_alloc_fail,perc95_unevictable_pgs_culled,perc95_unevictable_pgs_scanned,perc95_unevictable_pgs_rescued,perc95_unevictable_pgs_mlocked,perc95_unevictable_pgs_munlocked,perc95_unevictable_pgs_cleared,perc95_unevictable_pgs_stranded
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1606474260,89156088.0,88881688.0,0.0,362440.0,0.0,47303464.0,337540.0,47279496.0,188396.0,23968.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606474275,87059036.0,86784636.0,0.0,362444.0,0.0,49401716.0,337540.0,49377744.0,188396.0,23972.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606474290,84956988.0,84682592.0,0.0,362452.0,0.0,51499936.0,337540.0,51475956.0,188396.0,23980.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606474305,82853700.0,82579308.0,0.0,362456.0,0.0,53598248.0,337544.0,53574268.0,188396.0,23980.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606474320,80751048.0,80476656.0,0.0,362460.0,0.0,69573956.0,337544.0,69549968.0,188396.0,23988.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1606476420,58578300.0,58304188.0,0.0,363132.0,0.0,69578320.0,334540.0,69550724.0,188344.0,27596.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606476435,58578208.0,58304100.0,0.0,363136.0,0.0,69578324.0,334540.0,69550724.0,188344.0,27600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606476450,58578208.0,58304100.0,0.0,363140.0,0.0,69578324.0,334532.0,69550724.0,188344.0,27604.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1606476465,58578196.0,58304096.0,0.0,363148.0,0.0,69578324.0,334528.0,69550724.0,188344.0,27608.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0


In [10]:
job_results = predictor.predict_from_DF(runtime_data)
job_results

2021-02-15 11:19:12,122 INFO    Preparing results for each node


Unnamed: 0_level_0,Unnamed: 1_level_0,preds,prob
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
5,1606474260,memleak,0.525755
5,1606474275,memleak,0.729963
5,1606474290,memleak,0.606039
5,1606474305,memleak,0.666988
5,1606474320,,0.830952
...,...,...,...
1,1606476421,,0.992333
1,1606476436,,0.986373
1,1606476451,,0.985768
1,1606476466,,0.976384


In [556]:
# #If you need to check how runtime data looks like
# runtime_data.dropna(inplace=True)
# runtime_data['component_id'] = runtime_data['component_id'].astype(int)
# runtime_data = runtime_data.rename(columns={'component_id':'node_id'})

# round_factor = 1000 #Currently runtime data is collected every 60 seconds
# runtime_data['timestamp'] = round(runtime_data['timestamp'].astype(int) / round_factor)
# runtime_data['timestamp'] = runtime_data['timestamp'].astype(int) 
# runtime_data = runtime_data.set_index(['node_id','timestamp'])

# runtime_data.loc[700024]

## Test the model with Outlier Job Data in Eclipse

In [None]:
### To get results from runtime data saved as CSV
PICKLE_OUT = Path('/usr3/graduate/baksar/projectx/ADF-Deployment/Models/rf_sampled_nowindow')
OUTLIER_PATH = Path('/usr3/graduate/baksar/projectx/ADF-Deployment/Runtime/sample_data')

CSV_DATA = RUNTIME_DATA / '12314.csv'

In [None]:
#     def predict_from_CSV(self, job_data_path):
           
#         """
#         Process runtime monitoring data and make predictions with the existing model and saves it 

#         Args:
#             job_data_path: Specify absolute path to the CSV file that stores runtime monitoring data
#         Returns:
#             None
#         """     

#         runtime_data = pd.read_csv(job_data_path)
#         runtime_data.dropna(inplace=True)

#         runtime_data['component_id'] = runtime_data['component_id'].astype(int)
#         runtime_data = runtime_data.rename(columns={'component_id':'node_id'})

#         round_factor = 1000 #Currently runtime data is collected every 60 seconds
#         runtime_data['timestamp'] = round(runtime_data['timestamp'].astype(int) / round_factor)
#         runtime_data['timestamp'] = runtime_data['timestamp'].astype(int) 
#         runtime_data = runtime_data.set_index(['node_id','timestamp'])

#         runtime_data_columns = list(runtime_data.columns)
#         model_column_names = []
#         txt_columns = open(self.pickle_dir / "eclipse_column_names.txt").readlines()
#         for line in txt_columns:
#             line=line.strip()
#             model_column_names.append(line)

#         model_column_set = set([column.split("::")[0] for column in model_column_names])
#         runtime_column_set = set(runtime_data_columns)
#         diff_cols = runtime_column_set - model_column_set

#         try:
#             assert len(diff_cols) == 0
#         except:
#             raise Exception("Columns of the input data have to same as model columns")

#         ###Use the loaded scaler
#         runtime_data = pd.DataFrame(self.scaler.transform(runtime_data), columns=runtime_data.columns, index=runtime_data.index)
            
#         ###For each node_id, select common_time 
#         unique_nids = runtime_data.index.get_level_values('node_id').unique()

#         common_time = []
#         for nid in unique_nids:
                
#             node_time = list(runtime_data[runtime_data.index.get_level_values('node_id') == nid].index.get_level_values('timestamp'))
#             if self.DEBUG:
#                 logging.info("Node id: %s", nid)
#                 logging.info("Node timestamp length: %s", len(node_time))
#                 logging.info("Common time length: %s", len(common_time))
#             if not len(common_time):
#                 common_time = node_time
#             else:
#                 common_time = reduce(np.intersect1d,(common_time,node_time))


#         #Select common timestamps among all nodes
#         runtime_data = runtime_data[runtime_data.index.get_level_values('timestamp').isin(common_time)]    


#         ###Results will be stored in here
#         node_results = pd.DataFrame()
#         logging.info("Preparing results for each node")
#         for nid in runtime_data.index.get_level_values('node_id').unique():

#             node_data = runtime_data.loc[nid,:,:]

#             if self.feature_select:
#                 logging.info("Feature Selection")
#                 features = ['max', 'min', 'mean', 'std', 'skew', 'kurt','perc05', 'perc25', 'perc50', 'perc75', 'perc95'] 
#                 feature_train_data = pd.DataFrame()
                                
#                 if self.granularity != 0:    
#                     feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=0)
#                 else:
#                     feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=30)
#                 feature_data = feature_data[self.selected_features]
                                
#             else:
#                 logging.info("NO Feature Selection")
#                 features = ['max', 'min', 'mean', 'std', 'skew', 'kurt','perc05', 'perc25', 'perc50', 'perc75', 'perc95'] 
#                 feature_train_data = pd.DataFrame()
                
#                 #Call for every node
#                 if self.granularity != 0:
# #                     getter = WindowShopper(node_data, None, trim=0, window_size=self.window_size)
#                     feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=0)                    
#                 else:
# #                     getter = WindowShopper(node_data, None, window_size=self.window_size)
#                     feature_data = generate_rolling_features(node_data,features=features,window_size=self.window_size,trim=30)                
# #                windowed_data, _ = getter.return_windowed_dataset()

#                 #Generate Features
# #                 feature_generator = TSFeatureGenerator(trim=0) #Don't change the trim
# #                 feature_data = feature_generator.transform(windowed_data)
                
                
#             #Generate results
#             preds_encoded = self.model.predict(feature_data)
#             preds_prob = self.model.predict_proba(feature_data)

#             preds = []
#             for pred in preds_encoded:
#                 for key,value in self.anom_dict.items():
#                     if value == pred:
#                         preds.append(key)
                        
#             #FIXME: length of timestamps are sometimes funky, check this out and creates length mismatch between len(preds)                  
#             timestamps = node_data.index.get_level_values('timestamp')[0:len(node_data.index.get_level_values('timestamp'))-self.window_size+1].values
#             multiindex = list(zip(np.repeat(nid,len(timestamps)),timestamps))
#             index = pd.MultiIndex.from_tuples(multiindex, names=['node_id', 'timestamp'])
#             temp_results = pd.DataFrame(index=index)
#             temp_results['preds'] = preds
#             temp_results['prob'] = np.max(preds_prob,axis=1)
#             node_results = pd.concat([node_results,temp_results])    

#         #Write results to .csv
#         csv_name = str(CSV_DATA).split('/')[-1].split('.')[0]
#         node_results.to_csv(self.pickle_dir / (csv_name + '_results.csv'))


## Testing Runtime Module with Data from Test Set

In [9]:
###Feed the runtime module with one job data (multiple node ids)
###DISCLAIMER: This doesn't work with models that has feature selection 
CSV_PATH = Path('/projectnb/peaclab-mon/aksar/datasets/eclipse_normal_out')
PICKLE_OUT = Path('/usr3/graduate/baksar/projectx/ADF-Deployment/model')
RESULT_OUT = PICKLE_OUT / 'results'
JOB_ID = 6922183
CSV_FILE = str(JOB_ID) + '.csv'
LABEL_FILE = 'label_metadata.csv'

job_data = pd.read_csv(CSV_PATH / CSV_FILE)
job_data.set_index(['node_id','timestamp'],inplace=True)
job_data = job_data[[x for x in job_data.columns if 'per_core' not in x]]

label_data = pd.read_csv(CSV_PATH / LABEL_FILE)


temp_label_data = label_data[label_data['jobid'] == JOB_ID]

app_name = temp_label_data['appname'].values[0]
anom_name = temp_label_data['anomaly'].values[0]
#print(temp_label_data)

In [12]:
job_data

Unnamed: 0_level_0,Unnamed: 1_level_0,MemFree::meminfo,MemAvailable::meminfo,Buffers::meminfo,Cached::meminfo,SwapCached::meminfo,Active::meminfo,Inactive::meminfo,Active(anon)::meminfo,Inactive(anon)::meminfo,Active(file)::meminfo,...,softirq::procstat,steal::procstat,guest::procstat,guest_nice::procstat,hwintr_count::procstat,context_switches::procstat,processes::procstat,procs_running::procstat,procs_blocked::procstat,softirq_count::procstat
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
5d156e8defd245b1b55cf373d60eec56,1601497356,128052292,127749180,0,159764,0,663356,135668,642084,64388,21272,...,0,0,0,0,450,522,1,2,0,613
5d156e8defd245b1b55cf373d60eec56,1601497357,128052564,127749452,0,159764,0,663356,135668,642084,64388,21272,...,0,0,0,0,570,726,1,2,0,599
5d156e8defd245b1b55cf373d60eec56,1601497358,128052184,127749072,0,159764,0,663356,135668,642084,64388,21272,...,0,0,0,0,405,539,0,3,0,517
5d156e8defd245b1b55cf373d60eec56,1601497359,128052456,127749344,0,159764,0,663356,135668,642084,64388,21272,...,0,0,0,0,394,467,1,3,0,533
5d156e8defd245b1b55cf373d60eec56,1601497360,128052728,127749616,0,159764,0,663356,135668,642084,64388,21272,...,0,0,0,0,395,524,1,1,0,523
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
d6c342b8e30d4b35be72f09833eb17a6,1601498814,128175332,127904720,0,226920,0,502048,190952,469632,58536,32416,...,0,0,0,0,352,396,1,1,0,465
d6c342b8e30d4b35be72f09833eb17a6,1601498815,128175100,127904488,0,226920,0,502048,190952,469632,58536,32416,...,0,0,0,0,366,488,0,1,0,465
d6c342b8e30d4b35be72f09833eb17a6,1601498816,128175372,127904760,0,226920,0,502048,190952,469632,58536,32416,...,0,0,0,0,509,584,1,1,0,505
d6c342b8e30d4b35be72f09833eb17a6,1601498817,128175224,127904612,0,226920,0,502048,190952,469632,58536,32416,...,0,0,0,0,769,1210,1,1,0,1159


In [14]:
predictor = RuntimePredictor(pickle_dir=PICKLE_OUT,feature_select=False,pickle_name="eclipse_rf_fs.pickle")

#temp_results, preds  = predictor.TEST_predict_from_DF(job_data)
job_results = predictor.TEST_predict_from_DF(job_data)
#job_results.to_csv(RESULT_OUT / (app_name + '_' + anom_name + '.csv'))
job_results

2020-10-22 17:46:34,344 INFO    Preparing results for each node


## Testing Runtime Module with Actual Data from Eclipse

In [8]:
### To get results from runtime data saved as CSV
PICKLE_OUT = Path('/usr3/graduate/baksar/projectx/ADF-Deployment/model')
RUNTIME_DATA = Path('/usr3/graduate/baksar/projectx/ADF-Deployment/Runtime/DATA')
RUNTIME_OUT = RUNTIME_DATA / 'OUT'
CSV_DATA = RUNTIME_DATA / '12314.csv'

predictor = RuntimePredictor(pickle_dir=PICKLE_OUT,feature_select=True,pickle_name="eclipse_rf_fs.pickle")
runtime_data = pd.read_csv(CSV_DATA)

runtime_data.head()

2020-10-22 17:42:41,776 INFO    Selected 509 features


Unnamed: 0,timestamp,component_id,MemFree,MemAvailable,Buffers,Cached,SwapCached,Active,Inactive,Active(anon),...,compact_success,htlb_buddy_alloc_success,htlb_buddy_alloc_fail,unevictable_pgs_culled,unevictable_pgs_scanned,unevictable_pgs_rescued,unevictable_pgs_mlocked,unevictable_pgs_munlocked,unevictable_pgs_cleared,unevictable_pgs_stranded
0,1597074960002,700024.0,88896800.0,92593112.0,0.0,4623588.0,0.0,37690376.0,1451196.0,34520700.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
1,1597074960002,700028.0,98647424.0,98390908.0,0.0,593712.0,0.0,29277200.0,575568.0,29261740.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
2,1597074960003,700023.0,87949440.0,89316432.0,0.0,2237216.0,0.0,38775384.0,1557388.0,38099068.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
3,1597074960004,700030.0,96907484.0,97516728.0,0.0,1501940.0,0.0,30580084.0,888392.0,29969964.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0
4,1597074960005,700029.0,95917576.0,98128260.0,0.0,3088096.0,0.0,31220536.0,1018676.0,29154520.0,...,0.0,0.0,0.0,0.0,0.0,0.0,512.0,0.0,0.0,0.0


In [4]:
##This will write results as CSV
predictor.predict_from_CSV(job_data_path=CSV_DATA)

2020-10-22 17:39:43,250 INFO    Preparing results for each node
2020-10-22 17:39:43,254 INFO    Feature Selection
2020-10-22 17:39:43,392 INFO    Feature Selection
2020-10-22 17:39:43,525 INFO    Feature Selection
2020-10-22 17:39:43,660 INFO    Feature Selection
2020-10-22 17:39:43,793 INFO    Feature Selection
2020-10-22 17:39:43,927 INFO    Feature Selection
2020-10-22 17:39:44,060 INFO    Feature Selection
2020-10-22 17:39:44,195 INFO    Feature Selection
2020-10-22 17:39:44,327 INFO    Feature Selection


In [5]:
#This will return results as DF 
#FIXME: It returns only membw label
temp_node_results = predictor.predict_from_DF(runtime_data)

2020-10-22 17:39:50,064 INFO    Preparing results for each node
2020-10-22 17:39:50,066 INFO    Feature Selection
2020-10-22 17:39:50,200 INFO    Feature Selection
2020-10-22 17:39:50,333 INFO    Feature Selection
2020-10-22 17:39:50,466 INFO    Feature Selection
2020-10-22 17:39:50,601 INFO    Feature Selection
2020-10-22 17:39:50,734 INFO    Feature Selection
2020-10-22 17:39:50,867 INFO    Feature Selection
2020-10-22 17:39:51,009 INFO    Feature Selection
2020-10-22 17:39:51,156 INFO    Feature Selection


In [6]:
temp_node_results

Unnamed: 0_level_0,Unnamed: 1_level_0,preds,prob
node_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1
700024,1597074960,memleak,0.52
700024,1597075020,memleak,0.52
700024,1597075080,memleak,0.51
700024,1597075140,memleak,0.52
700024,1597075200,memleak,0.53
...,...,...,...
700027,1597077600,memleak,0.51
700027,1597077660,memleak,0.51
700027,1597077720,memleak,0.52
700027,1597077780,memleak,0.52


## Some Other Tests


In [None]:
# ### TEST PURPOSES
# if conf['granularity'] != 0:
#     getter = WindowShopper(node_data, None, trim=0, window_size=conf['window_size'])
# else:
#     getter = WindowShopper(node_data, None, window_size=conf['window_size'])
# windowed_node_data, _ = getter.return_windowed_dataset()

# logging.info("Windowed data shape %s",windowed_node_data.shape)

# #Generate Features
# logging.info("Generating features")
# feature_generator = TSFeatureGenerator(trim=0) #Don't change the trim
# feature_node_data = feature_generator.transform(windowed_node_data)

# logging.info("Feature data shape %s",feature_node_data.shape)

# preds = predictor.model.predict(feature_node_data)
# preds

In [48]:
# ### TEST THE PICKLE WITH FULL TEST DATA 
# train_label = pd.read_csv(conf['output_training_dir'] / ('CV_' + str(conf['cv_fold'])) /'train_label.csv',index_col=['node_id'])
# logging.info(train_label.shape)
# test_label = pd.read_csv(conf['output_training_dir'] / ('CV_' + str(conf['cv_fold'])) / 'test_label.csv',index_col=['node_id'])
# logging.info(test_label.shape)

# train_data = pd.read_hdf(conf['hdf_data_path'] / 'train_data.hdf','train_data')
# train_data = train_data[[x for x in train_data.columns if 'per_core' not in x]]
# logging.info("Training data shape: %s",train_data.shape)
# logging.info("Reducing the data granularity")
# train_data = granularityAdjust(train_data,granularity=conf['granularity'])
# logging.info("Training data shape: %s",train_data.shape)


# test_data = pd.read_hdf(conf['hdf_data_path'] / 'test_data.hdf','test_data')
# test_data = test_data[[x for x in test_data.columns if 'per_core' not in x]]
# logging.info("Test data shape: %s",test_data.shape)
# logging.info("Reducing the data granularity")
# test_data = granularityAdjust(test_data,granularity=conf['granularity'])
# logging.info("Test data shape: %s",test_data.shape)

# train_data_full = pd.concat([train_data,test_data])
# logging.info("Full data shape: %s",train_data_full.shape)
# logging.info("Unique node ids %s",len(train_data_full.index.get_level_values('node_id').unique()))
# #train_data_full.head()

# train_data_full = train_data_full.dropna()
# logging.info("Is NaN: %s",np.any(np.isnan(train_data_full)))
# logging.info("Data shape: %s",train_data_full.shape)

# #Using the saved scaler
# train_data_full = pd.DataFrame(predictor.scaler.transform(train_data_full), columns=train_data_full.columns, index=train_data_full.index)


# ###TEST LABEL
# if conf['granularity'] != 0:
#     getter = WindowShopper(train_data_full, test_label, trim=0, window_size=conf['window_size'])
# else:
#     getter = WindowShopper(train_data_full, test_label, window_size=conf['window_size'])
# windowed_train_data, windowed_train_label = getter.return_windowed_dataset()

# logging.info("Windowed data shape %s",windowed_train_data.shape)
# logging.info("Windowed label shape %s",windowed_train_label.shape)

# #Generate Features
# logging.info("Generating features")
# feature_generator = TSFeatureGenerator(trim=0) #Don't change the trim
# feature_train_data = feature_generator.transform(windowed_train_data)

# logging.info("Testing pipeline!")
# preds = predictor.model.predict(feature_train_data)

# logging.info("Generating report!")
# ### Saves classification report where all apps are combined
# report_dict = classification_report(y_true=windowed_train_label['anom'].astype('int'), y_pred=preds, labels=windowed_train_label['anom'].unique())
# print(report_dict)

In [12]:
# #Check the written data
# PICKLE_OUT = Path('/projectnb/peaclab-mon/aksar/models/eclipse')
# data = pd.read_csv(PICKLE_OUT / ('results.csv'),index_col=['node_id','timestamp'])

In [None]:
### Test code
# node_time_first = [int(timestamp/1000) for timestamp in list(runtime_data[runtime_data.index.get_level_values('node_id') == 700024].index.get_level_values('timestamp'))]
# node_time_second = [int(timestamp/1000) for timestamp in list(runtime_data[runtime_data.index.get_level_values('node_id') == 700028].index.get_level_values('timestamp'))]
# common_time = reduce(np.intersect1d,(node_time_first,node_time_second))
# logging.info(len(node_time_first))
# logging.info(len(node_time_second))
# logging.info(len(common_time))