In [1]:
# Import required libraries
import pandas as pd
import numpy as np
from scipy.stats import kurtosis as ss_kurtosis
import os, glob
import uuid

# Widen pandas' display limits so long/wide frames are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 10000

In [2]:
# Read waveform data samples
data_path = "../Data/Vibration-Data/" #Path to vibration CSVs
# glob.glob returns matches in arbitrary (filesystem-dependent) order. Sort the
# list so that positional slices of it select the same files on every run and
# on every machine.
acc_wav_filenames = sorted(glob.glob(data_path + 'Pump4_PumpRadial/*td.csv'))

In [3]:
# Preview the first five matched waveform files.
acc_wav_filenames[:5]

['../Data/Vibration-Data/Pump4_PumpRadial\\g-pump-4_pump-radial_raw-data_2019-11-24_08-15-58_1480_td.csv',
 '../Data/Vibration-Data/Pump4_PumpRadial\\g-pump-4_pump-radial_raw-data_2019-11-24_09-15-58_1480_td.csv',
 '../Data/Vibration-Data/Pump4_PumpRadial\\g-pump-4_pump-radial_raw-data_2019-11-24_10-16-00_1480_td.csv',
 '../Data/Vibration-Data/Pump4_PumpRadial\\g-pump-4_pump-radial_raw-data_2019-11-24_11-16-02_1480_td.csv',
 '../Data/Vibration-Data/Pump4_PumpRadial\\g-pump-4_pump-radial_raw-data_2019-11-24_12-16-05_1480_td.csv']

In [4]:
%%time
_files = acc_wav_filenames[0:10]
df_wav = pd.DataFrame()# define an empty dataframe
for file, i in zip(_files, range(1, len(_files)+1)):
    _name = 'sample_' + str(i)
    _tmp = pd.read_csv(file)
    _tmp['asset_id'] = 'Pump4'
    _tmp['asset_name'] = 'g-pump4'
    _tmp['datetime'] = 'datetime'+str(i)
    _tmp['direction'] = 'radial'
    _tmp['uuid'] = uuid.uuid4()
    _tmp.reset_index(drop = True, inplace = True)
    df_wav = pd.concat([df_wav, _tmp])

Wall time: 813 ms


In [5]:
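# NOTE: scratch draft of the aggregation steps, kept commented out for reference only;
# the runnable version lives inside func_to_aggregate_single_waveform.
# # Define aggregation rule here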
# ops1 = [RMS, mean, peak, pk_pk, crest_factor, kurtosis] # Metrics to calculate on waveform amplitude
# aggregations = {}
# aggregations = dict.fromkeys(['y'], ops1)
# #aggregations.update(dict.fromkeys(['col_name1'], ops2))

# # Aggregate data and rename the columns.
# agg_df = pd.DataFrame() # Define a dataframe
# agg_df = df_.groupby([col_to_group_on],as_index=False).agg(aggregations)
# agg_df.columns = [x[1] if x[1]!='' else x[0] for x in agg_df.columns.ravel()]
# agg_df = pd.melt(agg_df, id_vars=['datetime'], value_vars=['RMS', 'mean', 'peak', 'pk_pk', 'crest_factor', 'kurtosis'],
#                  var_name='metric_name', value_name='metric_value')
# agg_df['asset_id'] = df_['asset_id'][0]
# agg_df['asset_name'] = df_['asset_name'][0]
# agg_df['param_name'] = 'overall-waveform'
# agg_df['metric_unit'] = metric_unit
# agg_df['metric_type'] = metric_type
# agg_df['direction'] = df_['direction'][0]
# agg_df['uuid'] = df_['uuid'][0]

In [6]:
# Define function to aggregate data
def func_to_aggregate_single_waveform(df_ ,
                               amp_col= 'y',
                               col_to_group_on = 'uuid',
                               metric_unit = 'ms2',
                               metric_type = 'acceleration'
                              ):
    """Compute overall time-domain metrics of a waveform, one set per group.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Waveform samples. Must contain ``amp_col``, ``col_to_group_on`` and the
        metadata columns 'asset_id', 'asset_name', 'direction' and 'uuid'.
        The frame is not modified.
    amp_col : str
        Name of the amplitude column the metrics are computed on.
    col_to_group_on : str
        Column identifying a waveform; metrics are computed per distinct value
        and this column is kept as the identifier column of the result.
    metric_unit, metric_type : str
        Constant labels copied into the 'metric_unit' / 'metric_type' columns.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with one row per (group, metric). Columns, in order:
        ``col_to_group_on``, 'metric_name', 'metric_value', 'asset_id',
        'asset_name', 'param_name', 'metric_unit', 'metric_type', 'direction',
        'uuid' (a metadata column is not repeated when it is the grouping
        column). 'metric_name' takes the values RMS, mean, peak, pk_pk,
        crest_factor and kurtosis.

    Raises
    ------
    KeyError
        If a required column is missing from ``df_``.
    """
    # Metric functions. pandas uses each function's __name__ as the column
    # name of its result, which later becomes the 'metric_name' value.
    def RMS(x):
        return np.sqrt(np.mean(np.square(x)))
    def mean(x):
        return np.mean(x)
    def peak(x):
        return np.max(np.abs(x))
    def pk_pk(x):
        return (np.max(x) - np.min(x))
    def crest_factor(x):
        return peak(x) / RMS(x)
    def kurtosis(x):
        return ss_kurtosis(x, bias=False)

    metric_funcs = [RMS, mean, peak, pk_pk, crest_factor, kurtosis]
    metric_names = [func.__name__ for func in metric_funcs]

    # reset_index without inplace returns a new frame, so the caller's
    # DataFrame keeps its original index.
    frame = df_.reset_index(drop=True)

    # One row per group, one column per metric, computed on the requested
    # amplitude column (not a hard-coded 'y').
    wide = frame.groupby(col_to_group_on)[amp_col].agg(metric_funcs).reset_index()

    # Long format, keyed by whichever column was actually grouped on.
    agg_df = pd.melt(wide, id_vars=[col_to_group_on], value_vars=metric_names,
                     var_name='metric_name', value_name='metric_value')

    # Metadata is taken from the first row of each group, so a frame holding
    # several groups does not get the first group's metadata on every row.
    first_rows = frame.drop_duplicates(subset=col_to_group_on).set_index(col_to_group_on)
    group_keys = agg_df[col_to_group_on]
    constants = {'param_name': 'overall-waveform',
                 'metric_unit': metric_unit,
                 'metric_type': metric_type}
    for col in ('asset_id', 'asset_name', 'param_name', 'metric_unit',
                'metric_type', 'direction', 'uuid'):
        if col == col_to_group_on:
            continue  # already present as the identifier column
        if col in constants:
            agg_df[col] = constants[col]
        else:
            agg_df[col] = group_keys.map(first_rows[col])
    return agg_df

In [7]:
def func_to_aggregate_overall_wavform(df_waveforms , # Waveform df consisting of one or more stacked waveforms
                                      amp_col= 'y',  # amplitude column name in waveform
                                      col_to_group_on = 'datetime', # Columns to apply groupby on 
                                      metric_unit = 'ms2', # unit of the waveform
                                      metric_type = 'acceleration', # type of waveform
                                      verbose = True # print the shape of each waveform as it is processed
                                      ):
    """Aggregate every waveform in ``df_waveforms`` into overall metrics.

    The frame is split on the distinct values of ``col_to_group_on``; each
    slice is passed to ``func_to_aggregate_single_waveform`` and the per-slice
    results are stacked into a single frame with a fresh 0..n-1 index.

    Parameters
    ----------
    df_waveforms : pandas.DataFrame
        Stacked waveform samples; must contain ``col_to_group_on``.
    amp_col, col_to_group_on, metric_unit, metric_type
        Forwarded unchanged to ``func_to_aggregate_single_waveform``.
    verbose : bool, default True
        When True, print the shape of each slice before aggregating it.

    Returns
    -------
    pandas.DataFrame
        Concatenated per-waveform results, or an empty DataFrame when
        ``df_waveforms`` has no rows.
    """
    per_waveform = []
    # Iterate over the frame that was passed in, not a notebook-level global.
    for _id in df_waveforms[col_to_group_on].unique():
        df_ = df_waveforms[df_waveforms[col_to_group_on]==_id].reset_index(drop = True)
        if verbose:
            print(df_.shape)
        per_waveform.append(
            func_to_aggregate_single_waveform(df_, amp_col, col_to_group_on, metric_unit, metric_type)
        )
    # pd.concat raises on an empty list, so return an empty frame in that case.
    if not per_waveform:
        return pd.DataFrame()
    # Concatenate once; ignore_index gives the same 0..n-1 index as a
    # reset_index(drop=True) afterwards.
    return pd.concat(per_waveform, ignore_index=True)

In [8]:
# func_to_aggregate_single_waveform(df_, amp_col, col_to_group_on, metric_unit, metric_type)

In [9]:
# Aggregate every loaded waveform; the returned frame is displayed as the cell output.
func_to_aggregate_overall_wavform(df_waveforms=df_wav)

(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)
(65536, 7)


Unnamed: 0,datetime,metric_name,metric_value,asset_id,asset_name,param_name,metric_unit,metric_type,direction,uuid
0,datetime1,RMS,7.015961,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
1,datetime1,mean,0.003129,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
2,datetime1,peak,26.465051,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
3,datetime1,pk_pk,52.542109,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
4,datetime1,crest_factor,3.772121,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
5,datetime1,kurtosis,-0.240132,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,fc74b6be-bfe5-4ebd-8360-ea8d9477933a
6,datetime2,RMS,7.041456,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,8354dfa3-fa31-44e2-923d-fa64019d3d66
7,datetime2,mean,0.003129,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,8354dfa3-fa31-44e2-923d-fa64019d3d66
8,datetime2,peak,27.904744,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,8354dfa3-fa31-44e2-923d-fa64019d3d66
9,datetime2,pk_pk,54.76473,Pump4,g-pump4,overall-waveform,ms2,acceleration,radial,8354dfa3-fa31-44e2-923d-fa64019d3d66
