In [3]:
!pip install numpy pandas matplotlib seaborn plotly opendatasets --quiet


In [4]:
from numpy.fft import rfft, rfftfreq
from sklearn import preprocessing
import os
import pandas as pd
import numpy as np
import glob
import timeit

In [5]:
# Folders that hold the CSV recordings, one folder per class.
normal = "VBL-VA001/VBL-VA001/normal"
bearing = "VBL-VA001/VBL-VA001/bearing"
unbalance = "VBL-VA001/VBL-VA001/unbalance"

# Collect every file with a .csv extension in each folder.
# glob.glob returns matches in arbitrary (filesystem-dependent) order, so the
# lists are sorted to keep the row order of the extracted features reproducible.
normal_file_names = sorted(glob.glob(normal + '/*.csv'))
bearing_file_names = sorted(glob.glob(bearing + '/*.csv'))
unbalance_file_names = sorted(glob.glob(unbalance + '/*.csv'))




In [6]:

def std(data):
    '''Standard deviation of every row.

    ``data`` is converted to a NumPy array and reduced along axis 1 with the
    default ``ddof=0``; the result is returned as a one-column DataFrame with
    one entry per input row.
    '''
    samples = np.asarray(data)
    return pd.DataFrame(samples.std(axis=1))

def mean(data):
    '''Arithmetic mean of every row.

    ``data`` is converted to a NumPy array and averaged along axis 1; the
    result is returned as a one-column DataFrame with one entry per input row.
    '''
    samples = np.asarray(data)
    return pd.DataFrame(samples.mean(axis=1))

def pp(data):
    '''Peak-to-peak value (row maximum minus row minimum) of every row.

    Returns a one-column DataFrame with one entry per input row.
    '''
    samples = np.asarray(data)
    highest = samples.max(axis=1)
    lowest = samples.min(axis=1)
    return pd.DataFrame(highest - lowest)

def Impulsef(data):
    '''Impulse factor features.

    For every row of ``data`` this is the row maximum divided by the row's
    mean absolute value.

    Parameters
    ----------
    data : array-like, 2-D
        One signal per row.

    Returns
    -------
    pandas.DataFrame
        One column, one entry per input row. A row whose mean absolute value
        is zero yields ``inf``/``nan`` (NumPy division semantics).
    '''
    data = np.asarray(data)
    # The peak must be taken per row (axis=1); a global maximum would divide
    # every row's mean into the largest value found anywhere in the data set.
    peak = np.max(data, axis=1)
    abs_mean = np.mean(np.absolute(data), axis=1)
    return pd.DataFrame(peak / abs_mean)

def rms(data):
    '''Root-mean-square of every row.

    Squares the values, averages them along axis 1 and takes the square root;
    the result is returned as a one-column DataFrame with one entry per row.
    '''
    samples = np.asarray(data)
    mean_square = np.mean(samples**2, axis=1)
    return pd.DataFrame(np.sqrt(mean_square))

def kurtosis(data):
    '''Kurtosis of every row, as computed by ``DataFrame.kurt(axis=1)``.

    Returns a pandas Series with one entry per input row.
    '''
    frame = pd.DataFrame(data)
    return frame.kurt(axis=1)

def skew(data):
    '''Skewness of every row, as computed by ``DataFrame.skew(axis=1)``.

    Returns a pandas Series with one entry per input row.
    '''
    frame = pd.DataFrame(data)
    return frame.skew(axis=1)

def Shapef(data):
    '''Shape factor of every row: ``rms(data)`` divided by ``Ab_mean(data)``.

    Both helpers return one-column DataFrames, so the division is element-wise
    per row; the quotient is returned as a one-column DataFrame.
    '''
    samples = np.asarray(data)
    row_rms = rms(samples)
    row_abs_mean = Ab_mean(samples)
    return pd.DataFrame(row_rms / row_abs_mean)





def crestf(data):
    '''Crest factor features.

    For every row of ``data`` this is the row maximum divided by the row's
    root-mean-square value.

    Parameters
    ----------
    data : array-like, 2-D
        One signal per row.

    Returns
    -------
    pandas.DataFrame
        One column, one entry per input row. A row whose RMS is zero yields
        ``inf``/``nan`` (NumPy division semantics).
    '''
    data = np.asarray(data)
    # The peak must be taken per row (axis=1); a global maximum would compare
    # every row's RMS against the largest value found anywhere in the data set.
    peak = np.max(data, axis=1)
    root_mean_square = np.sqrt(np.mean(data**2, axis=1))
    return pd.DataFrame(peak / root_mean_square)




def Variance(data):
    '''Variance of every row.

    ``data`` is converted to a NumPy array and reduced along axis 1 with the
    default ``ddof=0``; the result is returned as a one-column DataFrame with
    one entry per input row.
    '''
    samples = np.asarray(data)
    return pd.DataFrame(samples.var(axis=1))

# Helper functions to calculate features
def Ab_mean(data):
    '''Mean of the absolute values of every row.

    Returns a one-column DataFrame with one entry per input row.
    '''
    magnitudes = np.absolute(np.asarray(data))
    return pd.DataFrame(magnitudes.mean(axis=1))


def SQRT_AMPL(data):
    '''Square-root amplitude of every row.

    For each row: take the square root of the absolute values, average them
    along the row, then square that average.

    Parameters
    ----------
    data : array-like, 2-D
        One signal per row.

    Returns
    -------
    pandas.DataFrame
        One column, one entry per input row.
    '''
    data = np.asarray(data)
    # ``axis=1`` belongs to the mean, not to np.absolute: ufuncs such as
    # np.absolute reject an ``axis`` keyword and raise TypeError.
    mean_root = np.mean(np.sqrt(np.absolute(data)), axis=1)
    return pd.DataFrame(mean_root**2)


def clearancef(data):
    '''Clearance factor features.

    For every row of ``data`` this is the row maximum divided by the row's
    square-root amplitude, i.e. ``mean(sqrt(|x|)) ** 2``.

    Parameters
    ----------
    data : array-like, 2-D
        One signal per row.

    Returns
    -------
    pandas.DataFrame
        One column, one entry per input row. A row whose square-root amplitude
        is zero yields ``inf``/``nan`` (NumPy division semantics).
    '''
    data = np.asarray(data)
    peak = np.max(data, axis=1)
    # Computed here with plain NumPy so numerator and denominator are both
    # 1-D arrays of length n; dividing a 1-D array by an (n, 1) DataFrame is
    # rejected by pandas unless n == 1.
    sqrt_amplitude = np.mean(np.sqrt(np.absolute(data)), axis=1) ** 2
    return pd.DataFrame(peak / sqrt_amplitude)

In [7]:
def data_1x(normal_file_names):
    '''Load CSV column 1 from every file and place the results side by side.

    Parameters
    ----------
    normal_file_names : iterable of str
        Paths of header-less CSV files.

    Returns
    -------
    pandas.DataFrame
        One column per file (numbered 0..n-1 in input order), one row per CSV
        line. An empty DataFrame is returned when no paths are given.
    '''
    # Read everything first and concatenate once; concatenating inside the
    # loop re-copies all previously loaded columns on every iteration.
    frames = [pd.read_csv(path, usecols=[1], header=None)
              for path in normal_file_names]
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, axis=1, ignore_index=True)

In [8]:
def data_1y(normal_file_names):
    '''Load CSV column 2 from every file and place the results side by side.

    Parameters
    ----------
    normal_file_names : iterable of str
        Paths of header-less CSV files.

    Returns
    -------
    pandas.DataFrame
        One column per file (numbered 0..n-1 in input order), one row per CSV
        line. An empty DataFrame is returned when no paths are given.
    '''
    # Read everything first and concatenate once; concatenating inside the
    # loop re-copies all previously loaded columns on every iteration.
    frames = [pd.read_csv(path, usecols=[2], header=None)
              for path in normal_file_names]
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, axis=1, ignore_index=True)

In [9]:
def data_1z(normal_file_names):
    '''Load CSV column 3 from every file and place the results side by side.

    Parameters
    ----------
    normal_file_names : iterable of str
        Paths of header-less CSV files.

    Returns
    -------
    pandas.DataFrame
        One column per file (numbered 0..n-1 in input order), one row per CSV
        line. An empty DataFrame is returned when no paths are given.
    '''
    # Read everything first and concatenate once; concatenating inside the
    # loop re-copies all previously loaded columns on every iteration.
    frames = [pd.read_csv(path, usecols=[3], header=None)
              for path in normal_file_names]
    if not frames:
        # pd.concat raises on an empty list; keep the empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, axis=1, ignore_index=True)



In [11]:
def NormalizeData(data):
    '''Normalise ``data`` to the range 0-1 using its global minimum and maximum.

    Parameters
    ----------
    data : numpy.ndarray or pandas.DataFrame
        Values to rescale. A single minimum and a single maximum over the
        whole input are used, not per-row or per-column extrema.

    Returns
    -------
    Same container type as ``data``
        ``(data - min) / (max - min)``. When every value is identical the
        range is zero; zeros are returned in that case instead of the NaNs
        that a 0/0 division would produce.
    '''
    # The reduction is applied twice on purpose: depending on the pandas
    # version, np.min/np.max on a DataFrame may return per-column values
    # rather than a scalar. On a scalar the second call is a no-op.
    data_min = np.min(np.min(data))
    data_max = np.max(np.max(data))
    span = data_max - data_min
    shifted = data - data_min
    if span == 0:
        # Constant input: every shifted value is already 0; multiply by 0.0
        # only to return a float result like the regular path does.
        return shifted * 0.0
    return shifted / span

In [12]:
def ProcessedFile(name):
    '''Build the feature table for one list of CSV recordings.

    Parameters
    ----------
    name : list of str
        CSV file paths; passed unchanged to ``data_1x``, ``data_1y`` and
        ``data_1z`` (which read CSV columns 1, 2 and 3 respectively).

    Returns
    -------
    pandas.DataFrame
        One row per file and 30 columns numbered 0..29: ten features, each
        computed for the three loaded columns in x, y, z order. Feature order:
        Shapef, std, mean, pp, Variance, rms, Impulsef, crestf, kurtosis, skew.
    '''
    loaders = (data_1x, data_1y, data_1z)
    feature_funcs = (Shapef, std, mean, pp, Variance,
                     rms, Impulsef, crestf, kurtosis, skew)

    # One (files x samples) array per axis. The loaders return one column per
    # file, hence the transpose; dropna(axis=1) then removes every sample
    # position that is missing in at least one file. Each axis is normalised
    # on its own.
    axes = [NormalizeData(np.array(load(name).T.dropna(axis=1)))
            for load in loaders]

    # Feature-major, axis-minor ordering: all three axes of the first feature,
    # then all three axes of the second feature, and so on.
    columns = [feature(axis_data)
               for feature in feature_funcs
               for axis_data in axes]
    return pd.concat(columns, axis=1, ignore_index=True)
    
    

In [28]:
# data_normal_x = NormalizeData(data_normal_x)
# data_normal_y = NormalizeData(data_normal_y)
# data_normal_z = NormalizeData(data_normal_z)
# data_bearing_x = NormalizeData(data_bearing_x)
# data_bearing_y = NormalizeData(data_bearing_y)
# data_bearing_z = NormalizeData(data_bearing_z)
# data_unbalance_x = NormalizeData(data_unbalance_x)
# data_unbalance_y = NormalizeData(data_unbalance_y)
# data_unbalance_z = NormalizeData(data_unbalance_z)



In [29]:
# data_1x_shape=Shapef(data_normal_x)
# data_1y_shape=Shapef(data_normal_y)
# data_1z_shape=Shapef(data_normal_z)
# data_2x_shape=Shapef(data_bearing_x)
# data_2y_shape=Shapef(data_bearing_y)
# data_2z_shape=Shapef(data_bearing_z)
# data_3x_shape=Shapef(data_unbalance_x)
# data_3y_shape=Shapef(data_unbalance_y)
# data_3z_shape=Shapef(data_unbalance_z)
# data_shape = pd.concat([data_1x_shape,data_1y_shape,data_1z_shape], axis=1,ignore_index=True)
# data2_shape = pd.concat([data_2x_shape,data_2y_shape,data_2z_shape], axis=1,ignore_index=True)
# data3_shape = pd.concat([data_3x_shape,data_3y_shape,data_3z_shape], axis=1,ignore_index=True)


In [30]:
# data3_shape.shape


(1000, 3)

In [31]:
# data_1x_std=std(data_normal_x)
# data_1y_std=std(data_normal_y)
# data_1z_std=std(data_normal_z)
# data_std = pd.concat([data_1x_std,data_1y_std,data_1z_std], axis=1,ignore_index=True)
# data_2x_std=std(data_bearing_x)
# data_2y_std=std(data_bearing_y)
# data_2z_std=std(data_bearing_z)
# data_3x_std=std(data_unbalance_x)
# data_3y_std=std(data_unbalance_y)
# data_3z_std=std(data_unbalance_z)
# data2_std = pd.concat([data_2x_std,data_2y_std,data_2z_std], axis=1,ignore_index=True)
# data3_std = pd.concat([data_3x_std,data_3y_std,data_3z_std], axis=1,ignore_index=True)
# data3_std.shape

(1000, 3)

In [32]:
# data_1x_mean=mean(data_normal_x)
# data_1y_mean=mean(data_normal_y)
# data_1z_mean=mean(data_normal_z)
# data_mean= pd.concat([data_1x_mean,data_1y_mean,data_1z_mean], axis=1,ignore_index=True)
# data_2x_mean=mean(data_bearing_x)
# data_2y_mean=mean(data_bearing_y)
# data_2z_mean=mean(data_bearing_z)
# data_3x_mean=mean(data_unbalance_x)
# data_3y_mean=mean(data_unbalance_y)
# data_3z_mean=mean(data_unbalance_z)
# data2_mean= pd.concat([data_2x_mean,data_2y_mean,data_2z_mean], axis=1,ignore_index=True)
# data3_mean= pd.concat([data_3x_mean,data_3y_mean,data_3z_mean], axis=1,ignore_index=True)
# data3_mean.shape

(1000, 3)

In [33]:
# data_1x_pp=pp(data_normal_x)
# data_1y_pp=pp(data_normal_y)
# data_1z_pp=pp(data_normal_z)
# data_pp= pd.concat([data_1x_pp,data_1y_pp,data_1z_pp], axis=1,ignore_index=True)
# data_2x_pp=pp(data_bearing_x)
# data_2y_pp=pp(data_bearing_y)
# data_2z_pp=pp(data_bearing_z)
# data2_pp= pd.concat([data_2x_pp,data_2y_pp,data_2z_pp], axis=1,ignore_index=True)
# data_3x_pp=pp(data_unbalance_x)
# data_3y_pp=pp(data_unbalance_y)
# data_3z_pp=pp(data_unbalance_z)
# data3_pp= pd.concat([data_3x_pp,data_3y_pp,data_3z_pp], axis=1,ignore_index=True)
# data3_pp.shape

(1000, 3)

In [34]:
# data_1x_variance=Variance(data_normal_x)
# data_1y_variance=Variance(data_normal_y)
# data_1z_variance=Variance(data_normal_z)
# data_variance= pd.concat([data_1x_variance,data_1y_variance,data_1z_variance], axis=1,ignore_index=True)
# data_2x_variance=Variance(data_bearing_x)
# data_2y_variance=Variance(data_bearing_y)
# data_2z_variance=Variance(data_bearing_z)
# data2_variance= pd.concat([data_2x_variance,data_2y_variance,data_2z_variance], axis=1,ignore_index=True)
# data_3x_variance=Variance(data_unbalance_x)
# data_3y_variance=Variance(data_unbalance_y)
# data_3z_variance=Variance(data_unbalance_z)
# data3_variance= pd.concat([data_3x_variance,data_3y_variance,data_3z_variance], axis=1,ignore_index=True)
# data3_variance.shape

(1000, 3)

In [35]:
# data_1x_rms=rms(data_normal_x)
# data_1y_rms=rms(data_normal_y)
# data_1z_rms=rms(data_normal_z)
# data_rms= pd.concat([data_1x_rms,data_1y_rms,data_1z_rms], axis=1,ignore_index=True)
# data_2x_rms=rms(data_bearing_x)
# data_2y_rms=rms(data_bearing_y)
# data_2z_rms=rms(data_bearing_z)
# data2_rms= pd.concat([data_2x_rms,data_2y_rms,data_2z_rms], axis=1,ignore_index=True)
# data_3x_rms=rms(data_unbalance_x)
# data_3y_rms=rms(data_unbalance_y)
# data_3z_rms=rms(data_unbalance_z)
# data3_rms= pd.concat([data_3x_rms,data_3y_rms,data_3z_rms], axis=1,ignore_index=True)
# data3_rms.shape

(1000, 3)

In [178]:
# data_1x_impulse=Impulsef(data_normal_x)
# data_1y_impulse=Impulsef(data_normal_y)
# data_1z_impulse=Impulsef(data_normal_z)
# data_impulse= pd.concat([data_1x_impulse,data_1y_impulse,data_1z_impulse], axis=1,ignore_index=True)
# data_2x_impulse=Impulsef(data_bearing_x)
# data_2y_impulse=Impulsef(data_bearing_y)
# data_2z_impulse=Impulsef(data_bearing_z)
# data2_impulse= pd.concat([data_2x_impulse,data_2y_impulse,data_2z_impulse], axis=1,ignore_index=True)
# data_impulse.shape

(1000, 3)

In [179]:
# data_1x_crest=crestf(data_normal_x)
# data_1y_crest=crestf(data_normal_y)
# data_1z_crest=crestf(data_normal_z)
# data_crest= pd.concat([data_1x_crest,data_1y_crest,data_1z_crest], axis=1,ignore_index=True)
# data_2x_crest=crestf(data_bearing_x)
# data_2y_crest=crestf(data_bearing_y)
# data_2z_crest=crestf(data_bearing_z)
# data2_crest= pd.concat([data_2x_crest,data_2y_crest,data_2z_crest], axis=1,ignore_index=True)
# data_crest.shape

(1000, 3)

In [180]:
# data_1x_kurt=kurtosis(data_normal_x)
# data_1y_kurt=kurtosis(data_normal_y)
# data_1z_kurt=kurtosis(data_normal_z)
# data_kurt= pd.concat([data_1x_kurt,data_1y_kurt,data_1z_kurt], axis=1,ignore_index=True)
# data_2x_kurt=kurtosis(data_bearing_x)
# data_2y_kurt=kurtosis(data_bearing_y)
# data_2z_kurt=kurtosis(data_bearing_z)
# data2_kurt= pd.concat([data_2x_kurt,data_2y_kurt,data_2z_kurt], axis=1,ignore_index=True)
# data_kurt.shape

(1000, 3)

In [181]:
# data_1x_sk=skew(data_normal_x)
# data_1y_sk=skew(data_normal_y)
# data_1z_sk=skew(data_normal_z)
# data_sk= pd.concat([data_1x_sk,data_1y_sk,data_1z_sk], axis=1,ignore_index=True)
# data_2x_sk=skew(data_bearing_x)
# data_2y_sk=skew(data_bearing_y)
# data_2z_sk=skew(data_bearing_z)
# data2_sk= pd.concat([data_2x_sk,data_2y_sk,data_2z_sk], axis=1,ignore_index=True)
# data_sk.shape

(1000, 3)

In [182]:
# x_1 = pd.concat([data_shape, data_std, data_mean, data_pp, data_variance,
#                 data_rms, data_impulse, data_crest, data_kurt,data_sk], axis=1, ignore_index=True)
# x_2 = pd.concat([data2_shape, data2_std, data2_mean, data2_pp, data2_variance,
#                 data2_rms, data2_impulse, data2_crest, data2_kurt,data2_sk], axis=1, ignore_index=True)
# x_1.shape

(1000, 30)

In [183]:
# x_1.to_csv('normal_processed.csv', header=False, index=False)
# x_2.to_csv('bearing_processed.csv', header=False, index=False)

In [13]:
# Extract the features for the files listed in unbalance_file_names and store
# them as a plain CSV (no header row, no index column).
x_1 = ProcessedFile(unbalance_file_names)
x_1.to_csv(
    'unbalanced_processed.csv',
    index=False,
    header=False,
)