#  Description

# # 1. Database Description

1.1. Introduction
This database is composed of 1951 multivariate time series acquired by sensors on a SpectraQuest Machinery Fault Simulator (MFS) Alignment-Balance-Vibration (ABVT). The 1951 sequences comprise six different simulated states: normal function, imbalance fault, horizontal and vertical misalignment faults, and inner and outer bearing faults. This section describes the database.

The database is composed of several CSV (Comma-Separated Values) files, each with 8 columns, one column per sensor, as follows:

column 1 -->
tachometer signal, which allows the rotation frequency to be estimated;

columns 2 to 4 -->
underhang bearing accelerometer (axial, radial and tangential directions);

columns 5 to 7 -->
overhang bearing accelerometer (axial, radial and tangential directions);

column 8 -->
microphone.

[Link to the dataset home page, click here.](http://www02.smt.ufrj.br/~offshore/mfs/page_01.html)

#  Loading DataSet

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob 
import statsmodels
import time

In [7]:
cur_path = "/kaggle/input/fault-induction-motor-dataset/"

# Collect the CSV paths of every recording, one list per operating condition
# (normal, and imbalance with 6 g ... 30 g).
# glob.glob returns its matches in arbitrary, filesystem-dependent order, so
# each list is sorted (lexicographically by path) to make the order in which
# the files are later concatenated reproducible between runs and machines.
normal_file_names = sorted(glob.glob(cur_path +'/normal/normal/*.csv'))
imnormal_file_names_6g = sorted(glob.glob(cur_path+'/imbalance/imbalance/6g/*.csv'))
imnormal_file_names_10g = sorted(glob.glob(cur_path+'/imbalance/imbalance/10g/*.csv'))
imnormal_file_names_15g = sorted(glob.glob(cur_path+'/imbalance/imbalance/15g/*.csv'))
imnormal_file_names_20g = sorted(glob.glob(cur_path+'/imbalance/imbalance/20g/*.csv'))
imnormal_file_names_25g = sorted(glob.glob(cur_path+'/imbalance/imbalance/25g/*.csv'))
imnormal_file_names_30g = sorted(glob.glob(cur_path+'/imbalance/imbalance/30g/*.csv'))

In [8]:
# Display the first two collected paths of the normal-condition recordings.
normal_file_names[:2]

In [9]:
# Display the first two collected paths of the 6g imbalance recordings.
imnormal_file_names_6g[:2]

In [10]:
# Column names assigned to the eight header-less CSV columns, in file order.
colnames = ['rpm','uh_a','uh_r','Uh_t','oh_a','oh_r','oh_t','sound']


def dataReader(path_names):
    """Read several header-less CSV files and stack them into one DataFrame.

    Parameters
    ----------
    path_names : iterable of str
        Paths of the CSV files to load, in the order they should be stacked.

    Returns
    -------
    pandas.DataFrame
        Rows of all files, one after the other, with columns named after
        ``colnames`` and a fresh 0..N-1 index. An empty frame with those
        columns is returned when ``path_names`` is empty.
    """
    # Read everything first and concatenate once: calling pd.concat inside
    # the loop copies the accumulated frame again on every iteration.
    frames = [
        pd.read_csv(path, header=None, names=colnames)
        for path in path_names
    ]
    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=colnames)
    return pd.concat(frames, ignore_index=True)

In [11]:
# Load the recordings of every condition into memory and report how long the
# whole load took.
toc = time.time()  # timestamp taken before loading starts

(
    data_n,
    data_6g,
    data_10g,
    data_15g,
    data_20g,
    data_25g,
    data_30g,
) = map(
    dataReader,
    (
        normal_file_names,
        imnormal_file_names_6g,
        imnormal_file_names_10g,
        imnormal_file_names_15g,
        imnormal_file_names_20g,
        imnormal_file_names_25g,
        imnormal_file_names_30g,
    ),
)

tic = time.time()  # timestamp taken once every frame is loaded
# time.time() returns seconds, hence the factor 1000 for milliseconds.
print('execution time ',(tic - toc)*1000,'ms')

In [12]:
# Print column dtypes, non-null counts and memory usage of the normal frame.
data_n.info()


In [13]:
# Display the first rows of the normal-condition frame.
data_n.head()

# Data Sampling

In [14]:
def downSampler(data,a,b):
    """Down-sample ``data`` by averaging consecutive windows of rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Samples to reduce, one row per sample.
    a : int
        Row index at which the first window starts.
    b : int
        Sampling rate (rows per output row). It is also the end index of the
        first window, so the first window is ``data.iloc[a:b]`` and every
        following window is shifted by ``b`` rows.

    Returns
    -------
    pandas.DataFrame
        ``len(data) // b`` rows with the columns of ``data``; each row holds
        the column-wise sum of one window divided by ``b`` (the window mean
        when ``a`` is 0). Rows left over after the last window are ignored.
    """
    step = b
    window_count = len(data) // step
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; collect the reduced rows and build the frame once.
    reduced_rows = [
        data.iloc[a + k * step:b + k * step, :].sum() / step
        for k in range(window_count)
    ]
    return pd.DataFrame(reduced_rows, columns=data.columns).reset_index(drop=True)

In [15]:
# Replace every frame by its down-sampled version: windows start at row 0
# and each 5000 consecutive rows collapse into one output row.
(
    data_n,
    data_6g,
    data_10g,
    data_15g,
    data_20g,
    data_25g,
    data_30g,
) = (
    downSampler(frame, 0, 5000)
    for frame in (
        data_n,
        data_6g,
        data_10g,
        data_15g,
        data_20g,
        data_25g,
        data_30g,
    )
)

In [16]:
# Summary statistics of the down-sampled normal-condition frame.
data_n.describe()

# Data Visualization

In [17]:
# Overlay each channel of the 6g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 6g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_6g[column],'r', label='6g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

In [18]:
# Overlay each channel of the 10g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 10g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_10g[column],'r', label='10g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

In [19]:
# Overlay each channel of the 15g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 15g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_15g[column],'r', label='15g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

In [20]:
# Overlay each channel of the 20g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 20g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_20g[column],'r', label='20g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

In [21]:
# Overlay each channel of the 25g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 25g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_25g[column],'r', label='25g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

In [22]:
# Overlay each channel of the 30g imbalance frame (red) on the normal frame
# (green), one subplot per column.
plot_columns = list(data_n.columns)
# The grid is sized from the data instead of a hard-coded 8; squeeze=False
# keeps the returned axes array 2-D even when there is a single column.
fig, axs = plt.subplots(len(plot_columns), squeeze=False, sharex=False, sharey=False,figsize=(15,15))
axs = axs[:, 0]
fig.suptitle('All Features of Normal Vs 30g imbalance state State')
for ax, column in zip(axs, plot_columns):
    ax.plot(data_30g[column],'r', label='30g imbalance')
    ax.plot(data_n[column],'g', label='normal')
    ax.set_title('{} Column of Dataset'.format(column))
    # Without a legend the two traces can only be told apart by reading the code.
    ax.legend(loc='upper right')

# Data Preprocessing & Transformation from time domain to frequency domain

In [25]:
from scipy import signal
def FFT(data):
    """Convolve ``data`` with its row-reversed copy and wrap the result.

    Despite the name, no spectrum is returned: the FFT is only the method
    scipy.signal.fftconvolve uses to evaluate the convolution. Convolving a
    sequence with its reversed self gives its autocorrelation.

    NOTE(review): fftconvolve convolves along every axis by default, so for
    a multi-column input the result has more columns than ``data`` and is
    not a per-column autocorrelation - confirm this is intended.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Samples, one row per time step.

    Returns
    -------
    pandas.DataFrame
        The full convolution, with default integer row and column labels.
    """
    reversed_rows = data[::-1]
    full_convolution = signal.fftconvolve(data, reversed_rows, mode='full')
    return pd.DataFrame(full_convolution)

In [26]:
# Replace every down-sampled frame by the output of FFT(), i.e. its
# convolution with its own row-reversed copy.
(
    data_n,
    data_6g,
    data_10g,
    data_15g,
    data_20g,
    data_25g,
    data_30g,
) = (
    FFT(frame)
    for frame in (
        data_n,
        data_6g,
        data_10g,
        data_15g,
        data_20g,
        data_25g,
        data_30g,
    )
)

# Data Scaling

In [27]:
from sklearn.preprocessing import MinMaxScaler
# A single scaler object is reused; fit_transform refits it on every frame,
# so each condition is rescaled with its own per-column minimum and maximum.
# NOTE(review): scaling each class separately, and before the train/test
# split, can hide amplitude differences between conditions and lets test
# rows influence the scaling - consider fitting on the training split only.
mms = MinMaxScaler()
(
    data_n,
    data_6g,
    data_10g,
    data_15g,
    data_20g,
    data_25g,
    data_30g,
) = (
    pd.DataFrame(mms.fit_transform(frame))
    for frame in (
        data_n,
        data_6g,
        data_10g,
        data_15g,
        data_20g,
        data_25g,
        data_30g,
    )
)

In [28]:
# Summary statistics of the normal-condition frame after min-max scaling.
data_n.describe()

In [29]:
# Display the first rows of the scaled normal-condition frame.
data_n.head()

In [30]:
# One integer class label per feature row, stored as single-column frames:
# normal -> 1, 6g -> 0, 10g -> 2, 15g -> 3, 20g -> 4, 25g -> 5, 30g -> 6.
class_frames = (data_n, data_6g, data_10g, data_15g, data_20g, data_25g, data_30g)
class_labels = (1, 0, 2, 3, 4, 5, 6)
y_1, y_2, y_3, y_4, y_5, y_6, y_7 = (
    pd.DataFrame(np.full((len(frame), 1), label))
    for frame, label in zip(class_frames, class_labels)
)
# Stack the label frames in the same order as the class frames above.
y = pd.concat(
    [y_1, y_2, y_3, y_4, y_5, y_6, y_7],
    ignore_index=True,
)
y.shape

In [31]:
# Stack the feature rows of all seven conditions (normal first, then 6g to
# 30g) into one frame with a fresh 0..N-1 index.
all_conditions = (
    data_n,
    data_6g,
    data_10g,
    data_15g,
    data_20g,
    data_25g,
    data_30g,
)
data = pd.concat(all_conditions, ignore_index=True)
data.shape

# Data Splitting

In [32]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing.
# random_state pins the shuffle so the scores reported below are reproducible
# from run to run; stratify=y keeps the class proportions equal in the train
# and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=42,
    stratify=y,
)

In [33]:
# Report the (rows, columns) of both splits, one message per line.
print(
    "Shape of Train Data : {}".format(X_train.shape),
    "Shape of Test Data : {}".format(X_test.shape),
    sep="\n",
)

# SVM Classifier

In [34]:
from sklearn.svm import SVC
# Train a support-vector classifier with scikit-learn's default settings and
# report its accuracy on both splits.
svm = SVC()
# y_train is a single-column DataFrame; scikit-learn expects a 1-D label
# array and emits a DataConversionWarning otherwise, so flatten it first.
svm.fit(X_train, y_train.values.ravel())
print("SVM accuracy is {} on Train Dataset".format(svm.score(X_train,y_train)))
print("SVM accuracy is {} on Test Dataset".format(svm.score(X_test,y_test)))

In [35]:
from sklearn.metrics import classification_report, confusion_matrix
# Per-class metrics of the SVM on the held-out rows.
predictions = svm.predict(X_test)
svm_report = classification_report(y_test, predictions)
print(svm_report)

# KNN

In [36]:
from sklearn.neighbors import KNeighborsClassifier
# Train a 3-nearest-neighbour classifier whose votes are weighted by inverse
# distance, and report its accuracy on both splits.
knn = KNeighborsClassifier(n_neighbors = 3, weights = 'distance' )
# y_train is a single-column DataFrame; scikit-learn expects a 1-D label
# array and emits a DataConversionWarning otherwise, so flatten it first.
knn.fit(X_train, y_train.values.ravel())
# Read k back from the estimator so the message cannot drift from the model.
print("k={}NN Accuracy on Train Data: {}".format(knn.n_neighbors,knn.score(X_train,y_train)))
print("k={}NN Accuracy on Test Data: {}".format(knn.n_neighbors,knn.score(X_test,y_test)))

# Random Forest

In [39]:
from sklearn.ensemble import RandomForestClassifier
# Train a random forest of 100 trees limited to depth 5 and report its
# accuracy on both splits. oob_score=True additionally makes the out-of-bag
# estimate available as classifier_rf.oob_score_ after fitting.
classifier_rf = RandomForestClassifier(random_state=42, n_jobs=-1, max_depth=5,
                                       n_estimators=100, oob_score=True)
# y_train is a single-column DataFrame; scikit-learn expects a 1-D label
# array and emits a DataConversionWarning otherwise, so flatten it first.
classifier_rf.fit(X_train, y_train.values.ravel())
print("Random Forest Accuracy on Train Data: {}".format(classifier_rf.score(X_train,y_train)))
print("Random Forest Accuracy on Test Data: {}".format(classifier_rf.score(X_test,y_test)))