In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib widget

In [2]:
# Problem dimensions shared by the cells below.
# dofs: number of masses; it sizes the last axis of every statistics array and
#       the per-mass feature columns ("..._m1" .. "..._m{dofs}").
# num_states: number of states, numbered 0 .. num_states - 1 in the dataset
#       paths; state 0 receives health label 0, every other state label 1.
# num_time_histories: CSV files read per state (file names are numbered from 1).
dofs = 8
num_states = 5
num_time_histories = 100 # Number of time-histories

In [3]:
# Per-mass acceleration statistics for every (time history, state) pair.
# Each array is indexed as [time_history, state, mass].
stats_shape = (num_time_histories, num_states, dofs)
peak_amp, mean, rms, std, skewness, kurtosis = (np.zeros(stats_shape) for _ in range(6))

# np.ndindex walks the (time_history, state) grid with the state index varying
# fastest, i.e. in the same order as a nested time_history -> state loop.
for time_history, state in np.ndindex(num_time_histories, num_states):
    # One CSV per state and time history; the files are numbered from 1.
    csv_path = f"datasets/state{state}_datasets/state{state}_acceleration_timehistory{time_history+1}.csv"
    acc = pd.read_csv(csv_path, index_col = "Time")

    # Every reduction below is column-wise, so each yields one value per CSV
    # column (assumed to be one column per mass, `dofs` in total -- the
    # assignments fail with a shape error otherwise).
    run = (time_history, state)
    mean_square = (acc**2).mean()

    peak_amp[run] = acc.abs().max().to_numpy()    # largest absolute value
    mean[run] = acc.mean().to_numpy()
    rms[run] = (mean_square**0.5).to_numpy()      # root mean square
    std[run] = acc.std().to_numpy()               # pandas default: sample std (ddof=1)
    skewness[run] = acc.skew().to_numpy()
    kurtosis[run] = acc.kurtosis().to_numpy()     # pandas default: Fisher (excess) kurtosis

In [4]:
def _stack_feature(values, prefix):
    """Flatten one statistic array into a table with one row per run.

    Parameters
    ----------
    values : np.ndarray
        Array indexed as [time_history, state, mass].
    prefix : str
        Feature name used to build the column labels "<prefix>_m1",
        "<prefix>_m2", ... (one per mass).

    Returns
    -------
    pd.DataFrame
        Frame of shape (num_time_histories * num_states, num_masses) with a
        default RangeIndex. Rows are ordered time history by time history,
        with the states of each time history in ascending order, which is the
        same order the label columns below are generated in.
    """
    n_masses = values.shape[-1]
    columns = [f"{prefix}_m{i+1}" for i in range(n_masses)]
    # A C-order reshape keeps the last axis (mass) as columns and makes the
    # state index vary fastest along the rows. This replaces the former
    # per-time-history DataFrame.append loop, which no longer runs on
    # pandas >= 2.0 (DataFrame.append was removed) and copied the growing
    # frame on every iteration.
    return pd.DataFrame(values.reshape(-1, n_masses), columns=columns)


# Label columns, repeated once per time history:
#   health -> 0 for state 0, 1 for every other state
#   state  -> the state number itself
statistical_feature_df = pd.DataFrame(
    {
        "health": ([0] + [1] * (num_states - 1)) * num_time_histories,
        "state": list(range(num_states)) * num_time_histories,
    }
)

# One feature table per statistic (the `mean` array is not part of the
# feature set, exactly as before).
peak_amp_df = _stack_feature(peak_amp, "peak_amp")
rms_df = _stack_feature(rms, "rms")
std_df = _stack_feature(std, "std")
skewness_df = _stack_feature(skewness, "skewness")
kurtosis_df = _stack_feature(kurtosis, "kurtosis")

# All frames share the same RangeIndex, so the column-wise concat lines the
# labels up with their feature rows.
statistical_feature_df = pd.concat([statistical_feature_df, peak_amp_df, rms_df, std_df, skewness_df, kurtosis_df], axis = 1)
statistical_feature_df.to_csv("datasets/statistical_features.csv", index=False)

In [5]:
# Feature matrix: every column after the first two, which hold the "health"
# and "state" labels.
X = statistical_feature_df.iloc[:,2:]
# Target: the binary "health" label (0 for state 0, 1 for every other state).
y = statistical_feature_df['health']


In [6]:
### Feature Selection

# f-classifier: score every feature with the ANOVA F-statistic against `y`.

from sklearn.feature_selection import SelectKBest, f_classif

# k only affects which columns a later transform() would keep; the per-feature
# scores read below are computed for every column of X regardless of k.
ordered_rank_features = SelectKBest(score_func=f_classif, k=40)
ordered_features = ordered_rank_features.fit(X, y)

# Two-column table: feature names (left under the default column label 0)
# next to their F-scores.
score1 = pd.concat(
    [
        pd.DataFrame(X.columns),
        pd.DataFrame(ordered_features.scores_, columns=["Score"]),
    ],
    axis = 1,
)
score1

Unnamed: 0,0,Score
0,peak_amp_m1,5.4e-05
1,peak_amp_m2,1.70466
2,peak_amp_m3,75.349959
3,peak_amp_m4,33.69894
4,peak_amp_m5,3.275092
5,peak_amp_m6,14.90742
6,peak_amp_m7,85.385851
7,peak_amp_m8,31.424497
8,rms_m1,8.050531
9,rms_m2,26.197984


In [17]:
# Feature Importance
#
# Fit an extremely-randomized-trees ensemble on (X, y) and keep its per-feature
# importances as a Series indexed by feature name.

from sklearn.ensemble import ExtraTreesClassifier
# The ensemble is randomized; without a fixed seed the importances (and hence
# the feature ranking) change on every run of the notebook.
model = ExtraTreesClassifier(random_state=42)
model.fit(X, y)
ranked_features = pd.Series(model.feature_importances_, index=X.columns)
#ranked_features.plot(kind='barh')

In [21]:
# Pearson's Correlation

import seaborn as sns

# Correlation matrix of the "health" label together with every feature column
# (the "state" column at position 1 is left out). DataFrame.corr() defaults to
# Pearson's coefficient.
corr_df = pd.concat(
    [statistical_feature_df["health"], statistical_feature_df.iloc[:, 2:]],
    axis = 1,
).corr()
# NOTE(review): corr_df is already a correlation matrix, so the heatmap call
# below would plot the correlation of correlations -- confirm that is intended
# before re-enabling it.
#plt.figure(figsize=(30,30))
#sns.heatmap(corr_df[corr_df.index].corr(), annot=True)

In [19]:
# Information Gain
#
# Estimate the mutual information between each feature column of X and the
# label y, and keep the result as a Series indexed by feature name.

from sklearn.feature_selection import mutual_info_classif

# The estimator adds a small random noise to continuous features, so the
# scores vary between runs unless the seed is fixed.
mutual_info = mutual_info_classif(X, y, random_state=42)
mutual_info_df = pd.Series(mutual_info, index=X.columns)
#mutual_info_df.plot(kind='barh')