In [17]:
%matplotlib inline

import numpy as np
import pandas as pd # Data frames
import matplotlib.pyplot as plt # Visuals
import seaborn as sns 
sns.set()
import csv
import re
from sklearn.model_selection import train_test_split # Create training and test sets
from sklearn.tree import DecisionTreeClassifier # Decision Trees
from sklearn import tree 
from sklearn.ensemble import RandomForestClassifier # Random Forest
from sklearn import svm #SVM
from sklearn.metrics import roc_curve # ROC Curves
from sklearn.model_selection import cross_val_score  #cross validation 
from sklearn.metrics import precision_score, recall_score, f1_score
import pywt
from sklearn.metrics import mean_squared_error
import plotly.offline as py
from scipy import signal
import time
from scipy.signal import butter, filtfilt
py.init_notebook_mode(connected=True)
plt.style.use('ggplot')

In [3]:
import os

In [4]:
# Absolute path of the directory the notebook kernel is running in;
# data locations used later are resolved relative to it.
root = os.path.abspath(os.curdir)
root

'/home/bench-user/data/mt/EMG/EMG-Signal-Classification/Src/Feature_extraction'

In [8]:
# CSV written by the sibling Preprocessing step, addressed relative to the
# working directory captured in `root`.
preprocessed_file_dir = "{}{}".format(root, "/../Preprocessing/preprocessed_data.csv")

# The first CSV column is used as the row index of the frame.
final_df = pd.read_csv(
    filepath_or_buffer = preprocessed_file_dir,
    index_col = 0,
)
final_df

Unnamed: 0,seconds,data1,data2,data3,data4,data5,data6,data7,data8,class
0,0.002,0.000613,0.001049,0.004225,0.012485,0.000468,0.001777,0.001689,0.001366,1
1,0.004,0.956846,0.011618,0.030003,0.111844,0.025706,0.010426,0.010897,0.007328,1
2,0.006,0.908362,0.012043,0.085393,0.255010,0.048960,0.022870,0.028841,0.015893,1
3,0.008,0.844041,0.007302,0.117927,0.282160,0.067376,0.033753,0.035809,0.022016,1
4,0.010,0.788621,0.007005,0.109755,0.103721,0.021477,0.018514,0.021000,0.010505,1
...,...,...,...,...,...,...,...,...,...,...
378849,123.752,0.001732,0.022693,0.005357,0.216207,0.001613,0.003076,0.000859,0.000244,6
378850,123.754,0.002075,0.012682,0.006859,0.147129,0.005849,0.000275,0.000023,0.002928,6
378851,123.756,0.005819,0.011530,0.017442,0.034576,0.007424,0.000576,0.001746,0.001596,6
378852,123.758,0.002923,0.007873,0.015670,0.014390,0.010850,0.001370,0.000190,0.001828,6


In [9]:
# Split the preprocessed samples into one dataframe per value of 'class'
grouped = final_df.groupby('class')
print(grouped)

# Independent copy of every group, keyed by its class label
class_dataframes = {class_label: group_df.copy() for class_label, group_df in grouped}

# Fail early and name every absent label, instead of a bare `KeyError: <n>`
# from whichever lookup below happens to hit the gap first.
expected_classes = (1, 2, 3, 4, 5, 6)
missing_classes = [label for label in expected_classes if label not in class_dataframes]
if missing_classes:
    raise KeyError(f"final_df has no rows for class label(s): {missing_classes}")

# Each group already contains only rows whose 'class' equals its key, so the
# frames are used as-is: no extra `.loc[:, :]` slice and no rewrite of the
# 'class' column are needed.
class_1_dataframe = class_dataframes[1]
class_2_dataframe = class_dataframes[2]
class_3_dataframe = class_dataframes[3]
class_4_dataframe = class_dataframes[4]
class_5_dataframe = class_dataframes[5]
class_6_dataframe = class_dataframes[6]

# Example: Printing the separate dataframes
print("Dataframe for Class 1:")
print(class_1_dataframe)
print()

print("Dataframe for Class 2:")
print(class_2_dataframe)
print()

print("Dataframe for Class 3:")
print(class_3_dataframe)

print("Dataframe for Class 6:")
print(class_6_dataframe)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f5afdbd25c0>
Dataframe for Class 1:
       seconds     data1     data2     data3     data4     data5     data6  \
0        0.002  0.000613  0.001049  0.004225  0.012485  0.000468  0.001777   
1        0.004  0.956846  0.011618  0.030003  0.111844  0.025706  0.010426   
2        0.006  0.908362  0.012043  0.085393  0.255010  0.048960  0.022870   
3        0.008  0.844041  0.007302  0.117927  0.282160  0.067376  0.033753   
4        0.010  0.788621  0.007005  0.109755  0.103721  0.021477  0.018514   
...        ...       ...       ...       ...       ...       ...       ...   
63760  127.522  0.022490  0.006615  0.059849  0.209969  0.036144  0.006052   
63761  127.524  0.107816  0.013601  0.091302  0.248998  0.058166  0.012221   
63762  127.526  0.056304  0.003236  0.040680  0.142767  0.033818  0.008498   
63763  127.528  0.124491  0.000522  0.023569  0.150122  0.015237  0.002870   
63764  127.530  0.074360  0.001401  0.055548  0

In [10]:
#calculating approximation and detail coefficients by dwt
def coeffs(df, wavelet = 'sym4', level = 10):
    """Run a multilevel DWT on every signal column of ``df``.

    The first and the last column of ``df`` are skipped (in this notebook
    they are the time stamps and the class label); every column in between
    is decomposed with ``pywt.wavedec``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame laid out as ``[first, signal_1, ..., signal_n, last]`` that
        also has a ``'class'`` column. It is expected to hold the samples of
        a single class: the label of its first row is written to every
        output row.
    wavelet : str, default 'sym4'
        Wavelet name forwarded to ``pywt.wavedec``.
    level : int, default 10
        Decomposition level forwarded to ``pywt.wavedec``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(approx_df, details_df)``. ``approx_df`` holds element 0 of the
        ``wavedec`` result (the approximation coefficients) for each signal
        column, ``details_df`` holds element 1 (the detail coefficients of
        the coarsest level). Both frames end with a constant ``'class'``
        column. When ``df`` has no signal columns, two empty frames without
        a ``'class'`` column are returned.
    """
    signal_columns = df.columns.tolist()[1:-1]

    approx_by_column = {}
    details_by_column = {}
    for column in signal_columns:
        # One decomposition per column; both coefficient sets come from it.
        decomposition = pywt.wavedec(df[column], wavelet, level = level)
        approx_by_column[column] = decomposition[0]
        details_by_column[column] = decomposition[1]

    # Build each frame once instead of re-assigning columns repeatedly.
    approx_df = pd.DataFrame(approx_by_column, columns = signal_columns)
    details_df = pd.DataFrame(details_by_column, columns = signal_columns)

    if signal_columns:
        # Same label source for both outputs; row 0 always exists here, so a
        # single-row frame no longer fails on an out-of-range lookup.
        class_label = df['class'].values[0]
        approx_df['class'] = class_label
        details_df['class'] = class_label
    return approx_df, details_df


# Decompose every class frame exactly once and unpack both results;
# calling coeffs() separately for [0] and [1] would repeat the whole DWT.
df_class1_approx, df_class1_details = coeffs(class_1_dataframe)
df_class2_approx, df_class2_details = coeffs(class_2_dataframe)
df_class3_approx, df_class3_details = coeffs(class_3_dataframe)
df_class4_approx, df_class4_details = coeffs(class_4_dataframe)
df_class5_approx, df_class5_details = coeffs(class_5_dataframe)
df_class6_approx, df_class6_details = coeffs(class_6_dataframe)

# Spot-check the first and last class for both coefficient sets
print(df_class1_approx)
print(df_class6_approx)
print(df_class1_details)
print(df_class6_details)

        data1     data2     data3     data4     data5     data6     data7  \
0   17.261028  0.233085  1.054185  3.289182  0.646551  0.323208  0.362880   
1   18.791972  0.218241  1.004792  3.160344  0.636965  0.303699  0.341973   
2    7.517735  0.359031  1.352812  4.142039  0.830080  0.440532  0.506595   
3    0.037226  0.453570  1.555651  4.728338  1.127243  0.557710  0.619816   
4    0.467514  0.553521  1.653131  4.737151  1.136429  0.603048  0.651832   
..        ...       ...       ...       ...       ...       ...       ...   
64   0.860980  0.236461  1.591395  5.342091  1.162057  0.251440  2.982567   
65   2.471000  0.156754  1.663165  5.985884  1.201232  0.231868  7.714176   
66   2.543029  0.154824  1.661516  6.018080  1.202942  0.232809  7.918738   
67   2.508153  0.155495  1.662951  5.992689  1.200496  0.231864  7.821042   
68   1.844014  0.189508  1.606235  5.669686  1.171225  0.236168  5.863937   

       data8  class  
0   0.222239      1  
1   0.212599      1  
2   0.299

In [11]:
# Stack the per-class approximation tables in class order (1 to 6) and
# renumber the rows so the combined frame has one continuous index.
final_df_approx = pd.concat(
    objs = (
        df_class1_approx,
        df_class2_approx,
        df_class3_approx,
        df_class4_approx,
        df_class5_approx,
        df_class6_approx,
    ),
    ignore_index = True,
)

# Same stacking for the detail coefficients.
final_df_details = pd.concat(
    objs = (
        df_class1_details,
        df_class2_details,
        df_class3_details,
        df_class4_details,
        df_class5_details,
        df_class6_details,
    ),
    ignore_index = True,
)

# Show the combined approximation table
final_df_approx


Unnamed: 0,data1,data2,data3,data4,data5,data6,data7,data8,class
0,17.261028,0.233085,1.054185,3.289182,0.646551,0.323208,0.362880,0.222239,1
1,18.791972,0.218241,1.004792,3.160344,0.636965,0.303699,0.341973,0.212599,1
2,7.517735,0.359031,1.352812,4.142039,0.830080,0.440532,0.506595,0.299962,1
3,0.037226,0.453570,1.555651,4.728338,1.127243,0.557710,0.619816,0.365921,1
4,0.467514,0.553521,1.653131,4.737151,1.136429,0.603048,0.651832,0.398648,1
...,...,...,...,...,...,...,...,...,...
405,0.153274,0.475463,0.523959,2.569312,0.424167,0.154487,0.396791,0.209755,6
406,0.076447,0.312237,0.342409,2.096912,0.202307,0.025613,-0.019038,0.032626,6
407,0.079854,0.303897,0.343658,2.109003,0.179577,0.026284,0.005412,0.042748,6
408,0.057217,0.282835,0.305431,2.049546,0.174430,0.008542,-0.099689,0.000454,6


In [12]:
# Persist the combined approximation coefficients in the working directory.
# The row index is written as the first (unnamed) CSV column, so readers
# should load the file with index_col = 0.
final_df_approx.to_csv(path_or_buf = "approx_df.csv")