### 3 different methods for data preprocessing

All three methods use a sliding window at 10 Hz; only the step size differs.  
1. step size = 1 (Dataset1.csv & Dataset1_1.csv)
2. step size = 5 (Dataset2.csv)
3. step size = 10 (Dataset3.csv)

### Contents
1. Magnitude
2. Data preprocessing
- 2.1 step size = 1
- 2.2 step size = 5
- 2.3 step size = 10

### 1. Magnitude
The features are as follows.

 1. acceleration from the chest sensor (X axis)  
 2. acceleration from the chest sensor (Y axis)  
 3. acceleration from the chest sensor (Z axis)  
 4. electrocardiogram signal (lead 1)  
 5. electrocardiogram signal (lead 2)  
 6. acceleration from the left-ankle sensor (X axis)  
 7. acceleration from the left-ankle sensor (Y axis)  
 8. acceleration from the left-ankle sensor (Z axis)  
 9. gyro from the left-ankle sensor (X axis)  
 10. gyro from the left-ankle sensor (Y axis)  
 11. gyro from the left-ankle sensor (Z axis)  
 12. magnetometer from the left-ankle sensor (X axis)  
 13. magnetometer from the left-ankle sensor (Y axis)  
 14. magnetometer from the left-ankle sensor (Z axis)  
 15. acceleration from the right-lower-arm sensor (X axis)  
 16. acceleration from the right-lower-arm sensor (Y axis)  
 17. acceleration from the right-lower-arm sensor (Z axis)  
 18. gyro from the right-lower-arm sensor (X axis)  
 19. gyro from the right-lower-arm sensor (Y axis)  
 20. gyro from the right-lower-arm sensor (Z axis)  
 21. magnetometer from the right-lower-arm sensor (X axis)  
 22. magnetometer from the right-lower-arm sensor (Y axis)  
 23. magnetometer from the right-lower-arm sensor (Z axis)    
 24. Label (0 for the null class)  
  
According to UCI, most features are vector quantities measured along the X, Y and Z axes, so the magnitude of each vector has to be calculated during preprocessing.


#### 1.1 Group the data by subject, then by label, and calculate the magnitudes for each group.

In [1]:
import preprocessing as pre # module for data-preprocessing.
import numpy as np
import pandas as pd

In [2]:
"""
Load the log file of every subject (1-10) and keep the results in one list.
"""
# One entry per subject, in subject order (position 0 holds subject 1).
data_list = [pre.read('mHealth_subject' + str(number) + '.log') for number in range(1, 11)]

In [3]:
# Split every subject's recording by activity label (1-12; column 24 holds the label).
# key: zero-based subject position, value: list of 12 frames, one per label
subject_dict = {
    position: [recording[recording[24] == activity] for activity in range(1, 13)]
    for position, recording in enumerate(data_list)
}
    
    

In [6]:
# Preview the first five rows of dict key 1 (subject 2), list position 0 (label 1).
subject_dict[1][0].head(5) # Data that label = 1 for subject2

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
43776,-9.6133,-0.14315,0.6608,-0.087912,-0.025118,0.6675,-9.7436,0.88408,0.62338,-0.69043,...,-2.938,-9.0306,1.9443,0.076471,-0.76797,0.91379,0.36633,0.7259,0.36108,1
43777,-9.5518,-0.18689,1.0569,-0.075353,-0.020931,0.25949,-9.7635,0.879,0.63451,-0.68293,...,-2.985,-8.8887,2.1285,0.076471,-0.76797,0.91379,0.18492,0.53899,-0.3627,1
43778,-9.7872,-0.033346,0.61805,-0.058608,-0.004186,0.54565,-9.863,0.8248,0.63451,-0.68293,...,-2.9167,-8.9579,2.2084,0.076471,-0.76797,0.91379,0.54577,0.71692,-0.35726,1
43779,-9.3277,-0.28358,0.84816,-0.092098,-0.03349,0.57785,-9.8214,1.0251,0.63451,-0.68293,...,-2.9838,-8.909,2.0962,0.076471,-0.76797,0.91379,0.36629,0.72229,0.000103,1
43780,-9.7337,-0.18493,0.67809,-0.075353,-0.037677,0.54392,-9.7248,0.79003,0.63451,-0.68293,...,-3.1474,-8.9477,2.2297,0.07451,-0.77207,0.90302,0.18489,0.53538,-0.72368,1


In [5]:
# Renumber the rows of every per-label frame so that each one starts at index 0.
# pre.reindex is called for its effect on the frame it receives; nothing is reassigned.
per_label_frames = (
    frame
    for frames in subject_dict.values()  # frames: all label groups of one subject
    for frame in frames
)
for frame in per_label_frames:
    pre.reindex(frame)
subject_dict[1][0].head(5)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
0,-9.6133,-0.14315,0.6608,-0.087912,-0.025118,0.6675,-9.7436,0.88408,0.62338,-0.69043,...,-2.938,-9.0306,1.9443,0.076471,-0.76797,0.91379,0.36633,0.7259,0.36108,1
1,-9.5518,-0.18689,1.0569,-0.075353,-0.020931,0.25949,-9.7635,0.879,0.63451,-0.68293,...,-2.985,-8.8887,2.1285,0.076471,-0.76797,0.91379,0.18492,0.53899,-0.3627,1
2,-9.7872,-0.033346,0.61805,-0.058608,-0.004186,0.54565,-9.863,0.8248,0.63451,-0.68293,...,-2.9167,-8.9579,2.2084,0.076471,-0.76797,0.91379,0.54577,0.71692,-0.35726,1
3,-9.3277,-0.28358,0.84816,-0.092098,-0.03349,0.57785,-9.8214,1.0251,0.63451,-0.68293,...,-2.9838,-8.909,2.0962,0.076471,-0.76797,0.91379,0.36629,0.72229,0.000103,1
4,-9.7337,-0.18493,0.67809,-0.075353,-0.037677,0.54392,-9.7248,0.79003,0.63451,-0.68293,...,-3.1474,-8.9477,2.2297,0.07451,-0.77207,0.90302,0.18489,0.53538,-0.72368,1


In [6]:
# Compute the vector magnitudes for every (subject, label) group.
# key: zero-based subject position, value: list with one pre.magnitude() result per label
sub_mag_dict = {
    position: [pre.magnitude(frame) for frame in frames]
    for position, frames in enumerate(subject_dict.values())
}

finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
finished
f

In [7]:
# Preview the magnitudes of dict key 1 (subject 2), list position 0 (label 1) as a table.
pd.DataFrame(sub_mag_dict[1][0]).head()

Unnamed: 0,0,3,6,9,12,15,18
0,9.637048,9.80637,0.952813,0.693234,9.693497,1.196093,0.889667
1,9.611912,9.806422,0.955178,0.624168,9.615078,1.196093,0.675468
2,9.806752,9.912457,0.955178,0.570472,9.676164,1.196093,0.969265
3,9.370474,9.891645,0.955178,0.395701,9.626391,1.196093,0.809859
4,9.759043,9.771987,0.955178,0.498894,9.743666,1.190415,0.918982


In [8]:
"""
Wrap every magnitude result stored in sub_mag_dict in a DataFrame.
The DataFrames that belong to one subject are kept together in a list.
"""
# key: zero-based subject position, value: list of DataFrames, one per label
sub = {
    position: [pd.DataFrame(magnitudes) for magnitudes in per_label]
    for position, per_label in enumerate(sub_mag_dict.values())
}
sub[1][0].head(3)

Unnamed: 0,0,3,6,9,12,15,18
0,9.637048,9.80637,0.952813,0.693234,9.693497,1.196093,0.889667
1,9.611912,9.806422,0.955178,0.624168,9.615078,1.196093,0.675468
2,9.806752,9.912457,0.955178,0.570472,9.676164,1.196093,0.969265


In [9]:
"""
lead1, lead2 and label are scalar signals, so they were left out of the
magnitude calculation. Copy them from the raw per-label frames into the
magnitude frames and tag every row with its 1-based subject number.
"""

for position, raw_frames in enumerate(subject_dict.values()):
    for slot in range(12):  # one slot per activity label
        raw, target = raw_frames[slot], sub[position][slot]
        target['lead1'] = raw[4]    # ECG lead 1 (column 4 of the raw data)
        target['lead2'] = raw[5]    # ECG lead 2 (column 5 of the raw data)
        target['label'] = raw[24]   # activity label (column 24 of the raw data)
        target['subject'] = position + 1
sub[1][0].head(3)

Unnamed: 0,0,3,6,9,12,15,18,lead1,lead2,label,subject
0,9.637048,9.80637,0.952813,0.693234,9.693497,1.196093,0.889667,-0.087912,-0.025118,1,2
1,9.611912,9.806422,0.955178,0.624168,9.615078,1.196093,0.675468,-0.075353,-0.020931,1,2
2,9.806752,9.912457,0.955178,0.570472,9.676164,1.196093,0.969265,-0.058608,-0.004186,1,2


In [11]:
# Collect the per-label frame lists of the ten subjects (dict keys 0-9).
mag_data = [sub[position] for position in range(10)]

# Stack the label groups of each subject, then stack all subjects (1~10) into one frame.
mdata = pd.concat([pd.concat(frames) for frames in mag_data])
pre.reindex(mdata) # to make index unique using order
mdata.tail()

Unnamed: 0,0,3,6,9,12,15,18,lead1,lead2,label,subject
343190,23.147916,19.699651,1.087267,96.814789,9.930761,1.111255,16.907605,0.13396,0.16327,12,10
343191,25.269968,29.672762,1.087267,69.990482,23.919215,1.111255,42.621431,-0.087912,-0.083726,12,10
343192,25.39307,20.842016,1.087267,40.37569,28.693835,1.111255,39.102382,-0.30979,-0.20094,12,10
343193,23.306285,21.030763,1.071011,12.271827,28.244877,1.124044,39.346309,-0.44375,-0.25955,12,10
343194,15.31656,19.901571,1.071011,58.701728,25.544977,1.124044,48.312489,1.9089,1.3857,12,10


In [13]:
# Replace the numeric column labels with descriptive names:
# seven magnitude columns followed by the four scalar columns.
columns = [
    'Mag_' + signal
    for signal in (
        'acc_chest',
        'acc_left_ankle',
        'gyro_left_ankle',
        'magneto_left_ankle',
        'acc_right_lower_arm',
        'gyro_right_lower_arm',
        'magneto_right_lower_arm',
    )
] + ['lead1', 'lead2', 'label', 'subject']
mdata.columns = columns
mdata.head(3)

Unnamed: 0,Mag_acc_chest,Mag_acc_left_ankle,Mag_gyro_left_ankle,Mag_magneto_left_ankle,Mag_acc_right_lower_arm,Mag_gyro_right_lower_arm,Mag_magneto_right_lower_arm,lead1,lead2,label,subject
0,9.867577,9.823208,1.045822,1.076806,9.669954,0.99763,0.621168,0.008373,-0.03349,1,1
1,9.804517,9.840192,1.045822,1.392666,9.797777,0.99763,0.8304,-0.025118,-0.025118,1,1
2,9.893964,9.903023,1.025506,1.176963,9.760349,0.99763,0.621168,0.025118,0.016745,1,1


In [14]:
# Persist the magnitude + lead table; index = False keeps the row index out of the file.
mdata.to_csv('mag_n_lead_dataset.csv', index = False)

Now every magnitude and the two lead signals have been generated.  
The next step is data preprocessing.

### 2. Data preprocessing

Several statistics are computed within each window.
The following statistics are calculated per window:
 1. Mean
 2. Standard deviation
 3. Max
 4. Median
 5. Min
 6. Entropy



- 2.1 Window sliding at 10 hz where step size = 1

In [7]:
# Reload the table written by the magnitude section (mdata.to_csv above).
new_data = pd.read_csv('mag_n_lead_dataset.csv')

In [9]:
"""
Split the table by subject and then by activity label.
"""
# key: zero-based subject position, value: list of 12 frames (labels 1-12)
group_by_subject = {}
for position in range(10):
    rows_of_subject = new_data[new_data['subject'] == position + 1]
    group_by_subject[position] = [
        rows_of_subject[rows_of_subject['label'] == activity]
        for activity in range(1, 13)
    ]

group_by_subject[9][0]

Unnamed: 0,Mag_acc_chest,Mag_acc_left_ankle,Mag_gyro_left_ankle,Mag_magneto_left_ankle,Mag_acc_right_lower_arm,Mag_gyro_right_lower_arm,Mag_magneto_right_lower_arm,lead1,lead2,label,subject
309505,9.677826,9.615028,0.995100,0.889191,9.908713,1.079980,1.171030,0.029304,0.046049,1,10
309506,9.601128,9.909079,0.987824,0.798656,9.715389,1.079980,0.969265,0.121400,0.121400,1,10
309507,9.993698,9.724534,0.987824,0.684991,9.884676,1.091608,0.967491,0.041863,0.041863,1,10
309508,9.697443,9.800865,0.987824,1.108532,9.830237,1.091608,0.824573,-0.054422,-0.046049,1,10
309509,9.692549,9.735652,0.987824,0.798656,9.705126,1.091608,0.737805,-0.138150,-0.104660,1,10
...,...,...,...,...,...,...,...,...,...,...,...
312572,9.658964,9.843652,0.982899,0.710598,9.687022,1.095174,0.824573,-0.050235,-0.033490,1,10
312573,9.720489,9.875827,0.982899,0.978248,9.743126,1.095174,0.765874,-0.050235,-0.016745,1,10
312574,9.666742,9.824169,0.982184,0.879725,9.687634,1.095174,0.967491,-0.020931,-0.025118,1,10
312575,9.783548,9.789295,0.982184,0.606560,9.871759,1.095174,0.878314,-0.029304,-0.033490,1,10


In [13]:
# Sanity check: list the subject keys present in the grouping.
group_by_subject.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [12]:
# Windowed mean of every signal, per subject and label.
# iloc[:, :-2] drops the trailing 'label' and 'subject' columns before windowing.
group_by_sub_mean = {
    position: [pd.DataFrame(pre.mean_at_10hz(frame.iloc[:, :-2])) for frame in frames]
    for position, frames in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has 

In [14]:
# Preview the windowed means of dict key 9 (subject 10), list position 1 (label 2).
group_by_sub_mean[9][1].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,9.708192,9.867127,1.131036,0.814699,9.788086,0.957766,1.116203,-0.090841,-0.064469
1,9.70817,9.858954,1.130278,0.853038,9.789174,0.957707,1.064864,-0.082888,-0.060282
2,9.684482,9.8715,1.12952,0.88366,9.756339,0.956919,1.08693,-0.08205,-0.06112
3,9.704068,9.886211,1.128762,0.842706,9.744569,0.95613,1.094806,-0.086237,-0.065725
4,9.691809,9.896638,1.129951,0.843403,9.74914,0.955342,1.085577,-0.087493,-0.069074


In [15]:
# Name the mean columns: original signal name plus the '_mean' suffix.
columns = [
    signal + '_mean'
    for signal in (
        'Mag_acc_chest',
        'Mag_acc_left_ankle',
        'Mag_gyro_left_ankle',
        'Mag_magneto_left_ankle',
        'Mag_acc_right_lower_arm',
        'Mag_gyro_right_lower_arm',
        'Mag_magneto_right_lower_arm',
        'lead1',
        'lead2',
    )
]

for frames in group_by_sub_mean.values():
    for frame in frames:
        frame.columns = columns

In [16]:
# Windowed standard deviation of every signal, per subject and label.
group_by_sub_std = {}
for position, frames in enumerate(group_by_subject.values()):
    # iloc[:, :-2] drops the trailing 'label' and 'subject' columns.
    group_by_sub_std[position] = [
        pd.DataFrame(pre.std_at_10hz(frame.iloc[:, :-2])) for frame in frames
    ]

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has 

In [17]:
# Column names for the windowed standard deviation of each signal.
columns = [
    'Mag_acc_chest_std',
    'Mag_acc_left_ankle_std',
    'Mag_gyro_left_ankle_std',
    'Mag_magneto_left_ankle_std',
    'Mag_acc_right_lower_arm_std',
    'Mag_gyro_right_lower_arm_std',
    'Mag_magneto_right_lower_arm_std',
    'lead1_std',
    'lead2_std'
]
# The loop target is intentionally not named `sub`: a `for` target stays bound
# after the loop and would overwrite the `sub` dictionary built in section 1,
# breaking any re-run of the cells that read it.
for frames in group_by_sub_std.values():
    for frame in frames:
        frame.columns = columns

group_by_sub_std[0][0].head()

Unnamed: 0,Mag_acc_chest_std,Mag_acc_left_ankle_std,Mag_gyro_left_ankle_std,Mag_magneto_left_ankle_std,Mag_acc_right_lower_arm_std,Mag_gyro_right_lower_arm_std,Mag_magneto_right_lower_arm_std,lead1_std,lead2_std
0,0.056647,0.051061,0.007542,0.24473,0.037394,6e-05,0.207926,0.322826,0.090431
1,0.06633,0.067068,0.00599,0.273312,0.029562,0.000143,0.212262,0.331275,0.101425
2,0.069226,0.07277,0.002895,0.272897,0.069266,0.000177,0.218537,0.331275,0.101342
3,0.055763,0.076594,0.002753,0.281988,0.093643,0.00019,0.265595,0.331409,0.099608
4,0.085557,0.075224,0.004133,0.221524,0.10047,0.002407,0.33498,0.325958,0.081803


In [18]:
# Windowed maximum of every signal, per subject and label.
# iloc[:, :-2] drops the trailing 'label' and 'subject' columns before windowing.
group_by_sub_max = {
    position: [pd.DataFrame(pre.max_at_10hz(frame.iloc[:, :-2])) for frame in frames]
    for position, frames in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has 

In [19]:
# Column names for the windowed maximum of each signal.
columns = [
    'Mag_acc_chest_max',
    'Mag_acc_left_ankle_max',
    'Mag_gyro_left_ankle_max',
    'Mag_magneto_left_ankle_max',
    'Mag_acc_right_lower_arm_max',
    'Mag_gyro_right_lower_arm_max',
    'Mag_magneto_right_lower_arm_max',
    'lead1_max',
    'lead2_max'
]
# The loop target is intentionally not named `sub`: a `for` target stays bound
# after the loop and would overwrite the `sub` dictionary built in section 1.
for frames in group_by_sub_max.values():
    for frame in frames:
        frame.columns = columns
group_by_sub_max[0][0].head()

Unnamed: 0,Mag_acc_chest_max,Mag_acc_left_ankle_max,Mag_gyro_left_ankle_max,Mag_magneto_left_ankle_max,Mag_acc_right_lower_arm_max,Mag_gyro_right_lower_arm_max,Mag_magneto_right_lower_arm_max,lead1_max,lead2_max
0,9.893964,9.921387,1.045822,1.75013,9.807401,0.99763,1.13409,0.74935,0.12977
1,9.893964,9.921387,1.045822,1.75013,9.807401,0.99763,1.13409,0.74935,0.12977
2,9.893964,9.921387,1.032458,1.75013,9.968867,0.99763,1.13409,0.74935,0.12977
3,9.839725,9.940715,1.032458,1.75013,9.989964,0.997498,1.204295,0.74935,0.12977
4,9.839725,9.940715,1.039652,1.519349,9.989964,1.005359,1.489597,0.74935,0.046049


In [20]:
# Windowed minimum of every signal, per subject and label.
group_by_sub_min = {}
for position, frames in enumerate(group_by_subject.values()):
    # iloc[:, :-2] drops the trailing 'label' and 'subject' columns.
    group_by_sub_min[position] = [
        pd.DataFrame(pre.min_at_10hz(frame.iloc[:, :-2])) for frame in frames
    ]

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has 

In [21]:
# Column names for the windowed minimum of each signal.
columns = [
    'Mag_acc_chest_min',
    'Mag_acc_left_ankle_min',
    'Mag_gyro_left_ankle_min',
    'Mag_magneto_left_ankle_min',
    'Mag_acc_right_lower_arm_min',
    'Mag_gyro_right_lower_arm_min',
    'Mag_magneto_right_lower_arm_min',
    'lead1_min',
    'lead2_min'
]

# The loop target is intentionally not named `sub`: a `for` target stays bound
# after the loop and would overwrite the `sub` dictionary built in section 1.
for frames in group_by_sub_min.values():
    for frame in frames:
        frame.columns = columns
group_by_sub_min[0][0].head()

Unnamed: 0,Mag_acc_chest_min,Mag_acc_left_ankle_min,Mag_gyro_left_ankle_min,Mag_magneto_left_ankle_min,Mag_acc_right_lower_arm_min,Mag_gyro_right_lower_arm_min,Mag_magneto_right_lower_arm_min,lead1_min,lead2_min
0,9.709916,9.760252,1.025506,0.973888,9.669954,0.997498,0.35912,-0.66562,-0.21769
1,9.653527,9.715958,1.025506,0.840166,9.707763,0.997084,0.35912,-0.66562,-0.21769
2,9.653527,9.715958,1.025506,0.840166,9.707763,0.997084,0.35912,-0.66562,-0.21769
3,9.653527,9.715958,1.025506,0.840166,9.707763,0.997084,0.35912,-0.66562,-0.21769
4,9.535439,9.715958,1.025506,0.840166,9.707763,0.997084,0.35912,-0.66562,-0.21769


In [22]:
# Windowed median of every signal, per subject and label.
# iloc[:, :-2] drops the trailing 'label' and 'subject' columns before windowing.
group_by_sub_mid = {
    position: [pd.DataFrame(pre.median_at_10hz(frame.iloc[:, :-2])) for frame in frames]
    for position, frames in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
7 col has been finished
size of 7 col : 3064
8 col has been finished
size of 8 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has 

In [23]:
# Column names for the windowed median of each signal.
columns = [
    'Mag_acc_chest_median',
    'Mag_acc_left_ankle_median',
    'Mag_gyro_left_ankle_median',
    'Mag_magneto_left_ankle_median',
    'Mag_acc_right_lower_arm_median',
    'Mag_gyro_right_lower_arm_median',
    'Mag_magneto_right_lower_arm_median',
    'lead1_median',
    'lead2_median'
]

# The loop target is intentionally not named `sub`: a `for` target stays bound
# after the loop and would overwrite the `sub` dictionary built in section 1.
for frames in group_by_sub_mid.values():
    for frame in frames:
        frame.columns = columns
group_by_sub_mid[9][0].head()

Unnamed: 0,Mag_acc_chest_median,Mag_acc_left_ankle_median,Mag_gyro_left_ankle_median,Mag_magneto_left_ankle_median,Mag_acc_right_lower_arm_median,Mag_gyro_right_lower_arm_median,Mag_magneto_right_lower_arm_median,lead1_median,lead2_median
0,9.76516,9.780499,0.987824,0.798656,9.808951,1.080134,0.867502,-0.012559,-0.002093
1,9.794398,9.818247,0.987824,0.798656,9.808951,1.085871,0.837955,-0.069074,-0.046049
2,9.794398,9.780499,0.987824,0.798656,9.808951,1.091608,0.783224,-0.110938,-0.064888
3,9.794398,9.780499,0.987824,0.812919,9.779745,1.087769,0.73984,-0.14024,-0.094193
4,9.801056,9.747893,0.986397,0.798656,9.779745,1.083931,0.638915,-0.14861,-0.10466


In [24]:
# Windowed entropy, per subject and label.
# iloc[:, :-4] keeps only the seven magnitude columns: it drops 'lead1', 'lead2',
# 'label' and 'subject'.
group_by_sub_etr = {}
for position, frames in enumerate(group_by_subject.values()):
    group_by_sub_etr[position] = [
        pd.DataFrame(pre.entropy_at_10hz(frame.iloc[:, :-4])) for frame in frames
    ]

0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has been finished
size of 1 col : 3064
2 col has been finished
size of 2 col : 3064
3 col has been finished
size of 3 col : 3064
4 col has been finished
size of 4 col : 3064
5 col has been finished
size of 5 col : 3064
6 col has been finished
size of 6 col : 3064
0 col has been finished
size of 0 col : 3064
1 col has 

In [25]:
# Column names for the windowed entropy of each magnitude signal
# (no lead columns: entropy was computed on the magnitudes only).
columns = [
    'Mag_acc_chest_etr',
    'Mag_acc_left_ankle_etr',
    'Mag_gyro_left_ankle_etr',
    'Mag_magneto_left_ankle_etr',
    'Mag_acc_right_lower_arm_etr',
    'Mag_gyro_right_lower_arm_etr',
    'Mag_magneto_right_lower_arm_etr',
]

# The loop target is intentionally not named `sub`: a `for` target stays bound
# after the loop and would overwrite the `sub` dictionary built in section 1.
for frames in group_by_sub_etr.values():
    for frame in frames:
        frame.columns = columns
group_by_sub_etr[0][0]

Unnamed: 0,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr
0,3.321904,3.321909,3.321890,3.295995,3.321917,3.321928,3.257426
1,3.321895,3.321895,3.321904,3.287783,3.321921,3.321928,3.253538
2,3.321892,3.321889,3.321922,3.287019,3.321892,3.321928,3.250609
3,3.321905,3.321884,3.321923,3.283856,3.321862,3.321928,3.230988
4,3.321872,3.321886,3.321916,3.295405,3.321853,3.321924,3.207974
...,...,...,...,...,...,...,...
3059,3.321788,3.321871,3.321898,3.299469,3.321886,3.321902,3.222237
3060,3.321817,3.321872,3.321895,3.299453,3.321875,3.321907,3.234457
3061,3.321817,3.321892,3.321894,3.306050,3.321877,3.321916,3.287708
3062,3.321826,3.321892,3.321895,3.306050,3.321881,3.321918,3.293057


In [26]:
# Tag only the entropy frames with label and subject: the six statistic tables
# are later joined column-wise, so one copy of these two columns is enough.
for subject_no, frames in enumerate(group_by_sub_etr.values(), start=1):
    for label_no, frame in enumerate(frames, start=1):
        frame['label'] = label_no
        frame['subject'] = subject_no

In [27]:
# Check that the 'label' and 'subject' columns were appended to the entropy frame.
group_by_sub_etr[0][0]

Unnamed: 0,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr,label,subject
0,3.321904,3.321909,3.321890,3.295995,3.321917,3.321928,3.257426,1,1
1,3.321895,3.321895,3.321904,3.287783,3.321921,3.321928,3.253538,1,1
2,3.321892,3.321889,3.321922,3.287019,3.321892,3.321928,3.250609,1,1
3,3.321905,3.321884,3.321923,3.283856,3.321862,3.321928,3.230988,1,1
4,3.321872,3.321886,3.321916,3.295405,3.321853,3.321924,3.207974,1,1
...,...,...,...,...,...,...,...,...,...
3059,3.321788,3.321871,3.321898,3.299469,3.321886,3.321902,3.222237,1,1
3060,3.321817,3.321872,3.321895,3.299453,3.321875,3.321907,3.234457,1,1
3061,3.321817,3.321892,3.321894,3.306050,3.321877,3.321916,3.287708,1,1
3062,3.321826,3.321892,3.321895,3.306050,3.321881,3.321918,3.293057,1,1


In [28]:
# Flatten each {subject: [frame per label]} dictionary into one flat list of
# frames, ordered by subject and then by label.
# Comprehensions are used so that no loop variable named `sub` is left behind:
# a `for` statement target stays bound after the loop and would overwrite the
# `sub` dictionary built in section 1.
mean_list = [frame for frames in group_by_sub_mean.values() for frame in frames]
std_list = [frame for frames in group_by_sub_std.values() for frame in frames]
max_list = [frame for frames in group_by_sub_max.values() for frame in frames]
min_list = [frame for frames in group_by_sub_min.values() for frame in frames]
mid_list = [frame for frames in group_by_sub_mid.values() for frame in frames]
etr_list = [frame for frames in group_by_sub_etr.values() for frame in frames]

In [29]:
# Stack the per-group frames of each statistic into one long frame per statistic.
mean_df, std_df, max_df, min_df, mid_df, etr_df = (
    pd.concat(frames)
    for frames in (mean_list, std_list, max_list, min_list, mid_list, etr_list)
)

In [30]:
# Renumber the rows of every stacked statistic frame (same order as before:
# mean, std, max, min, median, entropy).
for stat_df in (mean_df, std_df, max_df, min_df, mid_df, etr_df):
    pre.reindex(stat_df)

In [31]:
# Inspect the stacked entropy table, including its 'label' and 'subject' columns.
etr_df

Unnamed: 0,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr,label,subject
0,3.321904,3.321909,3.321890,3.295995,3.321917,3.321928,3.257426,1,1
1,3.321895,3.321895,3.321904,3.287783,3.321921,3.321928,3.253538,1,1
2,3.321892,3.321889,3.321922,3.287019,3.321892,3.321928,3.250609,1,1
3,3.321905,3.321884,3.321923,3.283856,3.321862,3.321928,3.230988,1,1
4,3.321872,3.321886,3.321916,3.295405,3.321853,3.321924,3.207974,1,1
...,...,...,...,...,...,...,...,...,...
342230,2.657100,2.975842,3.321878,3.200179,2.886883,3.321889,3.050667,12,10
342231,2.808047,3.012121,3.321875,3.200151,2.778024,3.321906,3.154444,12,10
342232,2.957839,3.056333,3.321881,3.130295,2.747593,3.321904,3.211925,12,10
342233,3.076690,3.109237,3.321886,3.167582,2.780772,3.321907,3.191337,12,10


In [32]:
# Join the six statistic tables side by side (axis = 1). etr_df goes last, so its
# 'label' and 'subject' columns end up as the final two columns of the dataset.
_list = [mean_df, std_df, max_df, min_df, mid_df, etr_df]
train_dataset1 = pd.concat(_list, axis = 1)

In [33]:
train_dataset1.isnull().values.any() # False means the dataset contains no missing (invalid) values.

False

In [34]:
# Show the last rows of the combined dataset.
train_dataset1.tail()

Unnamed: 0,Mag_acc_chest_mean,Mag_acc_left_ankle_mean,Mag_gyro_left_ankle_mean,Mag_magneto_left_ankle_mean,Mag_acc_right_lower_arm_mean,Mag_gyro_right_lower_arm_mean,Mag_magneto_right_lower_arm_mean,lead1_mean,lead2_mean,Mag_acc_chest_std,...,lead2_median,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr,label,subject
342230,12.263567,14.739647,1.074304,57.065313,6.882884,1.112576,34.542102,0.034746,0.078703,11.198067,...,0.094191,2.6571,2.975842,3.321878,3.200179,2.886883,3.321889,3.050667,12,10
342231,14.647983,15.986811,1.07645,52.938533,9.348997,1.11407,29.825523,0.006697,0.05191,11.201306,...,0.094191,2.808047,3.012121,3.321875,3.200151,2.778024,3.321906,3.154444,12,10
342232,16.850213,17.316417,1.07697,47.610624,11.393153,1.115768,27.43797,-0.042283,0.018839,10.500306,...,0.094191,2.957839,3.056333,3.321881,3.130295,2.747593,3.321904,3.211925,12,10
342233,18.206007,18.622993,1.07749,51.235644,13.219525,1.117465,28.714306,0.137723,0.144432,9.267028,...,0.094191,3.07669,3.109237,3.321886,3.167582,2.780772,3.321907,3.191337,12,10
342234,19.947034,20.389064,1.078787,53.658541,14.234058,1.11862,29.457163,0.143723,0.147455,8.068956,...,0.092098,3.012803,3.043583,3.169889,3.024088,2.664141,3.169909,3.033559,12,10


In [35]:
# Storage is limited, so dataset1 is written as two csv files;
# the rows are split at a fixed position.
split_row = 302008
a, b = train_dataset1.iloc[:split_row, :], train_dataset1.iloc[split_row:, :]

In [36]:
# Confirm the first part of the split contains no missing values.
a.isnull().values.any()

False

In [37]:
# Confirm the second part of the split contains no missing values.
b.isnull().values.any()

False

Due to limited storage capacity, two files are used to store data.

In [35]:
# Write both parts of dataset1; index = False keeps the row index out of the files.
a.to_csv('Dataset1.csv', index = False)      # rows before position 302008
b.to_csv('Dataset1_1.csv',index = False)     # rows from position 302008 onwards

- 2.2 Window sliding at 10hz where step size = 5

In [46]:
# Reload the magnitude + lead table as the starting point for the step size = 5 run.
new_data = pd.read_csv('mag_n_lead_dataset.csv')

In [3]:
"""
Split the table by subject and then by activity label.
"""
# key: zero-based subject position, value: list of 12 frames (labels 1-12)
group_by_subject = {}
for position in range(10):
    rows_of_subject = new_data[new_data['subject'] == position + 1]
    group_by_subject[position] = [
        rows_of_subject[rows_of_subject['label'] == activity]
        for activity in range(1, 13)
    ]

group_by_subject[0][0]

Unnamed: 0,Mag_acc_chest,Mag_acc_left_ankle,Mag_gyro_left_ankle,Mag_magneto_left_ankle,Mag_acc_right_lower_arm,Mag_gyro_right_lower_arm,Mag_magneto_right_lower_arm,lead1,lead2,label,subject
0,9.867577,9.823208,1.045822,1.076806,9.669954,0.997630,0.621168,0.008373,-0.033490,1,1
1,9.804517,9.840192,1.045822,1.392666,9.797777,0.997630,0.830400,-0.025118,-0.025118,1,1
2,9.893964,9.903023,1.025506,1.176963,9.760349,0.997630,0.621168,0.025118,0.016745,1,1
3,9.783027,9.898630,1.025506,1.750130,9.752004,0.997498,0.535752,0.180010,0.129770,1,1
4,9.826135,9.827709,1.025506,1.392666,9.782695,0.997498,0.744476,0.092098,0.046049,1,1
...,...,...,...,...,...,...,...,...,...,...,...
3067,9.918062,9.929257,1.040996,0.603954,9.926187,1.007188,1.098038,-0.669810,-0.330720,1,1
3068,9.728726,9.699472,1.040996,0.992579,9.642964,1.010210,0.842011,0.858190,0.226060,1,1
3069,9.964320,9.842705,1.040565,0.992579,9.646205,1.010210,1.150000,-0.427000,-0.259550,1,1
3070,9.955235,9.841280,1.040565,1.105531,9.771362,1.010210,0.842011,-0.037677,-0.012559,1,1


In [5]:
# Windowed mean with a step size (stride) of 5, per subject and label.
# iloc[:, :-2] drops the trailing 'label' and 'subject' columns before windowing.
group_by_sub_mean = {
    position: [
        pd.DataFrame(pre.mean_at_10hz(frame.iloc[:, :-2], stride = 5))
        for frame in frames
    ]
    for position, frames in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 

In [6]:
# Column names for the mean features, grouped by sensor:
# chest, left ankle, right lower arm, ECG leads.
columns = [
    'Mag_acc_chest_mean',
    'Mag_acc_left_ankle_mean', 'Mag_gyro_left_ankle_mean', 'Mag_magneto_left_ankle_mean',
    'Mag_acc_right_lower_arm_mean', 'Mag_gyro_right_lower_arm_mean', 'Mag_magneto_right_lower_arm_mean',
    'lead1_mean', 'lead2_mean',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_mean.values():
    for frame in label_frames:
        frame.columns = columns

In [7]:
# Windowed standard deviation per (subject, label) group (stride 5).
# The trailing `label` and `subject` columns are excluded via iloc[:, :-2].
group_by_sub_std = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_std[subject_idx] = [
        pd.DataFrame(pre.std_at_10hz(label_rows.iloc[:, :-2], stride=5))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 

In [8]:
# Column names for the standard-deviation features, grouped by sensor.
columns = [
    'Mag_acc_chest_std',
    'Mag_acc_left_ankle_std', 'Mag_gyro_left_ankle_std', 'Mag_magneto_left_ankle_std',
    'Mag_acc_right_lower_arm_std', 'Mag_gyro_right_lower_arm_std', 'Mag_magneto_right_lower_arm_std',
    'lead1_std', 'lead2_std',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_std.values() for f in frames):
    frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_std[0][0].head()

Unnamed: 0,Mag_acc_chest_std,Mag_acc_left_ankle_std,Mag_gyro_left_ankle_std,Mag_magneto_left_ankle_std,Mag_acc_right_lower_arm_std,Mag_gyro_right_lower_arm_std,Mag_magneto_right_lower_arm_std,lead1_std,lead2_std
0,0.056647,0.051061,0.007542,0.24473,0.037394,6e-05,0.207926,0.322826,0.090431
1,0.084156,0.075131,0.004752,0.208348,0.106418,0.003211,0.454411,0.323854,0.074445
2,0.09245,0.068822,0.006036,0.124637,0.135921,0.00461,0.539656,0.070678,0.055498
3,0.08996,0.075303,0.004309,0.143448,0.075097,0.004191,0.572372,0.029568,0.040717
4,0.077628,0.136127,0.008002,0.146992,0.044482,0.004646,0.444433,0.030408,0.071026


In [10]:
# Windowed maximum per (subject, label) group (stride 5); `label` and
# `subject`, the last two columns, are not fed to the helper.
group_by_sub_max = {
    subject_idx: [
        pd.DataFrame(pre.max_at_10hz(label_rows.iloc[:, :-2], stride=5))
        for label_rows in label_groups
    ]
    for subject_idx, label_groups in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 

In [11]:
# Column names for the maximum features, grouped by sensor.
columns = [
    'Mag_acc_chest_max',
    'Mag_acc_left_ankle_max', 'Mag_gyro_left_ankle_max', 'Mag_magneto_left_ankle_max',
    'Mag_acc_right_lower_arm_max', 'Mag_gyro_right_lower_arm_max', 'Mag_magneto_right_lower_arm_max',
    'lead1_max', 'lead2_max',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_max.values():
    for frame in label_frames:
        frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_max[0][0].head()

Unnamed: 0,Mag_acc_chest_max,Mag_acc_left_ankle_max,Mag_gyro_left_ankle_max,Mag_magneto_left_ankle_max,Mag_acc_right_lower_arm_max,Mag_gyro_right_lower_arm_max,Mag_magneto_right_lower_arm_max,lead1_max,lead2_max
0,9.893964,9.921387,1.045822,1.75013,9.807401,0.99763,1.13409,0.74935,0.12977
1,9.839725,9.940715,1.039652,1.519349,9.989964,1.005359,1.881958,0.74935,0.046049
2,9.86837,9.940715,1.04174,1.239423,9.989964,1.005359,2.562689,0.037677,0.025118
3,9.963018,10.01562,1.047914,1.392666,9.81107,1.005359,2.562689,0.10884,0.066981
4,9.963018,10.01562,1.047914,1.392666,9.836822,1.003851,1.925829,0.13815,0.11722


In [13]:
# Windowed minimum per (subject, label) group (stride 5).
# iloc[:, :-2] leaves out the trailing `label` and `subject` columns.
group_by_sub_min = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_min[subject_idx] = [
        pd.DataFrame(pre.min_at_10hz(label_rows.iloc[:, :-2], stride=5))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 

In [14]:
# Column names for the minimum features, grouped by sensor.
columns = [
    'Mag_acc_chest_min',
    'Mag_acc_left_ankle_min', 'Mag_gyro_left_ankle_min', 'Mag_magneto_left_ankle_min',
    'Mag_acc_right_lower_arm_min', 'Mag_gyro_right_lower_arm_min', 'Mag_magneto_right_lower_arm_min',
    'lead1_min', 'lead2_min',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_min.values() for f in frames):
    frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_min[0][0].head()

Unnamed: 0,Mag_acc_chest_min,Mag_acc_left_ankle_min,Mag_gyro_left_ankle_min,Mag_magneto_left_ankle_min,Mag_acc_right_lower_arm_min,Mag_gyro_right_lower_arm_min,Mag_magneto_right_lower_arm_min,lead1_min,lead2_min
0,9.709916,9.760252,1.025506,0.973888,9.669954,0.997498,0.35912,-0.66562,-0.21769
1,9.535439,9.715958,1.025506,0.840166,9.707763,0.997084,0.35912,-0.66562,-0.21769
2,9.535439,9.715958,1.02781,0.840166,9.538493,0.995082,0.535752,-0.21769,-0.17582
3,9.625667,9.739173,1.035642,0.942753,9.538493,0.995082,0.652185,0.008373,-0.071167
4,9.701768,9.533806,1.028636,0.889191,9.684612,0.993103,0.179565,0.041863,-0.071167


In [15]:
# Windowed median per (subject, label) group (stride 5); the trailing
# `label` and `subject` columns are dropped before calling the helper.
group_by_sub_mid = {
    subject_idx: [
        pd.DataFrame(pre.median_at_10hz(label_rows.iloc[:, :-2], stride=5))
        for label_rows in label_groups
    ]
    for subject_idx, label_groups in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
7 col has been finished
size of 7 col : 614
8 col has been finished
size of 8 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 

In [16]:
# Column names for the median features, grouped by sensor.
columns = [
    'Mag_acc_chest_median',
    'Mag_acc_left_ankle_median', 'Mag_gyro_left_ankle_median', 'Mag_magneto_left_ankle_median',
    'Mag_acc_right_lower_arm_median', 'Mag_gyro_right_lower_arm_median', 'Mag_magneto_right_lower_arm_median',
    'lead1_median', 'lead2_median',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_mid.values():
    for frame in label_frames:
        frame.columns = columns

# Preview: last subject (index 9), label 1.
group_by_sub_mid[9][0].head()

Unnamed: 0,Mag_acc_chest_median,Mag_acc_left_ankle_median,Mag_gyro_left_ankle_median,Mag_magneto_left_ankle_median,Mag_acc_right_lower_arm_median,Mag_gyro_right_lower_arm_median,Mag_magneto_right_lower_arm_median,lead1_median,lead2_median
0,9.76516,9.780499,0.987824,0.798656,9.808951,1.080134,0.867502,-0.012559,-0.002093
1,9.816181,9.797881,0.98497,0.800283,9.786774,1.083931,0.542706,-0.14861,-0.094193
2,9.756201,9.818461,0.985165,0.81727,9.766084,1.090528,0.42158,-0.108844,-0.062795
3,9.709106,9.83523,0.985183,0.852841,9.698542,1.090528,0.746118,-0.03349,-0.010466
4,9.67026,9.844644,0.982991,0.80766,9.753785,1.086408,0.826639,-0.069074,-0.041863


In [17]:
# Windowed entropy per (subject, label) group (stride 5).
# iloc[:, :-4] drops lead1, lead2, label and subject, so only the seven
# magnitude columns are passed to the helper.
group_by_sub_etr = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_etr[subject_idx] = [
        pd.DataFrame(pre.entropy_at_10hz(label_rows.iloc[:, :-4], stride=5))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 1 col : 614
2 col has been finished
size of 2 col : 614
3 col has been finished
size of 3 col : 614
4 col has been finished
size of 4 col : 614
5 col has been finished
size of 5 col : 614
6 col has been finished
size of 6 col : 614
0 col has been finished
size of 0 col : 614
1 col has been finished
size of 

In [18]:
# Column names for the entropy features (magnitude signals only, no ECG leads).
columns = [
    'Mag_acc_chest_etr',
    'Mag_acc_left_ankle_etr', 'Mag_gyro_left_ankle_etr', 'Mag_magneto_left_ankle_etr',
    'Mag_acc_right_lower_arm_etr', 'Mag_gyro_right_lower_arm_etr', 'Mag_magneto_right_lower_arm_etr',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_etr.values() for f in frames):
    frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_etr[0][0]

Unnamed: 0,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr
0,3.321904,3.321909,3.321890,3.295995,3.321917,3.321928,3.257426
1,3.321874,3.321886,3.321913,3.298009,3.321844,3.321921,3.162013
2,3.321863,3.321893,3.321904,3.311754,3.321790,3.321913,3.219169
3,3.321867,3.321886,3.321916,3.309668,3.321885,3.321915,3.200733
4,3.321883,3.321789,3.321885,3.308699,3.321913,3.321913,3.130447
...,...,...,...,...,...,...,...
609,3.321848,3.321860,3.321918,3.293123,3.321853,3.321915,3.093318
610,3.321735,3.321887,3.321886,3.295867,3.321883,3.321884,3.174268
611,3.321744,3.321888,3.321853,3.312681,3.321893,3.321909,3.142235
612,3.321817,3.321872,3.321895,3.299453,3.321875,3.321907,3.234457


In [19]:
# Tag only the entropy frames with `label` and `subject`. They are the last
# block in the later column-wise concat, so both columns appear exactly once.
# List position k holds label k+1 and dict position s holds subject s+1.
for subject_id, label_frames in enumerate(group_by_sub_etr.values(), start=1):
    for label_id, frame in enumerate(label_frames, start=1):
        frame['label'] = label_id
        frame['subject'] = subject_id

In [20]:
# Flatten each {subject: [frame per label]} dict into one flat list,
# ordered by subject first and by label second.
mean_list = [frame for frames in group_by_sub_mean.values() for frame in frames]
std_list = [frame for frames in group_by_sub_std.values() for frame in frames]
max_list = [frame for frames in group_by_sub_max.values() for frame in frames]
min_list = [frame for frames in group_by_sub_min.values() for frame in frames]
mid_list = [frame for frames in group_by_sub_mid.values() for frame in frames]
etr_list = [frame for frames in group_by_sub_etr.values() for frame in frames]

In [21]:
# Stack the per-group frames of each statistic row-wise into one frame.
mean_df, std_df, max_df, min_df, mid_df, etr_df = (
    pd.concat(frames)
    for frames in (mean_list, std_list, max_list, min_list, mid_list, etr_list)
)

In [22]:
# Apply the local `preprocessing.reindex` helper to each stacked frame.
# Its return value is not used here; presumably it resets the index in
# place after the row-wise concat. TODO confirm against the module.
for stacked in (mean_df, std_df, max_df, min_df, mid_df, etr_df):
    pre.reindex(stacked)

In [23]:
# Join the six statistic frames side by side. etr_df comes last, so its
# `label` and `subject` columns end up as the right-most columns.
_list = [mean_df, std_df, max_df, min_df, mid_df, etr_df]
train_dataset2 = pd.concat(_list, axis='columns')

In [24]:
# Sanity check: True if any cell of the combined table is missing.
train_dataset2.isna().to_numpy().any()

False

In [27]:
# Show the last five rows of the combined table.
train_dataset2.tail(n=5)

Unnamed: 0,Mag_acc_chest_mean,Mag_acc_left_ankle_mean,Mag_gyro_left_ankle_mean,Mag_magneto_left_ankle_mean,Mag_acc_right_lower_arm_mean,Mag_gyro_right_lower_arm_mean,Mag_magneto_right_lower_arm_mean,lead1_mean,lead2_mean,Mag_acc_chest_std,...,lead2_median,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr,label,subject
68583,16.660834,22.739469,1.122863,147.702579,19.58276,1.112524,37.009615,0.168711,0.004188,7.973288,...,-0.209315,3.132874,3.28798,3.321923,3.125445,3.170357,3.321893,3.202713,12,10
68584,6.619939,14.656477,1.11107,156.873259,11.660501,1.1016,43.057189,-0.016323,-0.030558,3.936831,...,-0.221875,3.061586,3.119042,3.32168,3.198856,3.029072,3.321896,3.144585,12,10
68585,2.9873,7.26768,1.083527,84.677518,6.308694,1.099538,57.414379,-0.132706,-0.05191,1.767915,...,-0.041863,3.10052,3.219271,3.32157,3.172892,3.16672,3.321914,3.189782,12,10
68586,10.000344,12.552993,1.072605,61.324592,4.66511,1.111081,39.014349,0.033071,0.084145,10.612423,...,0.094191,2.534356,2.979521,3.321889,3.166133,3.114388,3.321875,2.990911,12,10
68587,19.947034,20.389064,1.078787,53.658541,14.234058,1.11862,29.457163,0.143723,0.147455,8.068956,...,0.092098,3.012803,3.043583,3.169889,3.024088,2.664141,3.169909,3.033559,12,10


In [28]:
# Save the step-size-5 feature table; the DataFrame index is not written.
train_dataset2.to_csv(path_or_buf='Dataset2.csv', index=False)

- 2.3 Window sliding at 10 Hz with step size = 10

In [1]:
import preprocessing as pre # module for data-preprocessing.
import numpy as np
import pandas as pd

In [2]:
# Load the table used as input for the step-size-10 run.
new_data = pd.read_csv(filepath_or_buffer='mag_n_lead_dataset.csv')

In [3]:
"""
Classify data with label and store them to list.
"""
# Build {subject index: [rows for label 1, ..., rows for label 12]} for the
# ten subjects; subject index s corresponds to subject id s+1.
group_by_subject = {}

for subject_idx in range(10):
    rows_of_subject = new_data[new_data['subject'] == subject_idx + 1]
    per_label = [
        rows_of_subject[rows_of_subject['label'] == label_id]
        for label_id in range(1, 13)
    ]
    group_by_subject[subject_idx] = per_label

# Preview: subject 1, label 1.
group_by_subject[0][0]

Unnamed: 0,Mag_acc_chest,Mag_acc_left_ankle,Mag_gyro_left_ankle,Mag_magneto_left_ankle,Mag_acc_right_lower_arm,Mag_gyro_right_lower_arm,Mag_magneto_right_lower_arm,lead1,lead2,label,subject
0,9.867577,9.823208,1.045822,1.076806,9.669954,0.997630,0.621168,0.008373,-0.033490,1,1
1,9.804517,9.840192,1.045822,1.392666,9.797777,0.997630,0.830400,-0.025118,-0.025118,1,1
2,9.893964,9.903023,1.025506,1.176963,9.760349,0.997630,0.621168,0.025118,0.016745,1,1
3,9.783027,9.898630,1.025506,1.750130,9.752004,0.997498,0.535752,0.180010,0.129770,1,1
4,9.826135,9.827709,1.025506,1.392666,9.782695,0.997498,0.744476,0.092098,0.046049,1,1
...,...,...,...,...,...,...,...,...,...,...,...
3067,9.918062,9.929257,1.040996,0.603954,9.926187,1.007188,1.098038,-0.669810,-0.330720,1,1
3068,9.728726,9.699472,1.040996,0.992579,9.642964,1.010210,0.842011,0.858190,0.226060,1,1
3069,9.964320,9.842705,1.040565,0.992579,9.646205,1.010210,1.150000,-0.427000,-0.259550,1,1
3070,9.955235,9.841280,1.040565,1.105531,9.771362,1.010210,0.842011,-0.037677,-0.012559,1,1


In [4]:
# Windowed mean for every (subject, label) group, using the helper from the
# local `preprocessing` module with stride 10 (this section's step size).
# iloc[:, :-2] leaves out the trailing `label` and `subject` columns.
group_by_sub_mean = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_mean[subject_idx] = [
        pd.DataFrame(pre.mean_at_10hz(label_rows.iloc[:, :-2], stride=10))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 

In [5]:
# Column names for the mean features, grouped by sensor:
# chest, left ankle, right lower arm, ECG leads.
columns = [
    'Mag_acc_chest_mean',
    'Mag_acc_left_ankle_mean', 'Mag_gyro_left_ankle_mean', 'Mag_magneto_left_ankle_mean',
    'Mag_acc_right_lower_arm_mean', 'Mag_gyro_right_lower_arm_mean', 'Mag_magneto_right_lower_arm_mean',
    'lead1_mean', 'lead2_mean',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_mean.values() for f in frames):
    frame.columns = columns

In [6]:
# Windowed standard deviation per (subject, label) group (stride 10).
# The trailing `label` and `subject` columns are excluded via iloc[:, :-2].
group_by_sub_std = {
    subject_idx: [
        pd.DataFrame(pre.std_at_10hz(label_rows.iloc[:, :-2], stride=10))
        for label_rows in label_groups
    ]
    for subject_idx, label_groups in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 

In [7]:
# Column names for the standard-deviation features, grouped by sensor.
columns = [
    'Mag_acc_chest_std',
    'Mag_acc_left_ankle_std', 'Mag_gyro_left_ankle_std', 'Mag_magneto_left_ankle_std',
    'Mag_acc_right_lower_arm_std', 'Mag_gyro_right_lower_arm_std', 'Mag_magneto_right_lower_arm_std',
    'lead1_std', 'lead2_std',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_std.values():
    for frame in label_frames:
        frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_std[0][0].head()

Unnamed: 0,Mag_acc_chest_std,Mag_acc_left_ankle_std,Mag_gyro_left_ankle_std,Mag_magneto_left_ankle_std,Mag_acc_right_lower_arm_std,Mag_gyro_right_lower_arm_std,Mag_magneto_right_lower_arm_std,lead1_std,lead2_std
0,0.056647,0.051061,0.007542,0.24473,0.037394,6e-05,0.207926,0.322826,0.090431
1,0.09245,0.068822,0.006036,0.124637,0.135921,0.00461,0.539656,0.070678,0.055498
2,0.077628,0.136127,0.008002,0.146992,0.044482,0.004646,0.444433,0.030408,0.071026
3,0.144327,0.074265,0.003518,0.14655,0.079843,0.007242,0.244377,0.02364,0.023905
4,0.117732,0.093106,0.000202,0.159589,0.1198,0.007007,0.448767,0.12457,0.060602


In [8]:
# Windowed maximum per (subject, label) group (stride 10); `label` and
# `subject`, the last two columns, are not fed to the helper.
group_by_sub_max = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_max[subject_idx] = [
        pd.DataFrame(pre.max_at_10hz(label_rows.iloc[:, :-2], stride=10))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 

In [9]:
# Column names for the maximum features, grouped by sensor.
columns = [
    'Mag_acc_chest_max',
    'Mag_acc_left_ankle_max', 'Mag_gyro_left_ankle_max', 'Mag_magneto_left_ankle_max',
    'Mag_acc_right_lower_arm_max', 'Mag_gyro_right_lower_arm_max', 'Mag_magneto_right_lower_arm_max',
    'lead1_max', 'lead2_max',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_max.values() for f in frames):
    frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_max[0][0].head()

Unnamed: 0,Mag_acc_chest_max,Mag_acc_left_ankle_max,Mag_gyro_left_ankle_max,Mag_magneto_left_ankle_max,Mag_acc_right_lower_arm_max,Mag_gyro_right_lower_arm_max,Mag_magneto_right_lower_arm_max,lead1_max,lead2_max
0,9.893964,9.921387,1.045822,1.75013,9.807401,0.99763,1.13409,0.74935,0.12977
1,9.86837,9.940715,1.04174,1.239423,9.989964,1.005359,2.562689,0.037677,0.025118
2,9.963018,10.01562,1.047914,1.392666,9.836822,1.003851,1.925829,0.13815,0.11722
3,10.008154,9.973147,1.041407,1.206517,9.836724,1.009171,1.041829,0.14652,0.12977
4,9.923464,9.956879,1.042695,1.309615,9.939315,1.009171,1.982104,0.23025,0.17164


In [10]:
# Windowed minimum per (subject, label) group (stride 10).
# iloc[:, :-2] leaves out the trailing `label` and `subject` columns.
group_by_sub_min = {
    subject_idx: [
        pd.DataFrame(pre.min_at_10hz(label_rows.iloc[:, :-2], stride=10))
        for label_rows in label_groups
    ]
    for subject_idx, label_groups in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 

In [11]:
# Column names for the minimum features, grouped by sensor.
columns = [
    'Mag_acc_chest_min',
    'Mag_acc_left_ankle_min', 'Mag_gyro_left_ankle_min', 'Mag_magneto_left_ankle_min',
    'Mag_acc_right_lower_arm_min', 'Mag_gyro_right_lower_arm_min', 'Mag_magneto_right_lower_arm_min',
    'lead1_min', 'lead2_min',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_min.values():
    for frame in label_frames:
        frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_min[0][0].head()

Unnamed: 0,Mag_acc_chest_min,Mag_acc_left_ankle_min,Mag_gyro_left_ankle_min,Mag_magneto_left_ankle_min,Mag_acc_right_lower_arm_min,Mag_gyro_right_lower_arm_min,Mag_magneto_right_lower_arm_min,lead1_min,lead2_min
0,9.709916,9.760252,1.025506,0.973888,9.669954,0.997498,0.35912,-0.66562,-0.21769
1,9.535439,9.715958,1.02781,0.840166,9.538493,0.995082,0.535752,-0.21769,-0.17582
2,9.701768,9.533806,1.028636,0.889191,9.684612,0.993103,0.179565,0.041863,-0.071167
3,9.50825,9.697575,1.032323,0.743476,9.573753,0.989646,0.255289,0.071167,0.058608
4,9.502711,9.637157,1.042254,0.717248,9.591558,0.987904,0.568428,-0.24699,-0.012559


In [12]:
# Windowed median per (subject, label) group (stride 10); the trailing
# `label` and `subject` columns are dropped before calling the helper.
group_by_sub_mid = {}

for subject_idx, label_groups in enumerate(group_by_subject.values()):
    group_by_sub_mid[subject_idx] = [
        pd.DataFrame(pre.median_at_10hz(label_rows.iloc[:, :-2], stride=10))
        for label_rows in label_groups
    ]

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
7 col has been finished
size of 7 col : 308
8 col has been finished
size of 8 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 

In [13]:
# Column names for the median features, grouped by sensor.
columns = [
    'Mag_acc_chest_median',
    'Mag_acc_left_ankle_median', 'Mag_gyro_left_ankle_median', 'Mag_magneto_left_ankle_median',
    'Mag_acc_right_lower_arm_median', 'Mag_gyro_right_lower_arm_median', 'Mag_magneto_right_lower_arm_median',
    'lead1_median', 'lead2_median',
]

# Walk every per-label frame of every subject and rename its columns in place.
for frame in (f for frames in group_by_sub_mid.values() for f in frames):
    frame.columns = columns

# Preview: last subject (index 9), label 1.
group_by_sub_mid[9][0].head()

Unnamed: 0,Mag_acc_chest_median,Mag_acc_left_ankle_median,Mag_gyro_left_ankle_median,Mag_magneto_left_ankle_median,Mag_acc_right_lower_arm_median,Mag_gyro_right_lower_arm_median,Mag_magneto_right_lower_arm_median,lead1_median,lead2_median
0,9.76516,9.780499,0.987824,0.798656,9.808951,1.080134,0.867502,-0.012559,-0.002093
1,9.756201,9.818461,0.985165,0.81727,9.766084,1.090528,0.42158,-0.108844,-0.062795
2,9.67026,9.844644,0.982991,0.80766,9.753785,1.086408,0.826639,-0.069074,-0.041863
3,9.779766,9.888188,0.988926,0.772086,9.869566,1.08385,0.860163,-0.10466,-0.075353
4,9.755532,9.765561,0.990979,0.638248,9.841711,1.095068,1.15,-0.12559,-0.098378


In [14]:
# Windowed entropy per (subject, label) group (stride 10).
# iloc[:, :-4] drops lead1, lead2, label and subject, so only the seven
# magnitude columns are passed to the helper.
group_by_sub_etr = {
    subject_idx: [
        pd.DataFrame(pre.entropy_at_10hz(label_rows.iloc[:, :-4], stride=10))
        for label_rows in label_groups
    ]
    for subject_idx, label_groups in enumerate(group_by_subject.values())
}

0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 1 col : 308
2 col has been finished
size of 2 col : 308
3 col has been finished
size of 3 col : 308
4 col has been finished
size of 4 col : 308
5 col has been finished
size of 5 col : 308
6 col has been finished
size of 6 col : 308
0 col has been finished
size of 0 col : 308
1 col has been finished
size of 

In [15]:
# Column names for the entropy features (magnitude signals only, no ECG leads).
columns = [
    'Mag_acc_chest_etr',
    'Mag_acc_left_ankle_etr', 'Mag_gyro_left_ankle_etr', 'Mag_magneto_left_ankle_etr',
    'Mag_acc_right_lower_arm_etr', 'Mag_gyro_right_lower_arm_etr', 'Mag_magneto_right_lower_arm_etr',
]

# Rename the columns of every per-label frame in place.
for label_frames in group_by_sub_etr.values():
    for frame in label_frames:
        frame.columns = columns

# Preview: subject 1, label 1.
group_by_sub_etr[0][0]

Unnamed: 0,Mag_acc_chest_etr,Mag_acc_left_ankle_etr,Mag_gyro_left_ankle_etr,Mag_magneto_left_ankle_etr,Mag_acc_right_lower_arm_etr,Mag_gyro_right_lower_arm_etr,Mag_magneto_right_lower_arm_etr
0,3.321904,3.321909,3.321890,3.295995,3.321917,3.321928,3.257426
1,3.321863,3.321893,3.321904,3.311754,3.321790,3.321913,3.219169
2,3.321883,3.321789,3.321885,3.308699,3.321913,3.321913,3.130447
3,3.321771,3.321887,3.321920,3.306166,3.321880,3.321890,3.236069
4,3.321823,3.321863,3.321928,3.300820,3.321819,3.321892,3.211613
...,...,...,...,...,...,...,...
303,3.321756,3.321835,3.321894,3.309040,3.321887,3.321924,3.228935
304,3.321878,3.321889,3.321890,3.316005,3.321853,3.321910,3.194604
305,3.321735,3.321887,3.321886,3.295867,3.321883,3.321884,3.174268
306,3.321817,3.321872,3.321895,3.299453,3.321875,3.321907,3.234457


In [16]:
# Tag only the entropy frames with `label` and `subject`. They are the last
# block in the later column-wise concat, so both columns appear exactly once.
# List position k holds label k+1 and dict position s holds subject s+1.
for subject_id, label_frames in enumerate(group_by_sub_etr.values(), start=1):
    for label_id, frame in enumerate(label_frames, start=1):
        frame['label'] = label_id
        frame['subject'] = subject_id

In [17]:
# Flatten each {subject: [frame per label]} dict into one flat list,
# ordered by subject first and by label second.
mean_list = [frame for frames in group_by_sub_mean.values() for frame in frames]
std_list = [frame for frames in group_by_sub_std.values() for frame in frames]
max_list = [frame for frames in group_by_sub_max.values() for frame in frames]
min_list = [frame for frames in group_by_sub_min.values() for frame in frames]
mid_list = [frame for frames in group_by_sub_mid.values() for frame in frames]
etr_list = [frame for frames in group_by_sub_etr.values() for frame in frames]

In [18]:
# Stack the per-group frames of each statistic row-wise (axis 0) into one frame.
mean_df = pd.concat(objs=mean_list, axis=0)
std_df = pd.concat(objs=std_list, axis=0)
max_df = pd.concat(objs=max_list, axis=0)
min_df = pd.concat(objs=min_list, axis=0)
mid_df = pd.concat(objs=mid_list, axis=0)
etr_df = pd.concat(objs=etr_list, axis=0)

In [19]:
# Apply the local `preprocessing.reindex` helper to each stacked frame.
# Its return value is not used here; presumably it resets the index in
# place after the row-wise concat. TODO confirm against the module.
for stacked in (mean_df, std_df, max_df, min_df, mid_df, etr_df):
    pre.reindex(stacked)

In [20]:
# Join the six statistic frames side by side. etr_df comes last, so its
# `label` and `subject` columns end up as the right-most columns.
_list = [mean_df, std_df, max_df, min_df, mid_df, etr_df]
train_dataset3 = pd.concat(_list, axis='columns')

In [21]:
# Sanity check: True if any cell of the combined table is missing.
train_dataset3.isna().to_numpy().any()

False

In [23]:
# Save the step-size-10 feature table; the DataFrame index is not written.
train_dataset3.to_csv(path_or_buf='Dataset3.csv', index=False)