In [231]:
# Packages
import numpy as np
import pandas as pd
from scipy.stats import median_abs_deviation

In [196]:
# Load data
data_sara_vacuum_raw = pd.read_csv('data/sara_vacuum.csv')[50:-50]
data_sara_picking_raw = pd.read_csv('data/sara_picking.csv')[50:-50]
data_sara_walking_raw = pd.read_csv('data/sara_walking.csv')[50:-50]

# Columns to pick
colnames_original = ["ACCELEROMETER X (m/s²)",
            "ACCELEROMETER Y (m/s²)",
            "ACCELEROMETER Z (m/s²)",
            "GYROSCOPE X (rad/s)",
            "GYROSCOPE Y (rad/s)",
            "GYROSCOPE Z (rad/s)",
            "GRAVITY X (m/s²)",
            "GRAVITY Y (m/s²)",
            "GRAVITY Z (m/s²)"]

# Pick relevant variables
data_sara_vacuum = data_sara_vacuum_raw[colnames_original]
data_sara_picking = data_sara_picking_raw[colnames_original]
data_sara_walking = data_sara_walking_raw[colnames_original]

# New column names
colnames = ['accelerometer_x',
            'accelerometer_y',
            'accelerometer_z',
            'gyroscope_x',
            'gyroscope_y',
            'gyroscope_z',
            'gravity_x',
            'gravity_y',
            'gravity_z']

colnames_dict = dict(zip(colnames_original, colnames))

# Changing column names
data_sara_vacuum.rename(columns = colnames_dict, inplace = True)
data_sara_picking.rename(columns = colnames_dict, inplace = True)
data_sara_walking.rename(columns = colnames_dict, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [197]:
data_sara_vacuum

Unnamed: 0,accelerometer_x,accelerometer_y,accelerometer_z,gyroscope_x,gyroscope_y,gyroscope_z,gravity_x,gravity_y,gravity_z
50,2.8109,-9.0719,-3.0000,-0.0845,-0.7284,0.1152,-1.9680,-9.5650,-0.8990
51,-2.2841,-8.5811,-2.2961,0.1452,-0.5470,0.0224,-1.5553,-9.5990,-1.2693
52,-0.8452,-9.8333,-2.8803,0.3346,-0.3582,-0.0180,-1.6262,-9.5926,-1.2279
53,-0.6848,-8.7032,-2.1764,0.1232,-0.4566,-0.0570,-1.5755,-9.5933,-1.2867
54,-1.6592,-9.2994,-2.6409,-0.0612,-0.3839,0.0016,-1.5056,-9.6030,-1.2984
...,...,...,...,...,...,...,...,...,...
1092,4.9969,-3.3807,0.2873,-0.4632,5.4713,-5.4693,8.0430,5.5953,-0.4168
1093,-1.4509,4.3935,3.8189,-0.0258,6.5757,-3.2488,4.3989,8.0867,3.3801
1094,1.3456,7.3768,4.1062,-1.9897,2.5776,0.6351,2.0549,7.7889,5.5929
1095,3.6752,5.5547,8.5883,-1.0875,1.8214,1.9814,1.6781,6.5008,7.1480


In [239]:
# Function for merging observations

def merge_observations(data, seconds = 10):
    
    # Check if we can divide data evenly, otherwise cut data
    if (len(data) % seconds != 0):
        rows_to_remove = len(data) % seconds
        data = np.array(data[:-rows_to_remove])
        
    # Reshape into batches of observations to be merged
    data_reshaped = data.reshape(np.int(len(data)/seconds), seconds, data.shape[1])
    
    # Calculate agregated measures
    means = []
    stdevs = []
    mins = []
    maxs = []
    mads = []

    for i in range(len(test_reshaped)):
        means.append(test_reshaped[i].mean(axis = 0))
        stdevs.append(test_reshaped[i].std(axis = 0))
        mins.append(test_reshaped[i].min(axis = 0))
        maxs.append(test_reshaped[i].max(axis = 0))
        mads.append(median_abs_deviation(test_reshaped[0]))
        
    # Define columns names
    colnames_means = []
    colnames_stdevs = []
    colnames_mins = []
    colnames_maxs = []
    colnames_mads = []
    for i in range(len(colnames)):
        colnames_means.append(colnames[i] + '_mean')
        colnames_stdevs.append(colnames[i] + '_std')
        colnames_mins.append(colnames[i] + '_min')
        colnames_maxs.append(colnames[i] + '_max')
        colnames_mads.append(colnames[i] + '_mad')
    
    # Create dataframes
    df_mean = pd.DataFrame(stdevs, columns = colnames_means)
    df_std = pd.DataFrame(stdevs, columns = colnames_stdevs)
    df_min = pd.DataFrame(stdevs, columns = colnames_mins)
    df_max = pd.DataFrame(stdevs, columns = colnames_maxs)
    df_mad = pd.DataFrame(stdevs, columns = colnames_mads)
    
    # Merge dataframes
    dataframe_final = pd.concat([df_mean, df_std, df_min, df_max, df_mad], axis = 1)
    
    return dataframe_final

In [240]:
df_test = merge_observations(data_sara_vacuum, 10)

In [241]:
df_test.head()

Unnamed: 0,accelerometer_x_mean,accelerometer_y_mean,accelerometer_z_mean,gyroscope_x_mean,gyroscope_y_mean,gyroscope_z_mean,gravity_x_mean,gravity_y_mean,gravity_z_mean,accelerometer_x_std,...,gravity_z_max,accelerometer_x_mad,accelerometer_y_mad,accelerometer_z_mad,gyroscope_x_mad,gyroscope_y_mad,gyroscope_z_mad,gravity_x_mad,gravity_y_mad,gravity_z_mad
0,1.582763,0.627404,1.7973,0.292816,0.338077,0.067357,0.248138,0.064679,0.28999,1.582763,...,0.28999,1.582763,0.627404,1.7973,0.292816,0.338077,0.067357,0.248138,0.064679,0.28999
1,0.793399,0.637529,1.903425,0.210769,0.28431,0.079274,0.137586,0.084812,0.497567,0.793399,...,0.497567,0.793399,0.637529,1.903425,0.210769,0.28431,0.079274,0.137586,0.084812,0.497567
2,1.133684,1.443153,2.045197,0.268541,0.25789,0.235696,0.307955,0.050667,0.52272,1.133684,...,0.52272,1.133684,1.443153,2.045197,0.268541,0.25789,0.235696,0.307955,0.050667,0.52272
3,0.787834,1.13158,1.733461,0.168884,0.353513,0.183788,0.269778,0.125707,0.483171,0.787834,...,0.483171,0.787834,1.13158,1.733461,0.168884,0.353513,0.183788,0.269778,0.125707,0.483171
4,1.341834,0.839233,1.23848,0.275107,0.373247,0.100411,0.154935,0.07075,0.507371,1.341834,...,0.507371,1.341834,0.839233,1.23848,0.275107,0.373247,0.100411,0.154935,0.07075,0.507371


In [198]:
test_data = data_sara_vacuum.copy()

In [199]:
seconds = 10

In [201]:
if (len(test_data) % seconds != 0):
    rows_to_remove = len(test_data) % seconds
    test_data = np.array(test_data[:-rows_to_remove])

In [235]:
test_data.shape[1]

9

In [236]:
test_reshaped = test_data.reshape(np.int(len(test_data)/seconds), seconds, 9)

In [204]:
test_reshaped[0]

array([[ 2.81090e+00, -9.07190e+00, -3.00000e+00, -8.45000e-02,
        -7.28400e-01,  1.15200e-01, -1.96800e+00, -9.56500e+00,
        -8.99000e-01],
       [-2.28410e+00, -8.58110e+00, -2.29610e+00,  1.45200e-01,
        -5.47000e-01,  2.24000e-02, -1.55530e+00, -9.59900e+00,
        -1.26930e+00],
       [-8.45200e-01, -9.83330e+00, -2.88030e+00,  3.34600e-01,
        -3.58200e-01, -1.80000e-02, -1.62620e+00, -9.59260e+00,
        -1.22790e+00],
       [-6.84800e-01, -8.70320e+00, -2.17640e+00,  1.23200e-01,
        -4.56600e-01, -5.70000e-02, -1.57550e+00, -9.59330e+00,
        -1.28670e+00],
       [-1.65920e+00, -9.29940e+00, -2.64090e+00, -6.12000e-02,
        -3.83900e-01,  1.60000e-03, -1.50560e+00, -9.60300e+00,
        -1.29840e+00],
       [-7.49400e-01, -9.33770e+00, -3.14130e+00, -1.34500e-01,
        -4.41300e-01,  1.99000e-02, -1.56050e+00, -9.57200e+00,
        -1.45340e+00],
       [-3.10780e+00, -1.09107e+01, -3.03590e+00, -6.42200e-01,
        -9.48900e-01, -6.68000

In [206]:
test_df = pd.DataFrame(test_reshaped[0])

In [207]:
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,2.8109,-9.0719,-3.0,-0.0845,-0.7284,0.1152,-1.968,-9.565,-0.899
1,-2.2841,-8.5811,-2.2961,0.1452,-0.547,0.0224,-1.5553,-9.599,-1.2693
2,-0.8452,-9.8333,-2.8803,0.3346,-0.3582,-0.018,-1.6262,-9.5926,-1.2279
3,-0.6848,-8.7032,-2.1764,0.1232,-0.4566,-0.057,-1.5755,-9.5933,-1.2867
4,-1.6592,-9.2994,-2.6409,-0.0612,-0.3839,0.0016,-1.5056,-9.603,-1.2984
5,-0.7494,-9.3377,-3.1413,-0.1345,-0.4413,0.0199,-1.5605,-9.572,-1.4534
6,-3.1078,-10.9107,-3.0359,-0.6422,-0.9489,-0.0668,-1.5456,-9.5229,-1.7597
7,-1.5395,-9.3018,1.3576,-0.4223,-1.528,-0.0204,-1.7621,-9.4415,-1.9808
8,-2.8037,-9.8093,1.6329,-0.4498,-0.8634,0.1317,-2.0878,-9.4428,-1.626
9,-2.0615,-9.285,0.1796,-0.3557,-0.6502,-0.0754,-2.2332,-9.4522,-1.3562


In [173]:
len(test_reshaped)

104

In [208]:
means = []
stdevs = []
mins = []
maxs = []
mads = []

for i in range(len(test_reshaped)):
    mean = test_reshaped[i].mean(axis = 0)
    std = test_reshaped[i].std(axis = 0)
    mins = test_reshaped[i].min(axis = 0)
    mads = median_abs_deviation(test_reshaped[0])
    
    means.append(mean)
    stdevs.append(std)

In [209]:
colnames_means = []
for i in range(len(colnames)):
    colnames_means.append(colnames[i] + '_mean')

In [210]:
df_means = pd.DataFrame(means, columns = colnames_means)

In [211]:
df_means

Unnamed: 0,accelerometer_x_mean,accelerometer_y_mean,accelerometer_z_mean,gyroscope_x_mean,gyroscope_y_mean,gyroscope_z_mean,gravity_x_mean,gravity_y_mean,gravity_z_mean
0,-1.29243,-9.41334,-1.60008,-0.15472,-0.69059,0.00532,-1.74198,-9.53843,-1.41574
1,-1.97240,-9.15403,-2.24153,-0.11658,-0.55435,-0.16442,-1.85479,-9.51074,-1.41484
2,-2.41392,-10.39690,-0.76711,0.05256,0.42054,0.03641,-1.93113,-9.51084,-1.27040
3,-1.50695,-9.29217,-1.49426,0.15188,0.47591,0.08591,-1.97766,-9.40567,-1.86280
4,-1.45093,-9.57614,-2.19792,0.04072,0.36596,0.10355,-1.73021,-9.55436,-1.26658
...,...,...,...,...,...,...,...,...,...
99,-1.09921,-9.60656,-1.64989,0.03815,0.45375,0.07803,-1.49708,-9.60322,-1.20341
100,-2.01430,-9.85796,0.31078,0.00578,-0.31770,0.02297,-2.02506,-9.56424,-0.42956
101,-1.96021,-9.68651,0.50518,-0.04297,-0.10494,-0.02486,-1.77407,-9.62096,0.51235
102,-1.58333,-9.55412,0.36967,-0.13466,0.26829,0.01753,-1.22011,-9.70991,0.12104


In [218]:
colnames_stdevs = []
for i in range(len(colnames)):
    colnames_stdevs.append(colnames[i] + '_std')

In [219]:
df_std = pd.DataFrame(stdevs, columns = colnames_stdevs)

In [233]:
data = df_means.merge(df_std, df_std, left_index = True, right_index = True)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [221]:
df_std.head()

Unnamed: 0,accelerometer_x_std,accelerometer_y_std,accelerometer_z_std,gyroscope_x_std,gyroscope_y_std,gyroscope_z_std,gravity_x_std,gravity_y_std,gravity_z_std
0,1.582763,0.627404,1.7973,0.292816,0.338077,0.067357,0.248138,0.064679,0.28999
1,0.793399,0.637529,1.903425,0.210769,0.28431,0.079274,0.137586,0.084812,0.497567
2,1.133684,1.443153,2.045197,0.268541,0.25789,0.235696,0.307955,0.050667,0.52272
3,0.787834,1.13158,1.733461,0.168884,0.353513,0.183788,0.269778,0.125707,0.483171
4,1.341834,0.839233,1.23848,0.275107,0.373247,0.100411,0.154935,0.07075,0.507371


In [222]:
data

Unnamed: 0,accelerometer_x_mean,accelerometer_y_mean,accelerometer_z_mean,gyroscope_x_mean,gyroscope_y_mean,gyroscope_z_mean,gravity_x_mean,gravity_y_mean,gravity_z_mean,accelerometer_x_std,accelerometer_y_std,accelerometer_z_std,gyroscope_x_std,gyroscope_y_std,gyroscope_z_std,gravity_x_std,gravity_y_std,gravity_z_std
0,-1.29243,-9.41334,-1.60008,-0.15472,-0.69059,0.00532,-1.74198,-9.53843,-1.41574,1.582763,0.627404,1.797300,0.292816,0.338077,0.067357,0.248138,0.064679,0.289990
1,-1.97240,-9.15403,-2.24153,-0.11658,-0.55435,-0.16442,-1.85479,-9.51074,-1.41484,0.793399,0.637529,1.903425,0.210769,0.284310,0.079274,0.137586,0.084812,0.497567
2,-2.41392,-10.39690,-0.76711,0.05256,0.42054,0.03641,-1.93113,-9.51084,-1.27040,1.133684,1.443153,2.045197,0.268541,0.257890,0.235696,0.307955,0.050667,0.522720
3,-1.50695,-9.29217,-1.49426,0.15188,0.47591,0.08591,-1.97766,-9.40567,-1.86280,0.787834,1.131580,1.733461,0.168884,0.353513,0.183788,0.269778,0.125707,0.483171
4,-1.45093,-9.57614,-2.19792,0.04072,0.36596,0.10355,-1.73021,-9.55436,-1.26658,1.341834,0.839233,1.238480,0.275107,0.373247,0.100411,0.154935,0.070750,0.507371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,-1.09921,-9.60656,-1.64989,0.03815,0.45375,0.07803,-1.49708,-9.60322,-1.20341,1.036248,1.000028,1.209998,0.212747,0.399569,0.141731,0.256915,0.039366,0.437356
100,-2.01430,-9.85796,0.31078,0.00578,-0.31770,0.02297,-2.02506,-9.56424,-0.42956,1.835271,0.619537,0.942417,0.165406,0.420926,0.190851,0.380327,0.054139,0.512104
101,-1.96021,-9.68651,0.50518,-0.04297,-0.10494,-0.02486,-1.77407,-9.62096,0.51235,1.518134,0.629590,0.482369,0.228026,0.289022,0.095548,0.392980,0.074279,0.193002
102,-1.58333,-9.55412,0.36967,-0.13466,0.26829,0.01753,-1.22011,-9.70991,0.12104,1.216192,0.667479,0.526623,0.163527,0.403445,0.144712,0.398055,0.038094,0.473842
