# Feature extraction from python vs matlab
- See matlab and python code below
- See the bottom of the notebook for both feature sets side by side
- Differences are noted below.

In [None]:
function [fvec, flab] = getFeatures(acc)
%GETFEATURES Time- and frequency-domain features for one accelerometer clip.
%
%   [fvec, flab] = getFeatures(acc)
%
% INPUT
%   acc  - matrix whose rows 1-3 are the x, y, z accelerations for n data
%          points (one column per sample). Rows after the third are ignored.
%
% OUTPUT
%   fvec - row vector of feature values (131 entries with the settings below)
%   flab - column cell array of feature names, same order as fvec
%
% NOTES
%   * The labels 'abs CrossProd xy/xz/yz' are emitted twice: first for the
%     normalized cross products, then for the raw ones. The names are kept
%     as-is because downstream code selects columns by these exact labels.
%   * Bin labels are produced with '%d' on non-integer edges (e.g. 0.5), which
%     MATLAB renders in exponent form; kept for the same reason.

S = acc(1:3,:); %matrix of xyz acceleration data (excludes time data)

fvec = []; %stores features
flab = {}; %stores names of features
axes = {'x','y','z'};

%% Features for Each Axis (Time Domain)
for i=1:size(S,1)

    %Mean
    fvec = [fvec nanmean(S(i,:))]; flab = [flab; [axes{i} '-mean']];

    %Range of values
    fvec = [fvec range(S(i,:))]; flab = [flab; [axes{i} '-range']];

    %Interquartile Range
    fvec = [fvec iqr(S(i,:))]; flab = [flab; [axes{i} '-IQR']];

    %Histogram of the z-score values
    % z = (x - mean) / std. The subtraction must be parenthesised before the
    % division, otherwise the expression evaluates x - (mean/std).
    zvals = -2:1:2;
    zscores = (S(i,:) - nanmean(S(i,:))) / nanstd(S(i,:));
    histvec = histc(zscores, zvals);
    histvec = histvec(1:end-1); % drop the last bin, which only counts values exactly equal to the final edge (2)
    fvec = [fvec histvec];
    for j=1:length(histvec),
        flab = [flab; [axes{i} sprintf('-hist%d',zvals(j))]];
    end

    %Std (2nd moment)
    fvec = [fvec nanstd(S(i,:))];  flab = [flab; [axes{i} '-std']];

    %Skewness + Kurtosis (3rd and 4th moments)
    % When the signal is constant the moments are computed by hand with eps
    % added to the denominator so the division is not 0/0.
    if nanstd(S(i,:)) == 0
        X = S(i,:); N = length(X);
        s = 1/N*sum((X-mean(X)).^3)/( sqrt(1/N*sum((X-mean(X)).^2)) + eps )^3; %skewness
        k = 1/N*sum((X-mean(X)).^4)/( 1/N*sum((X-mean(X)).^2) + eps )^2; %kurtosis
        fvec = [fvec s]; flab = [flab; [axes{i} '-skew']];
        fvec = [fvec k]; flab = [flab; [axes{i} '-kurt']];
    else
        fvec = [fvec skewness(S(i,:))]; flab = [flab; [axes{i} '-skew']];
        fvec = [fvec kurtosis(S(i,:))]; flab = [flab; [axes{i} '-kurt']];
    end

    %Mean of differences
    fvec = [fvec nanmean(diff(S(i,:)))]; flab = [flab; [axes{i} '-mean diff']];

    %Std of differences (2nd moment)
    fvec = [fvec nanstd(diff(S(i,:)))]; flab = [flab; [axes{i} '-std diff']];

    %Skewness + Kurtosis of differences (3rd and 4th moments)
    % Same zero-variance guard as above. (An earlier note here read
    % "Did NOT add condition" - presumably about a port of this code; the
    % guard IS present in this function.)
    if nanstd(diff(S(i,:))) == 0
        X = diff(S(i,:)); N = length(X);
        s = 1/N*sum((X-mean(X)).^3)/( sqrt(1/N*sum((X-mean(X)).^2)) + eps )^3; %skewness
        k = 1/N*sum((X-mean(X)).^4)/( 1/N*sum((X-mean(X)).^2) + eps )^2; %kurtosis
        fvec = [fvec s]; flab = [flab; [axes{i} '-skew diff']];
        fvec = [fvec k]; flab = [flab; [axes{i} '-kurt diff']];
    else
        fvec = [fvec skewness(diff(S(i,:)))]; flab = [flab; [axes{i} '-skew diff']];
        fvec = [fvec kurtosis(diff(S(i,:)))]; flab = [flab; [axes{i} '-kurt diff']];
    end
end


%% Features Across All Axes (Time Domain)
%Mean of squares
fvec = [fvec nanmean(nanmean(S.^2))];
flab = [flab; 'mean of squares'];

%Normalize values (divided by acc norm)
% Each sample (column) is divided by its own Euclidean norm.
S2=S./(ones(size(S,1),1)*sqrt(nansum(S.^2)));

%Cross products of normalized values
fvec = [fvec nanmean(S2(1,:).*S2(2,:))]; flab = [flab; 'CrossProd norm xy'];
fvec = [fvec nanmean(S2(1,:).*S2(3,:))]; flab = [flab; 'CrossProd norm xz'];
fvec = [fvec nanmean(S2(2,:).*S2(3,:))]; flab = [flab; 'CrossProd norm yz'];
% NOTE: these three are the NORMALIZED abs cross products even though the
% label does not say "norm" (label kept for compatibility, see header).
fvec = [fvec abs(nanmean(S2(1,:).*S2(2,:)))]; flab = [flab; 'abs CrossProd xy'];
fvec = [fvec abs(nanmean(S2(1,:).*S2(3,:)))]; flab = [flab; 'abs CrossProd xz'];
fvec = [fvec abs(nanmean(S2(2,:).*S2(3,:)))]; flab = [flab; 'abs CrossProd yz'];

%Cross products of raw acceleration values
fvec = [fvec nanmean(S(1,:).*S(2,:))]; flab = [flab; 'CrossProd xy'];
fvec = [fvec nanmean(S(1,:).*S(3,:))]; flab = [flab; 'CrossProd xz'];
fvec = [fvec nanmean(S(2,:).*S(3,:))]; flab = [flab; 'CrossProd yz'];
fvec = [fvec abs(nanmean(S(1,:).*S(2,:)))]; flab = [flab; 'abs CrossProd xy'];
fvec = [fvec abs(nanmean(S(1,:).*S(3,:)))]; flab = [flab; 'abs CrossProd xz'];
fvec = [fvec abs(nanmean(S(2,:).*S(3,:)))]; flab = [flab; 'abs CrossProd yz'];

%Correlation coefficients r(x,y)
% Note the emission order: xy, yz, xz.
fvec = [fvec corr(S(1,:)',S(2,:)')]; flab = [flab; 'corr coeff xy'];
fvec = [fvec corr(S(2,:)',S(3,:)')]; flab = [flab; 'corr coeff yz'];
fvec = [fvec corr(S(1,:)',S(3,:)')]; flab = [flab; 'corr coeff xz'];

%Sum of xyz std
fvec = [fvec sum([std(S(1,:)) std(S(2,:)) std(S(3,:))])]; flab = [flab; 'std_sum'];

% %Linear Fit
% ws = warning('off','all'); %Turn off warning
% p_xy = polyfit(S(1,:),S(2,:),1);
% p_xz = polyfit(S(1,:),S(3,:),1);
% p_yz = polyfit(S(2,:),S(3,:),1);
% fvec = [fvec p_xy(1)]; flab = [flab; 'slope xy'];
% fvec = [fvec p_xz(1)]; flab = [flab; 'slope xz'];
% fvec = [fvec p_yz(1)]; flab = [flab; 'slope yz'];
% warning(ws); %Turn warning back on

%% Frequency Domain Processing (High Pass + Power Spectra)
filtered = cell(3,1);
PSD_welch = cell(3,1);
f_welch = cell(3,1);
fc = 0.2; %cutoff frequency (Hz)
fs = 30;  %sampling frequency (Hz) assumed for every clip
f_nyq = fs/2;

%High Pass Filter
% butter expects the cutoff as a fraction of the Nyquist frequency
% (1 == f_nyq), so the cutoff in Hz is divided by f_nyq with no pi factor.
% The coefficients do not depend on the axis, so design the filter once.
[b, a] = butter(2,fc/f_nyq,'high');
for ii = 1:3
    filtered{ii} = filter(b,a,S(ii,:));
end

%Power Spectra
for ii = 1:3
    win_size = ceil(length(filtered{ii})/2); % window = half the clip length
    [PSD_welch{ii}, f_welch{ii}] = pwelch(filtered{ii}, win_size, [], [], fs);
end

%% Features for Each Axis (Frequency Domain)
for ii = 1:3

    %Mean
    fvec = [fvec nanmean(PSD_welch{ii})]; flab = [flab; [axes{ii} '-mean (PSD)']];

    %Std (2nd moment)
    fvec = [fvec nanstd(PSD_welch{ii})];  flab = [flab; [axes{ii} '-std (PSD)']];

    %Skewness + Kurtosis (3rd and 4th moments)
    if nanstd(PSD_welch{ii}) == 0
        X = PSD_welch{ii}; N = length(X);
        s = 1/N*sum((X-mean(X)).^3)/( sqrt(1/N*sum((X-mean(X)).^2)) + eps )^3; %skewness
        k = 1/N*sum((X-mean(X)).^4)/( 1/N*sum((X-mean(X)).^2) + eps )^2; %kurtosis
        fvec = [fvec s]; flab = [flab; [axes{ii} '-skew (PSD)']];
        fvec = [fvec k]; flab = [flab; [axes{ii} '-kurt (PSD)']];
    else
        fvec = [fvec skewness(PSD_welch{ii})]; flab = [flab; [axes{ii} '-skew (PSD)']];
        fvec = [fvec kurtosis(PSD_welch{ii})]; flab = [flab; [axes{ii} '-kurt (PSD)']];
    end

    %Mean Power for 0.5 Hz Intervals
    bins = [0:0.5:10]; %0-10 Hz with bins for every 0.5 Hz
    N = length(bins)-1;
    bin_ind = [1; zeros(N,1)]; %PSD index at each bin edge; edge 0 Hz maps to index 1
    %PSD_welch_norm = PSD_welch{ii}/max(PSD_welch{ii});
    PSD_welch_norm = PSD_welch{ii};
    bin_val = zeros(N,1);
    for jj = 2:length(bins) %ignore 0 Hz in bins vector
        freq_bin = bins(jj);
        % Record the index of the first PSD frequency at or just above this edge.
        for zz = 2:length(f_welch{ii})
           if ((f_welch{ii}(zz-1) < freq_bin) && (f_welch{ii}(zz) > freq_bin)) || f_welch{ii}(zz) == freq_bin
               bin_ind(jj) = zz;
           end
        end
    end

    % Mean power between consecutive edges (both edge indices inclusive, so
    % neighbouring bins share their boundary sample). bin_val(1) is unused.
    for kk = 2:length(bins)
        bin_val(kk) = mean(PSD_welch_norm(bin_ind(kk-1):bin_ind(kk)));
    end
    fvec = [fvec bin_val(2:end)'];
    for jj = 2:length(bin_val)
        flab = [flab; [axes{ii} sprintf('_bin_%d',bins(jj))]];
    end
end
% Early exit: everything below is intentionally unreachable. Enabling it
% would append one more feature and a second 'std_sum' label.
return

%% Features Across All Axes (Frequency Domain)
%Sum of std
fvec = [fvec sum([std(PSD_welch{1}) std(PSD_welch{2}) std(PSD_welch{3})])]; flab = [flab; 'std_sum'];


In [1]:
def featuretest(clip_data):
    """
    Extract features from a simple clip.

    Input: simple clip without trial or sensor dict keys. It is accessed with
        ``.iloc`` and its first three columns are used as the x, y, z axes;
        the whole clip is also handed to ``power_spectra_welch_axis``.
    Output: one-row DataFrame (dtype float32) whose columns are the names in
        ``features_list``. The shape of the feature matrix is printed as a
        side effect.

    Corrections relative to the previous revision of this cell:
    - ``sum_stddev`` is now std(x) + std(y) + std(z). The old expression had a
      misplaced parenthesis and evaluated std(x + const), i.e. just std(x).
    - The normalised cross products multiply the x/y/z *columns* of the
      row-normalised data. The old code indexed ``norm[0]``, ``norm[1]``,
      ``norm[2]``, which are the first three samples, not the axes.
    """

    features_list = ['meanX','meanY','meanZ','rangeX','rangeY','rangeZ','iqrX','iqrY','iqrZ',
                     'stddev_X','stddev_Y','stddev_Z','skewX','skewY','skewZ','kurtX','kurtY','kurtZ',
                     'hist1_X','hist2_X','hist3_X','hist4_X',
                     'hist1_Y','hist2_Y','hist3_Y','hist4_Y',
                     'hist1_Z','hist2_Z','hist3_Z','hist4_Z',
                     #Moments of derivative: mean, SD, skew, kurtosis
                     'mean_derivative_x','mean_derivative_y','mean_derivative_z',
                     'std_derivative_x','std_derivative_y','std_derivative_z',
                     'skew_derivative_x','skew_derivative_y','skew_derivative_z',
                     'kurt_derivative_x','kurt_derivative_y','kurt_derivative_z',
                     'mean_squared_norm','sum_stddev',
                     'xcorr_XY','xcorr_XZ','xcorr_YZ',
                     'crossprod_raw_xy','crossprod_raw_xz','crossprod_raw_yz',
                     'crossprod_norm_xy','crossprod_norm_xz','crossprod_norm_yz',
                     'abs_crossprod_raw_xy','abs_crossprod_raw_xz','abs_crossprod_raw_yz',
                     'abs_crossprod_norm_xy','abs_crossprod_norm_xz','abs_crossprod_norm_yz',
                     'PSD_mean_X','PSD_mean_Y','PSD_mean_Z',
                     'PSD_std_X','PSD_std_Y','PSD_std_Z',
                     'PSD_skew_X','PSD_skew_Y','PSD_skew_Z',
                     'PSD_kur_X','PSD_kur_Y','PSD_kur_Z']
    # mean power in 20 bins per axis: all x bins, then all y bins, then all z bins
    features_list += ['meanpower_bin%d_%s' % (b, ax) for ax in 'xyz' for b in range(1, 21)]

    rawdata = clip_data
    x = rawdata.iloc[:,0]
    y = rawdata.iloc[:,1]
    z = rawdata.iloc[:,2]

    #range on each axis
    min_xyz = np.min(rawdata,axis=0)
    max_xyz = np.max(rawdata,axis=0)
    r = np.asarray(max_xyz-min_xyz)

    #Moments on each axis - mean, std dev, skew, kurtosis
    mean = np.asarray(np.mean(rawdata,axis=0))
    std = np.asarray(np.std(rawdata,axis=0))
    sk = skew(rawdata)
    kurt = kurtosis(rawdata)

    #Cross-correlation between axes pairs
    # np.correlate's default mode is 'valid' (not 'full'); for two inputs of
    # equal length it returns a single value, the zero-lag sum of products.
    # NOTE(review): the MATLAB code uses a Pearson correlation coefficient
    # here, so these values are not expected to match it.
    xcorr = np.array([np.correlate(x,y)[0], np.correlate(x,z)[0], np.correlate(y,z)[0]])

    # interquartile range
    iqrange = iqr(rawdata,axis=0)

    # histogram of z-score values: 4 equal-width bins over [-2, 2] per axis
    hist_z_scores = np.concatenate(
        [np.histogram(zscore(axis_data,axis=0), bins=4, range=(-2,2))[0] for axis_data in (x, y, z)],
        axis=None)

    # derivative - for 3 axis
    # NOTE(review): the MATLAB code uses plain sample differences (diff);
    # here np.gradient is called with a sample spacing of 32, so the values
    # are scaled differently. Confirm which definition is wanted.
    derivative = np.array([np.gradient(axis_data,32) for axis_data in (x, y, z)])
    mean_derivative = np.mean(derivative,axis=1)
    std_derivative = np.std(derivative,axis=1)
    skew_derivative = skew(derivative,axis=1)
    kurt_derivative = kurtosis(derivative,axis=1)
    moments_of_derivative = np.concatenate((mean_derivative,std_derivative,skew_derivative,kurt_derivative), axis=None)

    # sum of xyz std dev (one std per axis, then added)
    sum_stddev = np.array([np.std(x) + np.std(y) + np.std(z)])

    # Default axis=1, independently normalize each sample, otherwise (if 0) normalize each feature.
    norm = preprocessing.normalize(rawdata)

    # mean of the squared norm
    # NOTE(review): because every non-zero row of `norm` has unit length, this
    # is a constant 1/3 for a 3-column clip and carries no information. The
    # MATLAB counterpart is the mean of the squared RAW values. Left unchanged
    # until the intended definition is confirmed.
    mean_squared_norm = np.array([np.mean(np.square(norm))])

    # cross products of the normalized values (divided by acc norm): use the
    # axis columns of `norm`, one product per sample, averaged over samples
    crossprod_norm_xy = np.nanmean(norm[:,0]*norm[:,1])
    crossprod_norm_xz = np.nanmean(norm[:,0]*norm[:,2])
    crossprod_norm_yz = np.nanmean(norm[:,1]*norm[:,2])
    abs_crossprod_norm_xy = np.abs(crossprod_norm_xy)
    abs_crossprod_norm_xz = np.abs(crossprod_norm_xz)
    abs_crossprod_norm_yz = np.abs(crossprod_norm_yz)
    # cross products of the raw values
    crossprod_raw_xy = np.nanmean(x*y)
    crossprod_raw_xz = np.nanmean(x*z)
    crossprod_raw_yz = np.nanmean(y*z)
    abs_crossprod_raw_xy = np.abs(crossprod_raw_xy)
    abs_crossprod_raw_xz = np.abs(crossprod_raw_xz)
    abs_crossprod_raw_yz = np.abs(crossprod_raw_yz)
    crossprod = np.array([crossprod_raw_xy, crossprod_raw_xz, crossprod_raw_yz,
                          crossprod_norm_xy, crossprod_norm_xz, crossprod_norm_yz,
                          abs_crossprod_raw_xy, abs_crossprod_raw_xz, abs_crossprod_raw_yz,
                          abs_crossprod_norm_xy, abs_crossprod_norm_xz, abs_crossprod_norm_yz])

    # power spectral density (PSD), restricted to 0-10 Hz
    Pxx = power_spectra_welch_axis(rawdata,fm=0,fM=10)
    # moments of PSD, grouped by statistic: mean XYZ, std XYZ, skew XYZ, kurtosis XYZ
    psd_columns = [Pxx.iloc[:,k].values for k in range(3)]
    Pxx_moments = np.array([stat(col) for stat in (np.nanmean, np.nanstd, skew, kurtosis)
                            for col in psd_columns])
    # Mean power in 0.5 Hz bins between 0 and 10 Hz (x, y, z); empty bins become 0
    binedges = np.arange(0,10.5,0.5)
    powerbin_df = Pxx.groupby(pd.cut(Pxx.index, bins=binedges)).mean().fillna(0)
    powerbinarray = np.concatenate((powerbin_df.iloc[:,0],powerbin_df.iloc[:,1],powerbin_df.iloc[:,2]), axis=None)

    #Assemble features in array (order must match features_list)
    X = np.concatenate((mean,r,iqrange,std,sk,kurt,hist_z_scores,moments_of_derivative,mean_squared_norm,sum_stddev,
        xcorr,crossprod,Pxx_moments,powerbinarray))
    features = [X]

    F = np.asarray(features) #feature matrix (a single row for this clip)
    print(F.shape)
    features = pd.DataFrame(data=F,columns=features_list,dtype='float32')

    return features

In [7]:
def power_spectra_welch_axis(rawdata,fm,fM):
    """Compute the Welch PSD on each axis and combine them into a dataframe.

    Parameters
    ----------
    rawdata : DataFrame whose first three columns are the x, y, z signals.
        The sampling rate is derived from the index differences divided by
        1000 (i.e. the index is presumably timestamps in milliseconds).
    fm, fM : lower and upper frequency bounds (inclusive) of the band to keep.

    Returns
    -------
    DataFrame indexed by frequency with columns 'PSD_X', 'PSD_Y', 'PSD_Z'.
    """
    x = rawdata.iloc[:,0]
    y = rawdata.iloc[:,1]
    z = rawdata.iloc[:,2]
    n = len(x) #number of samples in clip
    Fs = np.mean(1/(np.diff(x.index)/1000)) #sampling rate in clip
    seg = min(256,n) #Welch segment length, capped at the clip length
    # The three axes share length and sampling rate, so the frequency grids
    # are identical; only the one from x is kept.
    fx,Pxx_denX = welch(x,Fs,nperseg=seg)
    _,Pxx_denY = welch(y,Fs,nperseg=seg)
    _,Pxx_denZ = welch(z,Fs,nperseg=seg)
    #return PSD in desired interval of freq
    inds = (fx<=fM)&(fx>=fm)
    f = fx[inds]
    # 'PSD_Z' must hold the z-axis spectrum (it previously duplicated Y).
    Pxx_den = {'PSD_X':Pxx_denX[inds],'PSD_Y':Pxx_denY[inds],'PSD_Z':Pxx_denZ[inds]}
    Pxxdf = pd.DataFrame(data=Pxx_den,index=f)

    return Pxxdf

In [3]:
# Importing the Libraries
import os
import platform
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import pathlib
import pickle
from itertools import product
from scipy.stats import skew, kurtosis, pearsonr, iqr, zscore
from scipy.signal import butter, welch, filtfilt, resample
import time
import re
import copy
# import nolds
from sklearn import preprocessing
import scipy.io # test matlab code

%matplotlib inline

In [3]:
# Network-share locations of the biostamp data. They are only defined when
# running on a Windows 10 host; on any other platform the names stay unset.
if platform.system() == 'Windows' and platform.release() == '10':
    folder_path = r'//FS2.smpp.local\RTO\Inpatient Sensors -Stroke\Data\biostamp_data'
    path = r'//FS2.smpp.local\RTO\Inpatient Sensors -Stroke\Data\biostamp_data\controls'
    dict_path = r'//FS2.smpp.local\RTO\Inpatient Sensors -Stroke\Data\biostamp_data\Data_dict'
    features_path = r'//FS2.smpp.local\RTO\Inpatient Sensors -Stroke\Data\biostamp_data\FeatureMatrix'

In [4]:
# load dataset
dict_path2 = r'//FS2.smpp.local\RTO\Inpatient Sensors -Stroke\Data\biostamp_data\Data_dict'
# load the pickled activity dictionary for one subject
subj = 'HC02'
# SECURITY NOTE: pickle.load can execute arbitrary code from the file; only
# load pickles that come from a trusted source.
# The context manager closes the file even if unpickling raises.
with open(os.path.join(dict_path2, subj + 'dict.pkl'), 'rb') as f:
    act_dict = pickle.load(f)

In [5]:
# load csv file to check Matlab features
# The CSV is read with default pandas settings into the DataFrame `matlab`.
# NOTE(review): the path is specific to one workstation and mixes '\' and '/'
# separators; adjust it when running elsewhere.
mfeatfile = r'C:\Users\lu-RT&O\Documents\GitHub/matlabfeatures.csv'
matlab = pd.read_csv(mfeatfile)

In [8]:
# extract features using python script
# Pick entry 0 of the 'WALKING' activity, sacrum sensor, accelerometer data
# from act_dict and run it through featuretest (which also prints the shape
# of the resulting feature matrix).
testclip = act_dict['WALKING'][0]['sacrum']['accel']
pythonfea = featuretest(testclip)

(1, 131)


In [9]:
# rearrange columns for matlab features so that row k of the MATLAB table
# lines up with row k of the Python feature table in the side-by-side view.
#
# Two groups need care:
# - Correlations: Python emits XY, XZ, YZ, so the MATLAB columns are listed in
#   that order too (the MATLAB code itself emits xy, yz, xz).
# - Abs cross products: MATLAB writes the label 'abs CrossProd *' twice, first
#   for the normalized products and then for the raw ones. pandas renames the
#   second occurrence with a '.1' suffix, so (assuming the CSV keeps MATLAB's
#   column order) the '.1' columns are the RAW values. Python lists raw before
#   normalized, hence the '.1' columns come first here.
colsm = ['x-mean', 'y-mean', 'z-mean', 
         'x-range', 'y-range', 'z-range', 
         'x-IQR', 'y-IQR', 'z-IQR', 
         'x-std', 'y-std', 'z-std', 
         'x-skew', 'y-skew', 'z-skew', 
         'x-kurt', 'y-kurt', 'z-kurt', 
         'x-hist-2', 'x-hist-1', 'x-hist0', 'x-hist1',   
         'y-hist-2', 'y-hist-1', 'y-hist0', 'y-hist1', 
         'z-hist-2', 'z-hist-1', 'z-hist0', 'z-hist1', 
         'x-mean diff', 'y-mean diff', 'z-mean diff', 
         'x-std diff', 'y-std diff', 'z-std diff', 
         'x-skew diff', 'y-skew diff', 'z-skew diff', 
         'x-kurt diff', 'y-kurt diff', 'z-kurt diff', 
         'mean of squares', 
         'std_sum', 
         'corr coeff xy', 'corr coeff xz', 'corr coeff yz', 
         'CrossProd xy', 'CrossProd xz', 'CrossProd yz', 
         'CrossProd norm xy', 'CrossProd norm xz', 'CrossProd norm yz', 
         'abs CrossProd xy.1', 'abs CrossProd xz.1', 'abs CrossProd yz.1', 
         'abs CrossProd xy', 'abs CrossProd xz', 'abs CrossProd yz', 
         'x-mean (PSD)', 'y-mean (PSD)', 'z-mean (PSD)', 
         'x-std (PSD)', 'y-std (PSD)', 'z-std (PSD)', 
         'x-skew (PSD)', 'y-skew (PSD)', 'z-skew (PSD)', 
         'x-kurt (PSD)', 'y-kurt (PSD)', 'z-kurt (PSD)', 
         'x_bin_5.000000e-01', 'x_bin_1', 'x_bin_1.500000e+00', 'x_bin_2', 
         'x_bin_2.500000e+00', 'x_bin_3', 'x_bin_3.500000e+00', 'x_bin_4', 
         'x_bin_4.500000e+00', 'x_bin_5', 'x_bin_5.500000e+00', 'x_bin_6', 
         'x_bin_6.500000e+00', 'x_bin_7', 'x_bin_7.500000e+00', 'x_bin_8', 
         'x_bin_8.500000e+00', 'x_bin_9', 'x_bin_9.500000e+00', 'x_bin_10', 
         'y_bin_5.000000e-01', 'y_bin_1', 'y_bin_1.500000e+00', 'y_bin_2', 
         'y_bin_2.500000e+00', 'y_bin_3', 'y_bin_3.500000e+00', 'y_bin_4', 
         'y_bin_4.500000e+00', 'y_bin_5', 'y_bin_5.500000e+00', 'y_bin_6', 
         'y_bin_6.500000e+00', 'y_bin_7', 'y_bin_7.500000e+00', 'y_bin_8', 
         'y_bin_8.500000e+00', 'y_bin_9', 'y_bin_9.500000e+00', 'y_bin_10', 
         'z_bin_5.000000e-01', 'z_bin_1', 'z_bin_1.500000e+00', 'z_bin_2', 
         'z_bin_2.500000e+00', 'z_bin_3', 'z_bin_3.500000e+00', 'z_bin_4', 
         'z_bin_4.500000e+00', 'z_bin_5', 'z_bin_5.500000e+00', 'z_bin_6', 
         'z_bin_6.500000e+00', 'z_bin_7', 'z_bin_7.500000e+00', 'z_bin_8', 
         'z_bin_8.500000e+00', 'z_bin_9', 'z_bin_9.500000e+00', 'z_bin_10']
matlab = matlab[colsm]

In [10]:
# arrange python features to match matlab format
# 1) widen the float32 feature table to float64,
# 2) apply np.format_float_positional with precision=6 through DataFrame.apply
#    (presumably yielding one formatted string per feature column - verify),
# 3) turn the result of apply back into a DataFrame with to_frame() so it can
#    be rendered next to the MATLAB table.
pythonfea2 = pythonfea.astype('float64')
pythonfea2 = pythonfea2.apply(np.format_float_positional, precision=6)
pythonfea2 = pythonfea2.to_frame()

In [11]:
from IPython.display import display_html
def display_side_by_side(*args):
    """Render the given DataFrames next to each other in the notebook output.

    Each argument is converted with ``to_html()``; the HTML fragments are
    concatenated and every occurrence of ``table`` is rewritten to carry an
    inline display style before the markup is shown.
    """
    combined_markup = ''.join(frame.to_html() for frame in args)
    inline_markup = combined_markup.replace('table','table style="display:inline"')
    display_html(inline_markup, raw=True)
display_side_by_side(matlab.T,pythonfea2)

Unnamed: 0,0
x-mean,-0.028107
y-mean,0.98163
z-mean,0.28277
x-range,0.71717
y-range,0.94179
z-range,0.74696
x-IQR,0.15531
y-IQR,0.27298
z-IQR,0.21491
x-std,0.11811

Unnamed: 0,0
meanX,-0.028107
meanY,0.981632
meanZ,0.282775
rangeX,0.717174
rangeY,0.941787
rangeZ,0.74696
iqrX,0.155093
iqrY,0.272526
iqrZ,0.214237
stddev_X,0.117967


Notes on differences from matlab
- iqr - minor diff
- std - minor diff
- kurtosis
- bins
- mean/std/skew/kurt diff vs derivative
- mean of squares vs mean squared norm
- std sum vs sum of stddev
- corr
- crossprod norm
- abs crossprod
- abs crossprod norm
- PSD mean/std/skew/kurt
- mean power bins