In [1]:
# imports
from scipy.io import loadmat
import numpy as np
import pandas as pd
from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters, EfficientFCParameters

In [2]:
# load data
matfile_baseline = loadmat('data/raw/Ach-AT/00_0315_ach-at_0.mat')
matfile_ach = loadmat('data/raw/Ach-AT/00_0315_ach-at_1.mat')
matfile_at = loadmat('data/raw/Ach-AT/00_0315_ach-at_2.mat')

In [3]:
def make_windows(matfile, window_size = 6000):
    # retrieve the relevant data from the dictionaries
    df = pd.DataFrame(matfile['filt_data'].copy())
    df = df.T
    time = pd.DataFrame(matfile['filt_t'].copy())
    time = time.T

    # set up the time column
    time.columns = ['t']
    time['t'] = time['t'] - 60000 # make time start at 0
    time['window_id'] = time['t'] / window_size # n second windows
    time['window_id'] = time['window_id'].astype(int)

    # join the electrode signals with time
    df = pd.concat([time, df], axis=1)

    return df

array([[1200000.99998847, 1200001.99998847, 1200002.99998847, ...,
        1379997.99998421, 1379998.99998421, 1379999.99998421]])

In [26]:
df_base = make_windows(matfile_baseline)
df_ach = make_windows(matfile_ach)
df_at = make_windows(matfile_at)

Unnamed: 0,t,window_id,0,1,2,3,4,5,6,7,...,51,52,53,54,55,56,57,58,59,y
0,1.000000,0,-135.904409,-126.704873,-129.462254,-121.519825,-124.371209,-102.553959,-426.883940,-103.540012,...,-117.265941,-140.741374,-136.679302,-182.152656,-145.942890,-145.357315,-137.695972,-143.410471,-141.207962,0
1,2.000000,0,-135.970862,-126.796326,-129.559494,-121.627592,-124.485424,-102.687438,-426.496982,-103.706237,...,-117.290073,-140.782349,-136.725187,-181.736284,-145.996543,-145.411239,-137.754658,-143.477315,-141.291289,0
2,3.000000,0,-136.036568,-126.887027,-129.655970,-121.734591,-124.598914,-102.820126,-426.112029,-103.871656,...,-117.313468,-140.822649,-136.770327,-181.322447,-146.049450,-145.464410,-137.812580,-143.543406,-141.373857,0
3,4.000000,0,-136.101527,-126.976977,-129.751683,-121.840824,-124.711680,-102.952022,-425.729129,-104.036270,...,-117.336128,-140.862274,-136.814723,-180.911182,-146.101611,-145.516827,-137.869738,-143.608744,-141.455666,0
4,5.000000,0,-136.165739,-127.066176,-129.846632,-121.946291,-124.823721,-103.083127,-425.348329,-104.200076,...,-117.358052,-140.901224,-136.858376,-180.502526,-146.153025,-145.568492,-137.926133,-143.673330,-141.536716,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179995,179996.000001,29,-31.363922,-18.136017,-24.654990,-15.623569,-21.193228,-9.688060,-29.621728,5.712186,...,-42.411280,-32.380775,-25.994958,-41.577189,-27.187457,-27.255133,-25.560794,-31.596157,-8.448030,0
179996,179997.000001,29,-31.529919,-18.292296,-24.811492,-15.786962,-21.352978,-9.843246,-29.555560,5.557249,...,-42.565288,-32.554504,-26.169610,-41.326883,-27.357504,-27.417656,-25.730796,-31.762113,-8.557240,0
179997,179998.000001,29,-31.695506,-18.448185,-24.967587,-15.949915,-21.512294,-9.998001,-29.488908,5.402713,...,-42.719049,-32.727916,-26.343946,-41.076677,-27.527192,-27.579813,-25.900408,-31.927695,-8.666124,0
179998,179999.000001,29,-31.860677,-18.603680,-25.123270,-16.112426,-21.671170,-10.152321,-29.421777,5.248583,...,-42.872559,-32.901007,-26.517963,-40.826567,-27.696516,-27.741599,-26.069626,-32.092899,-8.774678,0


In [29]:
# train on four electrodes - choose 4,27,34 and 57 (the centre-most ones)
df_base_4 = df_base[['t','window_id',4,27,34,57]]
df_base_4.loc[:,'y'] = 0
df_ach_4 = df_ach[['t','window_id',4,27,34,57]]
df_ach_4.loc[:,'y'] = 1
df_at_4 = df_at[['t','window_id',4,27,34,57]]
df_at_4.loc[:,'y'] = 2
# this could be done more efficiently, build into the function


df_4 = pd.concat([df_base_4, df_ach_4, df_at_4], axis=0, ignore_index=True)
y_4 = df_4[['window_id','y']]
df_4 = df_4.drop(columns = ['y'])
y_4.to_hdf('0_0315_4_electrodes_y.h5', key = 'y', complevel = 9)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [11]:
# extract minimal features and see how long it takes
X_minimal = extract_features(df_4, column_id='window_id', column_sort='t', default_fc_parameters=MinimalFCParameters())

Feature Extraction: 100%|██████████| 10/10 [00:00<00:00, 22.59it/s]


In [17]:
# extract more features
X_eff = extract_features(df_4, column_id='window_id', column_sort='t', default_fc_parameters=EfficientFCParameters())

Feature Extraction: 100%|██████████| 10/10 [11:36<00:00, 69.67s/it]


In [16]:
X_minimal.to_hdf('0_0315_4_electrodes_min.h5', key = 'features', complevel = 9)

In [19]:
X_eff.to_hdf('0_0315_4_electrodes_eff.h5',key = 'features', complevel = 9)