In [1]:
import pandas as pd
import math
import pywt
import scipy.signal

In [2]:
# function for extracting all the features from a given signal
def extract(signal):
    
   min = signal.min()
   max = signal.max()
   peaks = scipy.signal.find_peaks(signal)
   return [
        min,
        max,
        max - min,
        signal.kurtosis(),
        signal.skew(),
        signal.std(),
        signal.mean(),
        # signal.mode(),
        signal.median(),
        (signal**2).sum(),
        signal.quantile(0.25),
        signal.quantile(0.75),
        len(peaks),
        math.sqrt((signal**2).sum()/signal.size)
     ]

In [3]:
#assume it's cleaned
dataset = pd.read_csv ('../data/data_preprocessed.csv',delimiter = ",")
extracted_dataset = []

#constants
OFFSET = 1
OVERLAP = 0.4
WINDOW = 0.9
NUM_ROWS = dataset.shape[0]

start_index = 0
end_index = 1

while end_index < NUM_ROWS:
    
    #find windows in which consecutive rows are recorded less than 1 second apart
    while end_index < NUM_ROWS -1 and pd.Timedelta(pd.Timestamp(dataset.iloc[end_index + 1][0]) - pd.Timestamp(dataset.iloc[end_index][0])).seconds < OFFSET :
        end_index += 1 

    
    macrowindow = dataset.iloc[start_index:end_index]
    
    NUM_ROWS_MACRO = macrowindow.shape[0]
    start_window = 0
    end_window = 1
    
    # find records inside those macrowindows within a window of WINDOW seconds
    while end_window < NUM_ROWS_MACRO:
        
        found = -1;
        while end_window < NUM_ROWS_MACRO -1 and pd.Timedelta(pd.Timestamp(dataset.iloc[end_window + 1][0]) - pd.Timestamp(dataset.iloc[start_window][0])).microseconds < WINDOW * 1000 * 1000 :
            # handling of overlap. Find first record apart from starting record for WINDOW - OVERLAP seconds
            if(found == -1 and pd.Timedelta(pd.Timestamp(dataset.iloc[end_window + 1][0]) - pd.Timestamp(dataset.iloc[start_window][0])).microseconds >= (WINDOW - OVERLAP)*1000 * 1000):
                start_window = end_window + 1 
                found = 0
                
            end_window += 1
            
        window = macrowindow.iloc[start_window:end_window]
        #update indexes for segmentation
        if(found == -1):
            start_window = end_window + 1
            
        end_window += 1

        if window.shape[0] == 0:
            continue

        if pd.Timedelta(pd.Timestamp(window.iloc[-1][0]) - pd.Timestamp(window.iloc[0][0])).microseconds < WINDOW * 1000 * 1000 / 2 :
            continue

        NUM_ROWS_WINDOW = window.shape[0]
        
        #FEATURE EXTRACTION
        
        accelerometer = window.iloc[:,1]
        gyroscope = window.iloc[:,2]
        magnetometer = window.iloc[:,3]
        
        #Wavelet transformations of the singnals coming from the sensors
        coefficients = []

        print('new acc:')
        print(accelerometer)
        
        coeff_acc = pywt.dwt(accelerometer,wavelet = 'db1')
        for c in coeff_acc:
            coefficients.append(pd.Series(c))
            
        coeff_gyr = pywt.dwt(gyroscope,wavelet = 'db1')
        for c in coeff_gyr:
            coefficients.append(pd.Series(c))
            
        coeff_mag = pywt.dwt(magnetometer,wavelet = 'db1')
        for c in coeff_mag:
            coefficients.append(pd.Series(c))

        coefficients.append(accelerometer)
        coefficients.append(gyroscope)
        coefficients.append(magnetometer)
        print(coefficients)
        rows = []
        for coefficient in coefficients:
            #extract features
            rows = rows + extract(coefficient)
  
        # Add label to the row
        rows += [window.iloc[0][-1]]
        # print(rows)

        # add the row to the new dataframe
        extracted_dataset.append(rows)
        
    
             
    start_index = end_index + 1 
    end_index += 1
    
new_dataset = pd.DataFrame(extracted_dataset)
print(new_dataset)
new_dataset.to_csv('data/data_segmented_'+ str(WINDOW) + '_' + str(OVERLAP) + '.csv', index = False)

new acc:
159    10.736062
160    10.804442
161    10.804442
162    10.804442
163    10.548695
         ...    
424    10.219575
425    10.219575
426    10.219575
427    10.123903
428    10.123903
Name: ACC, Length: 270, dtype: float64
new acc:
159    10.736062
160    10.804442
161    10.804442
162    10.804442
163    10.548695
         ...    
425    10.219575
426    10.219575
427    10.123903
428    10.123903
429    10.123903
Name: ACC, Length: 271, dtype: float64
new acc:
580    10.326067
581    10.326067
582    10.326067
583    10.316444
584    10.316444
         ...    
840     8.714400
841     8.935448
842     8.935448
843     8.935448
844     9.067901
Name: ACC, Length: 265, dtype: float64
new acc:
580    10.326067
581    10.326067
582    10.326067
583    10.316444
584    10.316444
         ...    
841     8.935448
842     8.935448
843     8.935448
844     9.067901
845     9.067901
Name: ACC, Length: 266, dtype: float64
new acc:
997     7.893967
998     7.893967
999     7.893967
