# Extract Frequency & Time Series Features

In [1]:
# Load packages
import os
import pandas as pd
import numpy as np
from scipy.fft import rfft, rfftfreq
from scipy import signal
from scipy.stats import kurtosis
import scipy.stats as stats
from scipy.signal import find_peaks
from scipy.stats import entropy
from tqdm import tqdm

In [2]:
# Import functions to read and sample data
import Read_and_Sample_Data
from Read_and_Sample_Data import read_dataset
from Read_and_Sample_Data import down_sample

# One row per test run; later cells read the 'dir', 'start_ts' and 'colnames'
# columns of this table by row index (0, 1, 2).
metadata = pd.read_csv(filepath_or_buffer='metadata.csv')

In [3]:
# Import functions to extract frequency and time features from the data
import Data_Transforms
from Data_Transforms import FFT
from Data_Transforms import time_features

## Test 1

### Read data

In [4]:
# Collect the Test 1 settings (metadata row 0) under readable names first.
t1_dir = metadata['dir'][0]
t1_first_ts = pd.to_datetime(metadata['start_ts'][0])
# NOTE(review): eval() executes whatever text is stored in the 'colnames'
# column. That is acceptable only while metadata.csv is a trusted, locally
# authored file; ast.literal_eval would be the safer parser for a list literal.
t1_colnames = eval(metadata['colnames'][0])

# Load the whole Test 1 dataset; 20480 is passed through as `rate`.
test_1 = read_dataset(
    data_dir=t1_dir,
    first_ts=t1_first_ts,
    colnames=t1_colnames,
    rate=20480,
)

100%|██████████████████████████████████████████████████████████████████████████████| 2156/2156 [01:26<00:00, 24.91it/s]


In [5]:
# Preview the first five rows (the pandas default) to sanity-check the load.
test_1.head(n=5)

Unnamed: 0,b1x,b1y,b2x,b2y,b3x,b3y,b4x,b4y,time,measurement_id,counter
0,-0.022,-0.039,-0.183,-0.054,-0.105,-0.134,-0.129,-0.142,0.0,0,0
1,-0.105,-0.017,-0.164,-0.183,-0.049,0.029,-0.115,-0.122,4.9e-05,0,1
2,-0.183,-0.098,-0.195,-0.125,-0.005,-0.007,-0.171,-0.071,9.8e-05,0,2
3,-0.178,-0.161,-0.159,-0.178,-0.1,-0.115,-0.112,-0.078,0.000146,0,3
4,-0.208,-0.129,-0.261,-0.098,-0.151,-0.205,-0.063,-0.066,0.000195,0,4


### Extract Frequency attributes via Fast Fourier Transform

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
fft_dir = 'Data/IMS/FFT'
os.makedirs(fft_dir, exist_ok=True)

# Only the first eight column names are processed for Test 1.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
fft_cols = eval(metadata['colnames'][0])[0:8]

for col in fft_cols:
    # `rate` and `q` are forwarded to FFT together with the down-sampled
    # column; presumably FFT derives the effective sampling rate from them --
    # confirm in Data_Transforms.
    arr = FFT(down_sample(test_1, col, q), rate, q)
    # One headerless, index-free CSV per column, e.g. fft_<col>_ds10.csv.
    pd.DataFrame(arr).to_csv('{}/fft_{}_ds{}.csv'.format(fft_dir, col, q),
                             header=False, index=False)

### Extract Time Series attributes

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
ts_dir = 'Data/IMS/TS'
os.makedirs(ts_dir, exist_ok=True)

# Only the first eight column names are processed for Test 1.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
ts_cols = eval(metadata['colnames'][0])[0:8]

for col in ts_cols:
    # Time-domain features of the down-sampled column; `rate` and `q` are
    # forwarded unchanged to time_features.
    df = time_features(down_sample(test_1, col, q), rate, q)
    # One CSV per column, written with a header row and without the index.
    df.to_csv('{}/ts_{}_ds{}.csv'.format(ts_dir, col, q),
              header=True, index=False)

## Test 2

In [None]:
# Release the Test 1 frame before the next dataset is loaded, then leave an
# empty placeholder so the name `test_1` still exists for any later cell.
import gc

del test_1
gc.collect()
test_1 = pd.DataFrame()

In [None]:
# Collect the Test 2 settings (metadata row 1) under readable names first.
t2_dir = metadata['dir'][1]
t2_first_ts = pd.to_datetime(metadata['start_ts'][1])
# NOTE(review): eval() executes whatever text is stored in the 'colnames'
# column. That is acceptable only while metadata.csv is a trusted, locally
# authored file; ast.literal_eval would be the safer parser for a list literal.
t2_colnames = eval(metadata['colnames'][1])

# Load the whole Test 2 dataset; 20480 is passed through as `rate`.
test_2 = read_dataset(
    data_dir=t2_dir,
    first_ts=t2_first_ts,
    colnames=t2_colnames,
    rate=20480,
)

### Extract Frequency attributes via Fast Fourier Transform

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
fft_dir = 'Data/IMS/FFT'
os.makedirs(fft_dir, exist_ok=True)

# Only the first four column names are processed for Test 2.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
fft_cols = eval(metadata['colnames'][1])[0:4]

for col in fft_cols:
    # `rate` and `q` are forwarded to FFT together with the down-sampled
    # column; presumably FFT derives the effective sampling rate from them --
    # confirm in Data_Transforms.
    arr = FFT(down_sample(test_2, col, q), rate, q)
    # One headerless, index-free CSV per column, e.g. fft_T2_<col>_ds10.csv.
    pd.DataFrame(arr).to_csv('{}/fft_T2_{}_ds{}.csv'.format(fft_dir, col, q),
                             header=False, index=False)

### Extract Time Series attributes

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
ts_dir = 'Data/IMS/TS'
os.makedirs(ts_dir, exist_ok=True)

# Only the first four column names are processed for Test 2.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
ts_cols = eval(metadata['colnames'][1])[0:4]

for col in ts_cols:
    # Time-domain features of the down-sampled column; `rate` and `q` are
    # forwarded unchanged to time_features.
    df = time_features(down_sample(test_2, col, q), rate, q)
    # One CSV per column, written with a header row and without the index.
    df.to_csv('{}/ts_T2_{}_ds{}.csv'.format(ts_dir, col, q),
              header=True, index=False)

## Test 3

In [None]:
# Release the Test 2 frame before the next dataset is loaded, then leave an
# empty placeholder so the name `test_2` still exists for any later cell.
import gc

del test_2
gc.collect()
test_2 = pd.DataFrame()

In [None]:
# Collect the Test 3 settings (metadata row 2) under readable names first.
t3_dir = metadata['dir'][2]
t3_first_ts = pd.to_datetime(metadata['start_ts'][2])
# NOTE(review): eval() executes whatever text is stored in the 'colnames'
# column. That is acceptable only while metadata.csv is a trusted, locally
# authored file; ast.literal_eval would be the safer parser for a list literal.
t3_colnames = eval(metadata['colnames'][2])

# Load the whole Test 3 dataset; 20480 is passed through as `rate`.
test_3 = read_dataset(
    data_dir=t3_dir,
    first_ts=t3_first_ts,
    colnames=t3_colnames,
    rate=20480,
)

In [None]:
# Show (rows, columns) of the loaded Test 3 frame as a quick size check.
test_3.shape

In [None]:
# Preview the first five rows (the pandas default) to sanity-check the load.
test_3.head(n=5)

### Extract Frequency attributes via Fast Fourier Transform

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
fft_dir = 'Data/IMS/FFT'
os.makedirs(fft_dir, exist_ok=True)

# Only the first four column names are processed for Test 3.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
fft_cols = eval(metadata['colnames'][2])[0:4]

for col in fft_cols:
    # `rate` and `q` are forwarded to FFT together with the down-sampled
    # column; presumably FFT derives the effective sampling rate from them --
    # confirm in Data_Transforms.
    arr = FFT(down_sample(test_3, col, q), rate, q)
    # One headerless, index-free CSV per column, e.g. fft_T3_<col>_ds10.csv.
    pd.DataFrame(arr).to_csv('{}/fft_T3_{}_ds{}.csv'.format(fft_dir, col, q),
                             header=False, index=False)

### Extract Time Series attributes

In [None]:
q = 10
rate = 20480

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before the first file is written.
ts_dir = 'Data/IMS/TS'
os.makedirs(ts_dir, exist_ok=True)

# Only the first four column names are processed for Test 3.
# NOTE(review): eval() executes the text stored in metadata.csv; fine for a
# trusted local file, but ast.literal_eval would be the safer parser.
ts_cols = eval(metadata['colnames'][2])[0:4]

for col in ts_cols:
    # Time-domain features of the down-sampled column; `rate` and `q` are
    # forwarded unchanged to time_features.
    df = time_features(down_sample(test_3, col, q), rate, q)
    # One CSV per column, written with a header row and without the index.
    df.to_csv('{}/ts_T3_{}_ds{}.csv'.format(ts_dir, col, q),
              header=True, index=False)

In [None]:
# Release the Test 3 frame now that its features are on disk, then leave an
# empty placeholder so the name `test_3` still exists for any later cell.
import gc

del test_3
gc.collect()
test_3 = pd.DataFrame()