In [24]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

In [82]:
data_dir = Path('data')


def _load_arr_0(npz_path):
    """Return the ``'arr_0'`` array stored in the ``.npz`` archive at ``npz_path``.

    ``np.load`` on an ``.npz`` file returns a lazy ``NpzFile`` that keeps the
    underlying file open until it is closed; using it as a context manager
    releases the handle as soon as the array has been read into memory.
    """
    with np.load(npz_path) as archive:
        return archive['arr_0']


# Each archive is expected to hold its single array under numpy's default key 'arr_0'.
X_train = _load_arr_0(data_dir / 'X_train.npz')
X_test = _load_arr_0(data_dir / 'X_test.npz')
Y_train = _load_arr_0(data_dir / 'Y_train.npz')

In [83]:
# Split both arrays along axis 1 at the midpoint column of X_train.
# With an odd column count the extra column ends up in the last half.
half = X_train.shape[1] // 2

# Lookup tables keyed by split name, used by the gen_* helpers.
X_first_half = {'train': X_train[:, :half], 'test': X_test[:, :half]}
X_last_half = {'train': X_train[:, half:], 'test': X_test[:, half:]}

# Flat aliases pointing at the very same slices stored in the dicts.
X_train_first_half, X_test_first_half = X_first_half['train'], X_first_half['test']
X_train_last_half, X_test_last_half = X_last_half['train'], X_last_half['test']

In [96]:
def gen_first_half(train_or_test):
    """Save the first-half array of one split to ``data_dir``.

    Args:
        train_or_test: Key into ``X_first_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    The array is written to ``X_<split>_first_half.npz``. If that file is
    already present nothing is written and a notice is printed instead.
    """
    target = data_dir / 'X_{}_first_half.npz'.format(train_or_test)
    if not target.exists():
        np.savez(target, X_first_half[train_or_test])
        return
    print('{} already exists.'.format(target))

In [97]:
# Write the first-half array for each split; splits whose file already exists are skipped.
for split in ('train', 'test'):
    gen_first_half(split)

In [94]:
def gen_first_half_stats(train_or_test):
    """Save five per-row summary features of one split's first-half array.

    Args:
        train_or_test: Key into ``X_first_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    For a 2-D input the saved array has one row per input row and five
    columns, in this order: minimum, maximum, mean, standard deviation, and
    ``(last value - first value) / half``. The result is written to
    ``X_<split>_first_half_stats.npz``; if that file already exists a notice
    is printed and nothing is computed.
    """
    out_path = data_dir / 'X_{}_first_half_stats.npz'.format(train_or_test)
    if out_path.exists():
        print('{} already exists.'.format(out_path))
        return

    rows = X_first_half[train_or_test]
    lowest = rows.min(axis=1)
    highest = rows.max(axis=1)
    average = rows.mean(axis=1)
    spread = rows.std(axis=1)
    # Net change across the row divided by the module-level half-length.
    end_to_end_slope = (rows[:, -1] - rows[:, 0]) / half

    # Stack one feature per row of the matrix, then transpose to (rows, features).
    feature_rows = np.vstack([lowest, highest, average, spread, end_to_end_slope])
    np.savez(out_path, feature_rows.T)

In [95]:
# Write the summary-statistics features for each split; existing files are left as they are.
for split in ('train', 'test'):
    gen_first_half_stats(split)

data/X_train_first_half_stats.npz already exists.
data/X_test_first_half_stats.npz already exists.


In [93]:
def gen_first_half_deviation(train_or_test):
    """Save each row's deviation from a straight reference line.

    For every row the reference line starts at the row's first value and
    rises by ``(last - first) / half`` per column; the saved array is
    ``reference - row`` and has the same shape as the input.

    Args:
        train_or_test: Key into ``X_first_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    The result is written to ``X_<split>_first_half_deviation.npz``. If that
    file already exists a notice is printed and nothing is computed, so a
    file cached by an earlier run must be deleted to regenerate it.

    Float (or complex) input keeps its dtype. Integer input is stored as
    float64, because the slope is generally fractional and writing it into
    an integer buffer would silently truncate the deviations.
    """
    file_path = data_dir / 'X_{}_first_half_deviation.npz'.format(train_or_test)
    if file_path.exists():
        print('{} already exists.'.format(file_path))
        return

    X = X_first_half[train_or_test]
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.inexact) else np.float64

    # NOTE(review): dividing by `half` (not `half - 1`) means the reference
    # line does not land exactly on the last sample -- confirm this is intended.
    slope = (X[:, -1] - X[:, 0]) / half

    # Broadcast (rows, 1) + (rows, 1) * (half,) -> (rows, half): all rows at once.
    reference = X[:, :1] + slope[:, np.newaxis] * np.arange(half)
    deviation = (reference - X).astype(out_dtype, copy=False)
    np.savez(file_path, deviation)

In [88]:
# Write the deviation features for each split; existing files are left as they are.
for split in ('train', 'test'):
    gen_first_half_deviation(split)

In [98]:
def gen_last_half(train_or_test):
    """Save the last-half array of one split to ``data_dir``.

    Args:
        train_or_test: Key into ``X_last_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    The array is written to ``X_<split>_last_half.npz``. If that file is
    already present nothing is written and a notice is printed instead.
    """
    target = data_dir / 'X_{}_last_half.npz'.format(train_or_test)
    if not target.exists():
        np.savez(target, X_last_half[train_or_test])
        return
    print('{} already exists.'.format(target))

In [99]:
# Write the last-half array for each split; splits whose file already exists are skipped.
for split in ('train', 'test'):
    gen_last_half(split)

In [101]:
# Number of groups; gen_last_half_group_min emits one output column per group.
n_groups = 50
# Integer quotient of the half-length by the group count (floor division drops any remainder).
group_size = half // n_groups

In [123]:
def gen_last_half_group_min(train_or_test):
    """Save the per-group minimum of one split's last-half array.

    The first ``n_groups * group_size`` columns are cut into ``n_groups``
    consecutive, non-overlapping groups of ``group_size`` columns, and the
    minimum of each group is taken per row. Columns beyond
    ``n_groups * group_size`` are ignored. The saved array has shape
    ``(n_rows, n_groups)``.

    Args:
        train_or_test: Key into ``X_last_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    Raises:
        ValueError: If ``group_size`` is smaller than 1, i.e. there are
            fewer columns per half than ``n_groups``.

    The result is written to ``X_<split>_last_half_group_min.npz``. If that
    file already exists a notice is printed and nothing is computed, so a
    file cached by an earlier run must be deleted to regenerate it.
    """
    file_path = data_dir / 'X_{}_last_half_group_min.npz'.format(train_or_test)
    if file_path.exists():
        print('{} already exists.'.format(file_path))
        return

    if group_size < 1:
        raise ValueError(
            'group_size must be at least 1, got {}'.format(group_size))

    X = X_last_half[train_or_test]
    used_columns = n_groups * group_size
    # View the used columns as (rows, groups, members) and reduce over the members.
    grouped = X[:, :used_columns].reshape(X.shape[0], n_groups, group_size)
    group_min = grouped.min(axis=2)
    np.savez(file_path, group_min)

In [124]:
# Write the group-minimum features for each split; existing files are left as they are.
for split in ('train', 'test'):
    gen_last_half_group_min(split)

In [129]:
def gen_last_half_rfft_first500_20peaks(train_or_test):
    """Save 20 selected real-FFT coefficients per row of the last-half array.

    The real part of ``np.fft.rfft`` is taken along the last axis, and two
    sets of ten bins are picked from the first 500: bins 0, 50, ..., 450
    followed by bins 49, 99, ..., 499. The two sets are joined column-wise,
    so the saved array keeps one row per input row and has 20 columns.

    Args:
        train_or_test: Key into ``X_last_half`` (``'train'`` or ``'test'``);
            it is also substituted into the output file name.

    The result is written to ``X_<split>_last_half_rfft_first500_20peaks.npz``.
    If that file already exists a notice is printed and nothing is computed,
    so a file cached by an earlier run must be deleted to regenerate it.
    """
    file_path = data_dir / 'X_{}_last_half_rfft_first500_20peaks.npz'.format(train_or_test)
    if file_path.exists():
        print('{} already exists.'.format(file_path))
        return

    X = X_last_half[train_or_test]
    # NOTE(review): only the real part is kept; if spectral magnitude was
    # intended this should be np.abs(...) -- confirm.
    rfft = np.fft.rfft(X).real
    # Slicing silently yields fewer columns if the spectrum has fewer than
    # 500 bins; presumably the inputs are long enough -- verify.
    bins_at_multiples = rfft[:, 0:500:50]
    bins_before_multiples = rfft[:, 50 - 1:500:50]
    # axis=1 keeps one row per sample; the default axis=0 would append the
    # second bin set as extra rows instead of extra feature columns.
    rfft_first500_20peaks = np.concatenate(
        [bins_at_multiples, bins_before_multiples], axis=1)
    np.savez(file_path, rfft_first500_20peaks)

In [130]:
# Write the selected FFT-coefficient features for each split; existing files are left as they are.
for split in ('train', 'test'):
    gen_last_half_rfft_first500_20peaks(split)

data/X_train_last_half_rfft_first500_20peaks.npz already exists.
data/X_test_last_half_rfft_first500_20peaks.npz already exists.
