# Imports

In [None]:
import os

import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import pandas as pd
import scipy.signal

from IPython.core.display import display, HTML

# Parameters

In [None]:
# Directory and file name of the gzip-compressed raw input CSV; the two are
# joined with os.path.join when the data is loaded.
raw_dirname = "../data"
raw_basename = "raw_mechanical_loading_data.csv.gz"

# Directory and file name the processed table is written to (gzip-compressed CSV).
results_dirname = "../data"
results_basename = "mechanical_loading_data.csv.gz"

# Spacing of the uniform index grid the raw data is re-indexed onto; the same
# value is passed as the sample spacing to np.gradient and savgol_filter.
delta = 0.1
# Derivative orders for which "gradient" and "savgol" columns are generated.
derivative_orders = [1]

# Function Definitions

In [None]:
def get_unique(items):
    """Return the distinct elements of *items* as a list, in first-seen order.

    Only the first occurrence of each element is kept; later duplicates are
    dropped. Elements must be hashable. Any iterable is accepted.
    """
    # dict keys are unique and keep insertion order, so this de-duplicates
    # while preserving the order in which elements first appeared.
    return list(dict.fromkeys(items))

# Data Loading

In [None]:
# Read the gzip-compressed raw CSV: the first three rows form a three-level
# column header and the first column becomes the row index.
filename = os.path.join(raw_dirname, raw_basename)
raw_df = pd.read_csv(
    filename,
    header=[0, 1, 2],
    index_col=0,
    compression="gzip",
)

# Column summary, then a truncated preview (at most 10 rows, every column)
# of the table itself and of its descriptive statistics.
raw_df.info(verbose=True)
with pd.option_context(
    "display.max_rows", 10,
    "display.max_columns", None,
):
    display(raw_df)
    display(raw_df.describe())

# Data Processing

## Missing Values

In [None]:
# Re-index the raw data onto a uniform grid with spacing ``delta`` and fill
# interior gaps by linear interpolation.
#
# Each grid point takes the nearest raw sample, but only if that sample lies
# within ``match_tolerance`` of the grid point; otherwise the row is NaN.
match_tolerance = 0.01

# The grid stops at ``max + match_tolerance`` rather than ``max + delta``:
# np.arange with a floating-point step has an unstable length, and a stop of
# ``max + delta`` can yield a point a full step past the last sample. Such a
# point cannot match any sample within the tolerance, so it would become a
# trailing all-NaN row that ``limit_area="inside"`` never fills.
data_df = raw_df.reindex(
    np.arange(raw_df.index.min(), raw_df.index.max() + match_tolerance, delta),
    method="nearest",
    tolerance=match_tolerance,
)

# Linearly interpolate along the index; only NaNs surrounded by valid values
# are filled (leading/trailing NaNs are left untouched).
data_df = data_df.interpolate(method="linear", axis="index", limit_area="inside")

data_df.info(verbose=True)
with pd.option_context("display.max_rows", 10, "display.max_columns", None):
    display(data_df)
    display(data_df.describe())

## Feature Engineering

In [None]:
# Distinct labels of the three column levels, in order of first appearance.
level0 = get_unique(data_df.columns.get_level_values(0))
level1 = get_unique(data_df.columns.get_level_values(1))
level2 = get_unique(data_df.columns.get_level_values(2))
for labels in (level0, level1, level2):
    print(", ".join(labels))

# For every (level 0, level 1) pair, append derivative columns for each
# level-2 signal. ``column_order`` records the final layout: the original
# signals first, then, per derivative order, all "gradient" columns followed
# by all "savgol" columns.
column_order = []
for c0 in level0:
    for c1 in level1:
        column_order += [(c0, c1, c2) for c2 in level2]

        for order in derivative_orders:
            # Finite-difference derivative: np.gradient applied ``order``
            # times with sample spacing ``delta``.
            for c2 in level2:
                gradient_column = (c0,c1,"{} (gradient {})".format(c2,order))
                column_order.append(gradient_column)
                derivative = data_df[(c0, c1, c2)]
                for _ in range(order):
                    derivative = np.gradient(derivative, delta)
                data_df[gradient_column] = derivative

            # Savitzky-Golay derivative of the same order: quadratic fit over
            # a 99-sample window.
            for c2 in level2:
                savgol_column = (c0,c1,"{} (savgol {})".format(c2,order))
                column_order.append(savgol_column)
                data_df[savgol_column] = scipy.signal.savgol_filter(
                    data_df[(c0, c1, c2)],
                    window_length=99,
                    polyorder=2,
                    deriv=order,
                    delta=delta,
                )

# Rearrange the columns into the layout recorded above.
data_df = data_df[column_order]

with pd.option_context("display.max_rows", 10, "display.max_columns", None):
    display(data_df)

# Plots

In [None]:
# Recompute the distinct labels of each column level from the current table.
level0 = get_unique(data_df.columns.get_level_values(0))
level1 = get_unique(data_df.columns.get_level_values(1))
level2 = get_unique(data_df.columns.get_level_values(2))

# One subplot per level-2 column, laid out three to a row.
n_cols = 3
n_rows = int(np.ceil(len(level2) / n_cols))

# Only the first level-0 group is plotted.
for c0 in level0[:1]:
    display(HTML(f"<h1>{c0}</h1>"))

    # One figure per level-1 label.
    for c1 in level1:
        display(HTML(f"<h2>{c1}</h2>"))

        fig = plt.figure(
            constrained_layout=True,
            figsize=(8 * n_cols, 5 * n_rows),
        )
        gs = mpl.gridspec.GridSpec(nrows=n_rows, ncols=n_cols, figure=fig)

        for panel, c2 in enumerate(level2):
            ax = fig.add_subplot(gs[panel])
            ax.plot(data_df[(c0, c1, c2)], zorder=1)
            ax.set_title(c2)
            ax.set_xlabel("Time")

        fig.suptitle("{}, {}".format(c0,c1))
        plt.show()

# Save Data

In [None]:
# Write the processed table to the results location as a gzip-compressed CSV,
# echoing the destination path first.
filename = os.path.join(results_dirname, results_basename)
print(filename)
data_df.to_csv(path_or_buf=filename, compression="gzip")