In [None]:
from qcm_data import QCMData, WindowDataset, AutoregressiveLSTM
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel, ExpSineSquared
from sklearn.model_selection import train_test_split
from scipy.ndimage import uniform_filter, gaussian_filter
from scipy.signal import find_peaks
from scipy.interpolate import interp1d

In [None]:
# Load the measurement container from disk.
# NOTE(review): the second argument is presumably the key/group inside the
# HDF5 file — confirm against QCMData.import_hdf.
hdf_path, hdf_key = 'data/Z-230203B.h5', 'data'
data = QCMData.import_hdf(hdf_path, hdf_key)

In [None]:
# Pull the 's1' series for the 270°–390° window (limits are passed in radians).
# NOTE(review): 's1' is presumably a sensor/channel name and `limits` an angular
# range — confirm against QCMData.extract_rotations.
qcm = data.extract_rotations('s1', limits=(np.deg2rad(270), np.deg2rad(390)))

# First difference of the trace; everything below operates on this signal.
dqcm = np.diff(qcm)

# Background estimate: straight lines through the local minima of dqcm,
# extrapolated to the samples before the first / after the last minimum.
mins = find_peaks(-dqcm)[0]
bgd = interp1d(mins, dqcm[mins], 'linear', fill_value='extrapolate')(np.arange(dqcm.size))

# Dominant peaks: local maxima lying in the top 20 % of the dqcm value range.
maxs = find_peaks(dqcm)[0]
maxs = maxs[dqcm[maxs] > 0.8 * (np.max(dqcm) - np.min(dqcm)) + np.min(dqcm)]
if maxs.size < 2:
    # Without two peaks the mean spacing would be NaN and the filter call below
    # would fail with an unrelated-looking conversion error.
    raise ValueError(
        f"need at least two dominant peaks in dqcm to estimate the period, found {maxs.size}"
    )

# Mean spacing (in samples) between consecutive dominant peaks; usually non-integer.
period = np.diff(maxs).mean()

# uniform_filter documents `size` as an integer, so make the truncation explicit
# instead of handing it a float. The moving average over ~one period is scaled by
# the (fractional) period to approximate the background-free sum per period.
window = int(period)
sgn = period * uniform_filter(dqcm - bgd, window, mode='nearest')

In [None]:
%matplotlib widget
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(np.arange(dqcm.size), dqcm)
ax.plot(np.arange(bgd.size), bgd)
ax.plot(np.arange(sgn.size), sgn)
ax.grid(True)
plt.show()

# Tree Regressors

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.multioutput import RegressorChain

# Increments of dqcm. This name was used but never defined, so the cell failed
# with a NameError on a fresh kernel.
# NOTE(review): taken to be np.diff(dqcm) because the plotting cells rebuild the
# trace as dqcm[i] + cumsum(window) — confirm this matches the intended signal.
ddqcm = np.diff(dqcm)

lag = 500       # number of past samples given to the model
overhead = 50   # number of future samples predicted from each window

train_end, test_end = 5000, 6000
if ddqcm.size < test_end + overhead:
    # Shorter traces would silently yield truncated, misaligned windows.
    raise ValueError(
        f"ddqcm has {ddqcm.size} samples, need at least {test_end + overhead}"
    )

# Row i of x_* is ddqcm[i : i + lag] (offset by the split start); the matching row
# of y_* is the `overhead` samples that immediately follow that window.
x_train = np.lib.stride_tricks.sliding_window_view(ddqcm[:train_end], lag)
y_train = np.lib.stride_tricks.sliding_window_view(ddqcm[lag:train_end + overhead], overhead)

x_test = np.lib.stride_tricks.sliding_window_view(ddqcm[train_end:test_end], lag)
y_test = np.lib.stride_tricks.sliding_window_view(ddqcm[train_end + lag:test_end + overhead], overhead)

# Inputs and targets must pair up one-to-one.
assert x_train.shape[0] == y_train.shape[0], (x_train.shape, y_train.shape)
assert x_test.shape[0] == y_test.shape[0], (x_test.shape, y_test.shape)

print(x_train.shape, y_train.shape)

# Seeded so that Restart & Run All reproduces the same forest and plots.
rf_reg = RandomForestRegressor(max_depth=10, n_estimators=100, random_state=42)

# Chained gradient boosting alternative; constructed here but not fitted in this cell.
gb_reg = RegressorChain(GradientBoostingRegressor(random_state=42))

rf_reg.fit(x_train, y_train)

In [None]:
%matplotlib widget
idx = 10

pred_forest = np.stack([mdl.predict(x_train[[idx]])[0] for mdl in rf_reg.estimators_])
lower = np.quantile(pred_forest, 0.05, axis=0)
upper = np.quantile(pred_forest, 0.95, axis=0)

fig, ax = plt.subplots()
ax.plot(np.arange(idx, idx + lag), dqcm[idx] + np.cumsum(x_train[idx]), c='b')
ax.plot(np.arange(idx + lag, idx + lag + overhead) - 1,
        dqcm[lag + idx] + np.cumsum(np.append(0, y_train[idx]))[:-1],
        c='g', linestyle='dashed')
ax.plot(np.arange(idx + lag, idx + lag + overhead) - 1,
        dqcm[lag + idx] + np.cumsum(np.append(0, tree.predict(x_train[[idx]])[0]))[:-1], c='r')
ax.fill_between(np.arange(idx + lag, idx + lag + overhead) - 1,
                dqcm[lag + idx] + np.cumsum(np.append(0, lower))[:-1],
                dqcm[lag + idx] + np.cumsum(np.append(0, upper))[:-1], color='r', alpha=0.2)
fig.tight_layout()
plt.show()

In [None]:
%matplotlib widget
idx = 40

pred_forest = np.stack([mdl.predict(x_test[[idx]])[0] for mdl in tree.estimators_])
lower = np.quantile(pred_forest, 0.05, axis=0)
upper = np.quantile(pred_forest, 0.95, axis=0)

fig, ax = plt.subplots()
ax.plot(np.arange(idx, idx + lag), dqcm[5000 + idx] + np.cumsum(x_test[idx]), c='b')
ax.plot(np.arange(idx + lag, idx + lag + overhead) - 1,
        dqcm[5000 + lag + idx] + np.cumsum(np.append(0, y_test[idx]))[:-1],
        c='b', linestyle='dashed')
ax.plot(np.arange(idx + lag, idx + lag + overhead) - 1,
        dqcm[5000 + lag + idx] + np.cumsum(np.append(0, tree.predict(x_test[[idx]])[0]))[:-1], c='r')
ax.fill_between(np.arange(idx + lag - 1, idx + lag + overhead - 1),
                dqcm[5000 + lag + idx] + np.cumsum(np.append(0, lower))[:-1],
                dqcm[5000 + lag + idx] + np.cumsum(np.append(0, upper))[:-1],
                color='r', alpha=0.2)
fig.tight_layout()
plt.show()

# Gaussian Process

In [None]:
# Sample index as a column vector: scikit-learn estimators take 2-D inputs.
x = np.arange(dqcm.size)[:, None]

# Observation-noise term.
k0 = WhiteKernel(noise_level=0.5)

# Periodic component with the period constrained to 50–200 samples.
# NOTE: defined but not included in `kernel` below.
k1 = ConstantKernel(constant_value=0.5) * \
     ExpSineSquared(length_scale=1.0, periodicity=100, periodicity_bounds=(50, 200))

# Smooth component with its length scale constrained to 1e2–1e3 samples.
k2 = ConstantKernel(constant_value=0.5) * \
     RBF(length_scale=2e2, length_scale_bounds=(1e2, 1e3))

kernel = k0 + k2
# The optimizer restarts draw their starting hyperparameters at random; seed them
# so the fitted kernel is the same on every Restart & Run All.
model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, normalize_y=True,
                                 random_state=42)

In [None]:
# Increments of dqcm, computed here so this section runs on a fresh kernel; the
# name was otherwise never defined and raised a NameError.
# NOTE(review): taken to be np.diff(dqcm) because the plots integrate it back
# with cumsum — confirm this matches the intended signal.
ddqcm = np.diff(dqcm)

# Fit on the first 1000 samples and hold out the following 500.
x_train, y_train = x[:1000], ddqcm[:1000]
x_test, y_test = x[1000:1500], ddqcm[1000:1500]

In [None]:
# Fit the GP (fit returns the estimator itself) and read the parameters of the
# optimised kernel; the bare name on the last line displays them.
params = model.fit(x_train, y_train).kernel_.get_params()
params

In [None]:
# Draw 100 sample functions from `model` at the training inputs.
n_draws = 100
samples = model.sample_y(x_train, n_samples=n_draws)

deep = sns.color_palette(palette='deep')
x_flat = x_train[..., 0]  # drop the feature axis for plotting

fig, ax = plt.subplots()
# Faint sample paths first, then the training targets on top.
for i in range(n_draws):
    sns.lineplot(x=x_flat, y=samples[:, i], alpha=0.2, ax=ax, color=deep[1])
sns.lineplot(x=x_flat, y=y_train, label='y2', ax=ax, color=deep[0])
fig.tight_layout()
plt.show()

In [None]:
# Predictive mean and standard deviation over the training inputs followed by
# the held-out inputs.
x_query = np.concatenate((x_train, x_test))
y_pred, std = model.predict(x_query, return_std=True)

In [None]:
%matplotlib widget
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(np.concatenate((x_train, x_test)), np.cumsum(np.concatenate((y_train, y_test))))
ax.plot(np.concatenate((x_train, x_test)), np.cumsum(y_pred))
ax.fill_between(np.concatenate((y_train, y_test)), y_pred - std, y_pred + std, alpha=0.1)
ax.grid(True)
plt.show()