In [None]:
from datetime import datetime

import torch
from config import *

import matplotlib.pyplot as plt

from data.compress import *
from data.util import count_points_in_period, crop_q_between

from torchmetrics import MeanSquaredError as MSE, MeanAbsolutePercentageError as MAPE


%reload_ext autoreload
%autoreload 2

Load matrix $Q$

In [None]:
# Load the stored Q matrix and keep only the element-wise absolute values.
mat_q = torch.abs(CONFIG.load('mat_q.pt'))
mat_q.shape

Crop $Q$ to the required time period

In [None]:
# Remember the pre-crop dimensions; the column count is checked again below.
n_samples, n_sections = mat_q.shape
# Crop Q using the read and train periods from CONFIG.
mat_q = crop_q_between(mat_q, CONFIG.read_period, CONFIG.train_period)
# After cropping, the row count must equal the number of points in the train
# period and the column count must be unchanged.
assert mat_q.shape == (count_points_in_period(CONFIG.train_period), n_sections)
mat_q.shape

Construct a correlation coefficient matrix
$$
R(i, j)=\frac{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))(z(s_j,t_k)-\tilde{z}(s_j))}{\sqrt{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))^2}\sqrt{\sum_{k=1}^d(z(s_j,t_k)-\tilde{z}(s_j))^2}},
$$
where $$\tilde{z}(s_i)=\frac{1}{d}\sum_{k=1}^dz(s_i,t_k)$$ is the mean of $z(s_i,\cdot)$ over the $d$ time points $t_1,\dots,t_d$.

In [None]:
# Build the correlation matrix R from Q; the call also returns `nonempty`,
# which is used on the next line to select columns of Q.
# NOTE(review): the meaning of the positional `True` flag is not visible in
# this notebook - confirm against data.compress.build_correlation_matrix.
mat_r, nonempty = build_correlation_matrix(mat_q, True)
# Keep only the columns selected by `nonempty`
# (presumably the sections that carry usable data - verify).
mat_q = mat_q[:, nonempty]
mat_r.shape, nonempty

In [None]:
# Coarse sweep: for each threshold alpha, record how many groups the sections
# are split into. torch.arange excludes its end bound, so 1.05 is used as the
# end to let the sweep reach alpha = 1.0.
alpha_groups = []
for alpha in torch.arange(0, 1.05, 0.05):
    groups = split_sections_into_groups(mat_r, alpha)
    alpha_groups.append([alpha, len(groups)])
# One row per threshold: column 0 is alpha, column 1 is the group count.
alpha_groups = torch.tensor(alpha_groups)
alpha_groups

In [None]:
# Column 0 of alpha_groups is the x series, column 1 the y series.
plt.plot(*alpha_groups.unbind(dim=1))
plt.xlabel(r'Correlation coefficient $\alpha$')
plt.ylabel('Number of groups')

In [None]:
# Fine sweep over the upper range: alpha from 0.9 in steps of 0.01
# (torch.arange excludes the end bound 1.01).
# This overwrites `alpha_groups`; `alpha` and `groups` stay defined at
# notebook scope after the loop, holding the values of the last iteration.
alpha_groups = []
for alpha in torch.arange(0.9, 1.01, 0.01):
    groups = split_sections_into_groups(mat_r, alpha)
    alpha_groups.append([alpha, len(groups)])
# One row per threshold: column 0 is alpha, column 1 is the group count.
alpha_groups = torch.tensor(alpha_groups)
alpha_groups

Analyse grouping

In [None]:
# Group the sections with the configured threshold so that the report matches
# the alpha it prints, instead of reusing whatever `groups` value an earlier
# sweep loop happened to leave behind.
groups = split_sections_into_groups(mat_r, CONFIG.alpha)
n_sections = mat_q.shape[1]
print(f'Using alpha={CONFIG.alpha}, {n_sections} correlated sections were divided ' +
      f'into {len(groups)} groups:')
# Groups are numbered from 1 in the printed report.
for i, group in enumerate(groups, start=1):
    print(f'Group {i} - {len(group)} sections: {", ".join(str(s) for s in group)}')


In [None]:
# Metric objects used by the loss helpers in this cell, moved to CONFIG.device.
mse = MSE().to(CONFIG.device)
mape = MAPE().to(CONFIG.device)

from stages import compress_data

def losses_for_alpha(alpha):
    """Compress and reconstruct the test-period data for one threshold and score it.

    Args:
        alpha: correlation threshold forwarded to ``compress_data``.

    Returns:
        A ``(mse, mape)`` pair comparing the reconstruction (prediction)
        with the cropped original matrix (target).
    """
    # Q is reloaded from storage on every call, independent of the notebook-level
    # `mat_q`.
    # NOTE(review): unlike the notebook's top-level load, no torch.abs is applied
    # here - confirm whether compress_data takes care of that.
    mat_q_full = CONFIG.load('mat_q.pt')
    mat_c, mat_x, nonempty, _representatives = compress_data(
        mat_q_full, CONFIG.read_period, CONFIG.test_period, alpha)
    # Reference data: the columns flagged by `nonempty`, cropped with the
    # read/test periods.
    mat_q_test = crop_q_between(mat_q_full[:, nonempty], CONFIG.read_period, CONFIG.test_period)
    reproduced = decompress(mat_c, mat_x)

    # torchmetrics metrics are called as metric(preds, target). MAPE divides by
    # |target|, so the original data must be the target; with the arguments
    # swapped the error would be normalised by the reconstruction instead.
    return mse(reproduced.T, mat_q_test.T), mape(reproduced.T, mat_q_test.T)

def losses_for_alpha_range(begin, end, step = None):
    """Evaluate ``losses_for_alpha`` for every alpha in ``[begin, end)``.

    When ``step`` is omitted, the interval is covered with a step of one tenth
    of its length. The result has one column per alpha; its rows are, in
    order, the alpha values followed by the losses returned by
    ``losses_for_alpha``.
    """
    stride = float(end - begin) / 10 if step is None else step
    alphas = torch.arange(begin, end, stride).to(CONFIG.device)
    # One stacked vector per alpha: the alpha itself followed by its losses.
    per_alpha = [torch.stack([a, *losses_for_alpha(a)]) for a in alphas]
    return torch.stack(per_alpha).T

# Sweep alpha over [0, 1) in steps of 0.01 and plot both errors side by side.
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
losses = losses_for_alpha_range(0.0, 1.0, 0.01)  # rows: alpha, MSE, MAPE
axes[0].set_title('Mean Squared Error')
axes[0].plot(losses[0].cpu(), losses[1].cpu())
axes[0].set_xlabel(r'Correlation coefficient $\alpha$')
axes[0].set_ylabel('Error')
# MAPE is the Mean *Absolute* Percentage Error.
axes[1].set_title('Mean Absolute Percentage Error')
axes[1].plot(losses[0].cpu(), losses[2].cpu())
axes[1].set_xlabel(r'Correlation coefficient $\alpha$')

# Alpha values for which each error stays below 100.
print(losses[0][losses[1] < 100])
print(losses[0][losses[2] < 100])

fig.tight_layout()

In [None]:
# Zoom in on the high-alpha region, with a separate alpha range per metric.
# No step is passed, so each range is covered with a step of one tenth of its
# length. (The full 0..1 sweep is not needed for this figure and is therefore
# not recomputed here.)
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
losses_mse = losses_for_alpha_range(0.94, 1.0)
losses_mape = losses_for_alpha_range(0.92, 1.0)
axes[0].set_title('Mean Squared Error')
axes[0].plot(losses_mse[0].cpu(), losses_mse[1].cpu())
axes[0].set_xlabel(r'Correlation threshold $\alpha$')
axes[0].set_ylabel('Error')
# MAPE is the Mean *Absolute* Percentage Error.
axes[1].set_title('Mean Absolute Percentage Error')
axes[1].plot(losses_mape[0].cpu(), losses_mape[2].cpu())
axes[1].set_xlabel(r'Correlation threshold $\alpha$')


In [None]:
# Build the compression matrix C (and the representatives) from Q and the
# current notebook-level `groups`.
# NOTE(review): `groups` is whatever the most recently executed cell assigned -
# confirm it was produced with the intended alpha before saving.
mat_c, representatives = get_compression_matrix(mat_q, groups)

# C must have one row per row of Q and one column per group.
assert mat_c.shape == (mat_q.shape[0], len(groups))
CONFIG.save(mat_c, 'mat_c.pt')
mat_c.shape

In [None]:
# Compute the compressed matrix X from C and Q, then store it as 'mat_x.pt'.
mat_x = get_compressed_matrix(mat_c, mat_q)
CONFIG.save(mat_x, 'mat_x.pt')
mat_x.shape

In [None]:
# Total absolute reconstruction error: sum of |C @ X - Q| over all entries.
(mat_c @ mat_x - mat_q).abs().sum()

In [None]:
# Regroup with a fixed threshold of 0.99 and list only the groups holding more
# than one section, each paired with its 0-based position in `groups`.
groups = split_sections_into_groups(mat_r, 0.99)
[entry for entry in enumerate(groups) if len(entry[1]) > 1]

In [None]:
def get_missing(alpha):
    """Return a boolean mask over the columns of ``mat_q``.

    An entry is True when the corresponding section is NOT among the
    representatives obtained for threshold ``alpha``.
    """
    sections_by_group = split_sections_into_groups(mat_r, alpha)
    _unused, representatives = get_compression_matrix(mat_q, sections_by_group)
    # Start with everything marked missing, then clear the representatives.
    missing = torch.ones(mat_q.shape[1], dtype=torch.bool)
    missing[representatives] = False
    return missing

# Reference mask: sections that are not representatives at alpha = 0.99.
missing_099 = get_missing(0.99)

# Check, for a coarse sweep (step 0.05) followed by a fine sweep (step 0.01),
# that every section missing at 0.99 is also missing at that alpha: the
# intersection must then be as large as the reference mask itself.
for alpha in torch.cat((
    torch.arange(0, 0.95, 0.05),
    torch.arange(0.9, 1.0, 0.01),
)):
    missing_alpha = get_missing(alpha)
    missing_both = missing_099 & missing_alpha
    assert missing_both.sum() == missing_099.sum()

# Store the indices of the missing sections (a tuple holding one index tensor).
CONFIG.save(missing_099.nonzero(as_tuple=True), 'missing_0.99.pt')
