In [None]:
from datetime import datetime

import torch
from config import *

from data.compress import *
from data.util import count_points_in_period, crop_q_between

%reload_ext autoreload
%autoreload 2

Load matrix $Q$

In [None]:
# Load the stored Q matrix and keep only the magnitude of each entry.
# NOTE(review): the file name suggests these are residuals — confirm upstream.
mat_q = torch.abs(CONFIG.load('mat_q_resid.pt'))
mat_q.shape

Crop $Q$ to the required time period

In [None]:
# Record the dimensions before cropping; the section count is checked below.
n_samples, n_sections = mat_q.shape

# NOTE(review): this rebinds `mat_q` from itself, so re-running this cell
# without re-running the load cell feeds already-cropped data into the crop.
# Presumably the crop goes from the read period to the train period — verify
# against crop_q_between.
mat_q = crop_q_between(mat_q, CONFIG.read_period, CONFIG.train_period)

# The crop must leave one row per point of the train period and keep every section.
expected_shape = (count_points_in_period(CONFIG.train_period), n_sections)
assert mat_q.shape == expected_shape
mat_q.shape

Construct a correlation coefficient matrix
$$
R(i, j)=\frac{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))(z(s_j,t_k)-\tilde{z}(s_j))}{\sqrt{\sum_{k=1}^d(z(s_i,t_k)-\tilde{z}(s_i))^2}\sqrt{\sum_{k=1}^d(z(s_j,t_k)-\tilde{z}(s_j))^2}},
$$
where $\tilde{z}(s_i)$ is the mean of $z(s_i,t_k)$ over $k=1,\dots,d$:
$$\tilde{z}(s_i)=\frac{1}{d}\sum_{k=1}^dz(s_i,t_k)$$

In [None]:
# Build the correlation matrix from Q. The helper returns a second value,
# kept here as `nonempty`, which is displayed next to the matrix shape.
# NOTE(review): the meaning of the positional `True` flag is not visible in
# this notebook — check the signature of build_correlation_matrix.
mat_r, nonempty = build_correlation_matrix(mat_q, True)
mat_r.shape, nonempty

In [None]:
# Split the sections into groups using the correlation matrix and the
# configured alpha.
# NOTE(review): alpha looks like a correlation threshold — confirm in
# split_sections_into_groups.
groups = split_sections_into_groups(mat_r, CONFIG.alpha)

# Show the distinct group sizes and the number of groups. A set comprehension
# avoids building an intermediate list just to pass it to set().
{len(g) for g in groups}, len(groups)

Analyse grouping

In [None]:
# Print a summary of the grouping: a header line with alpha, the number of
# sections and the number of groups, then one line per group with its members.
# The section count is read by index rather than by unpacking into `_`:
# in IPython `_` holds the previous cell output, and assigning to it stops
# IPython from updating `_`, `__` and `___`.
n_sections = mat_q.shape[1]
print(f'Using alpha={CONFIG.alpha}, {n_sections} correlated sections were divided '
      f'into {len(groups)} groups:')
for i, group in enumerate(groups, start=1):
    members = ", ".join(str(s) for s in group)
    print(f'Group {i} - {len(group)} sections: {members}')


In [None]:
# Derive the compression matrix from Q and the section groups.
mat_c = get_compression_matrix(mat_q, groups)

# Sanity check before persisting: as many rows as Q, one column per group.
expected_c_shape = (mat_q.shape[0], len(groups))
assert mat_c.shape == expected_c_shape

CONFIG.save(mat_c, 'mat_c.pt')
mat_c.shape

In [None]:
# Compute the compressed matrix from the compression matrix and Q, persist it
# through CONFIG under 'mat_x.pt', and display its shape.
# NOTE(review): unlike the mat_c cell, nothing asserts the expected shape of
# mat_x here — consider adding a check once the intended shape is confirmed.
mat_x = get_compressed_matrix(mat_c, mat_q)
CONFIG.save(mat_x, 'mat_x.pt')
mat_x.shape