# check results of cluster sweep
- runs the CCA and low-rank linear (LRL) methods for different numbers of latents $n$ and different input data fields
- compares maps of anomaly correlation coefficients on test data

In [None]:
%matplotlib inline
from netCDF4 import Dataset

import numpy as np
import matplotlib.pyplot as plt
import seaborn

from predictability_utils.utils import helpers, io, viz

# --- where the input data and the saved sweep results live ---
root_data = '../../data/pyrina'
root_results = '../../results/pyrina'

# NOTE(review): not referenced by the plotting code visible in this cell, and
# Python does not expand "$HOME" inside a string literal by itself — confirm
# whether this is still needed before relying on it.
data_path = "$HOME/projects/seasonal_forecasting/results/pyrina"

# --- sweep configurations ---
# The four lists are iterated in lockstep (zipped), so entry k of each list
# describes the same run.
fields = ["swvl1", "msl", "t2m"]
regions = ["EU", "NA-EU", "EU"]
preprocesses = ["anomalies", "anomalies", "anomalies"]
lrs = [1e-2, 1e-9, 5e-5]

# inclusive range of latent dimensions n covered by the sweep
n_min = 1
n_max = 5

# --- months / split, encoded into the result file names ---
train_months = [2, 3, 4]
test_months = [5, 6, 7]
m_train = ''.join(map(str, train_months))  # e.g. [2, 3, 4] -> '234'
m_test = ''.join(map(str, test_months))    # e.g. [5, 6, 7] -> '567'
y_train = 51

node_off = 0

# symmetric colour limits shared by all correlation maps
cmin, cmax = -0.6, 0.6
# For every input configuration (field / region / preprocessing) draw one
# figure of anomaly-correlation maps loaded from the saved sweep results:
# one row per method, one column per number of latents n in [n_min, n_max].
methods = ['CCA', 'LRL']            # suffixes of the saved .npy result files
row_labels = ['CCA', 'LR-linear']   # y-axis label shown for each method row
# Derive the grid width from the sweep range instead of hard-coding 5 columns,
# so that changing n_min / n_max keeps the layout valid.
n_cols = n_max - n_min + 1

for field, region, preprocess, lr in zip(fields, regions, preprocesses, lrs):

    fig = plt.figure(figsize=(16, 5))

    for col, n_latents in enumerate(range(n_min, n_max + 1)):

        # common file-name stem of the results for this configuration and n
        sv_fn = f'/{field}_ERA20c_monthly_1900-2010_{region}_{preprocess}__s{m_train}_t{m_test}_split{y_train}__n{n_latents}_'

        for i, method in enumerate(methods):
            # Row-major subplot index: row i (method), column col (n_latents).
            ax = fig.add_subplot(len(methods), n_cols, i * n_cols + col + 1)

            # NOTE(review): the 37 x 42 map shape is hard-coded — presumably
            # the lat/lon grid of the target field; confirm against the data.
            anomaly_corrs_map = np.load(root_results + sv_fn + method + '.npy').reshape(37, 42)

            # Fixed, symmetric colour limits keep all panels comparable.
            im = ax.imshow(anomaly_corrs_map, cmap='seismic', vmin=cmin, vmax=cmax)
            fig.colorbar(im, ax=ax, ticks=[0.5, 0, -0.5])
            ax.set_title(f'avg: {anomaly_corrs_map.mean():.3f}')
            ax.set_xticks([])
            ax.set_yticks([])

            # Label each row on its first panel. Using the Axes handle directly
            # avoids re-calling plt.subplot() to look the panel up again, which
            # depends on pyplot's axes-reuse behaviour.
            if col == 0:
                ax.set_ylabel(row_labels[i])

        # After the inner loop `ax` is the bottom-row panel of this column.
        ax.set_xlabel('n =' + str(n_latents))

    fig.suptitle(f"T2ms (summer) predicted from {field}.{region}.{preprocess} (spring)")
    plt.show()
        
        

# debug