In [32]:
import numpy as np
from sklearn.datasets import make_sparse_spd_matrix
from scipy import linalg as LA
import pandas as pd
import time, os
import matplotlib.pyplot as plt

from pyinstrument import Profiler

from infoband.band_info import InfoCorrBand
from wlpy.covariance import Covariance
from utils.adpt_correlation_threshold import AdptCorrThreshold
from wlpy.gist import heatmap

import warnings
warnings.filterwarnings("ignore")

from my_api import *

# data processing

We use the relative error between the true matrix $A$ and its estimate $\hat A$ as our metric:
$$ \frac{\| A - \hat A \|}{\| A \|} $$

### Results of other methods

In [33]:
# Read the error rates of the competing methods and discard the first CSV column.
df = pd.read_csv('other_methods.csv').pipe(
    lambda frame: frame.drop(columns = frame.columns[0])
)
df.head(3)

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink
0,100,100,0.8,fro,R,0.437159,0.412584,0.437159,0.405599,
1,100,100,0.8,fro,S,0.468781,0.423442,0.468781,0.419056,
2,100,100,0.8,2,R,0.419901,0.411377,0.348096,0.455421,


### Results of our proposed methods

In [34]:
# Directory prefix (note the trailing slash) of the '.txt' result files that
# compute_error loads.
data_path = 'data_2023-01-17/'

In [35]:
def compute_error(row, eta):
    """Return the mean stored error for one benchmark configuration.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'N', 'T', 'rho', 'ord' and 'type'.
    eta : float
        Value forwarded to ``get_title_1`` when building the file name.

    Returns
    -------
    float
        Mean of all values loaded from ``data_path + <title> + '.txt'``,
        or ``numpy.nan`` when that file does not exist.
    """
    # A 'type' of 'R' selects the 'cor' files; any other value selects 'cov'.
    cov_cor = 'cor' if row['type'] == 'R' else 'cov'
    file_name = get_title_1(ord = row['ord'], cov_cor = cov_cor, eta = eta,
                            N = row['N'], T = row['T'], rho = row['rho'],
                            draw_type = 'random')
    file_path = data_path + file_name + '.txt'
    try:
        return np.loadtxt(file_path).mean()
    except FileNotFoundError:
        # NaN (rather than None) keeps the resulting column numeric even when
        # every file for a given eta is missing.
        return np.nan

In [36]:
# Candidate eta values; one "Info Band" error column is appended to df per value.
eta_range = [0.5, 0.8, 0.95, 1]
for eta in eta_range:
    info_band_col = 'Info Band \\( \\eta={} \\)'.format(eta)
    # Row-wise lookup of the stored mean error for this eta.
    df[info_band_col] = df.apply(compute_error, axis = 1, args = (eta,))
df.head(3)

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band \( \eta=0.5 \),Info Band \( \eta=0.8 \),Info Band \( \eta=0.95 \),Info Band \( \eta=1 \)
0,100,100,0.8,fro,R,0.437159,0.412584,0.437159,0.405599,,0.423696,0.330569,0.225972,0.226556
1,100,100,0.8,fro,S,0.468781,0.423442,0.468781,0.419056,,0.449804,0.365138,0.276803,0.277256
2,100,100,0.8,2,R,0.419901,0.411377,0.348096,0.455421,,0.363267,0.298962,0.237871,0.238219


### to latex

In [37]:
# NOTE(review): `oT` is not read by any cell visible here; kept in case other
# code relies on it.
oT = (df['T'] == 100) # & (df['N'] == 500)
# Keep the rows with type 'S' and ord '2', then remove the two selector
# columns. The non-inplace drop() builds a fresh frame instead of mutating a
# filtered slice of df, which avoids pandas' SettingWithCopyWarning.
tmp = df[(df['type'] == 'S') & (df['ord'] == '2')].drop(columns = ['ord', 'type'])
tmp.head(3)

Unnamed: 0,N,T,rho,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band \( \eta=0.5 \),Info Band \( \eta=0.8 \),Info Band \( \eta=0.95 \),Info Band \( \eta=1 \)
3,100,100,0.8,0.487316,0.438334,0.431773,0.471418,,0.392955,0.333804,0.28515,0.285429
7,100,100,0.95,0.2183,0.320215,0.2183,0.259828,,0.228168,0.264165,0.241555,0.234438
11,100,300,0.8,0.265477,0.808214,0.187939,0.243555,0.228187,0.248548,0.180294,0.16634,0.162151


In [38]:
# Index the table by (rho, N, T) and order the rows by that index.
tmp = tmp.set_index(keys = ['rho', 'N', 'T']).sort_index()
print(tmp.shape)
tmp.head(3)

(18, 9)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band \( \eta=0.5 \),Info Band \( \eta=0.8 \),Info Band \( \eta=0.95 \),Info Band \( \eta=1 \)
rho,N,T,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0.8,100,100,0.487316,0.438334,0.431773,0.471418,,0.392955,0.333804,0.28515,0.285429
0.8,100,300,0.265477,0.808214,0.187939,0.243555,0.228187,0.248548,0.180294,0.16634,0.162151
0.8,100,500,0.234285,0.234285,0.234258,0.191221,0.185724,0.204579,0.145436,0.133073,0.143757


In [42]:
# Interactive lookup of the DataFrame.to_latex signature (reference only; it
# does not change any state).
help('pandas.DataFrame.to_latex')

Help on function to_latex in pandas.DataFrame:

pandas.DataFrame.to_latex = to_latex(self, buf: 'FilePath | WriteBuffer[str] | None' = None, columns: 'Sequence[Hashable] | None' = None, col_space: 'ColspaceArgType | None' = None, header: 'bool_t | Sequence[str]' = True, index: 'bool_t' = True, na_rep: 'str' = 'NaN', formatters: 'FormattersType | None' = None, float_format: 'FloatFormatType | None' = None, sparsify: 'bool_t | None' = None, index_names: 'bool_t' = True, bold_rows: 'bool_t' = False, column_format: 'str | None' = None, longtable: 'bool_t | None' = None, escape: 'bool_t | None' = None, encoding: 'str | None' = None, decimal: 'str' = '.', multicolumn: 'bool_t | None' = None, multicolumn_format: 'str | None' = None, multirow: 'bool_t | None' = None, caption: 'str | tuple[str, str] | None' = None, label: 'str | None' = None, position: 'str | None' = None) -> 'str | None'
    Render object to a LaTeX tabular, longtable, or nested table.
    
    Requires ``\usepackage{booktabs}

In [43]:
# Number of columns in tmp (second entry of its shape).
col_num = tmp.shape[1]
col_num

9

In [40]:
# Export tmp as a LaTeX longtable.
# The column format is derived from the table itself: one 'l' per index level,
# a vertical rule, then one fixed-width column per data column, so it stays in
# sync if methods are added to or removed from the table.
latex_column_format = "l" * tmp.index.nlevels + "|" + "p{1cm}" * tmp.shape[1]
# NOTE(review): max_colwidth is raised presumably so long cell text is not
# truncated in the export - confirm.
# NOTE(review): newer pandas releases may no longer accept `col_space` in
# to_latex - confirm against the installed version.
with pd.option_context("max_colwidth", 1000):
    tmp.to_latex(buf = 'latex/table2-2_lx.tex', col_space = [1.2] * tmp.shape[1],
             index = True, na_rep = '', 
             float_format = "%.2f", sparsify = True, 
             column_format = latex_column_format, 
             longtable = True, 
             caption = "The estimation error comparison in terms of the Matrix-2 Norm", 
             label = "t:2-band")

# find conditions with best performance

For each setting (N, T, rho, ord, type), find the method with the lowest error rate among those listed in `other_cols`, which is defined as:

In [6]:
# Columns of df compared against each other when picking the best
# "other" (non-Info-Band) method per row.
other_cols = ['Sample', 'Soft Threshold', 'Hard Threshold', 'Linear Shrink', 'Nonlinear Shrink']

In [7]:
# type(row) = pandas.Series, with indexes ['Sample', 'Soft Threshold', ...]
def get_min_key(row):
    """Return the label of the smallest non-NaN entry of ``row``."""
    return row.idxmin()


def get_min_val(row):
    """Return the smallest non-NaN value of ``row``."""
    return row.min()


# idxmin/min skip NaN wherever it appears in the row. The builtin min() only
# ignored NaN when it was not the first element, so the previous result
# depended on the order of other_cols.
df['min_other_key'] = df[other_cols].idxmin(axis = 1)
df['min_other_val'] = df[other_cols].min(axis = 1)
df['min_other_key'].value_counts()

Soft Threshold      39
Linear Shrink       12
Hard Threshold      12
Nonlinear Shrink     7
Sample               2
Name: min_other_key, dtype: int64

### Compute ratios

For each row, given $\eta$, use the ratio 
$$ \frac{\text{lowest error rate of other methods (i.e. }min\_other\_val)}{\text{error rate of the proposed estimator}} $$

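as a rule to find the parameters (N, T, rho) with the best results: a ratio above 1 means the proposed estimator has a lower error rate than the best of the other methods.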

In [44]:
# Ratio of the best competing error to the Info Band error, one column per eta.
for eta in eta_range:
    # Must match the label used when the Info Band columns were added to df
    # ('Info Band \\( \\eta=... \\)'); a plain 'Info Band eta=...' label does
    # not exist on a fresh run and raises KeyError.
    info_band_col = 'Info Band \\( \\eta={} \\)'.format(eta)
    df['ratio_eta={}'.format(eta)] = df['min_other_val'] / df[info_band_col]
df.head(3)

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band eta=0.5,Info Band eta=0.8,Info Band eta=0.95,Info Band eta=1,min_other_key,min_other_val,ratio_eta=0.5,ratio_eta=0.8,ratio_eta=0.95,ratio_eta=1
0,100,100,0.8,fro,R,0.437159,0.412584,0.437159,0.405599,,0.423696,0.330569,0.225972,0.226556,Linear Shrink,0.405599,0.957286,1.226973,1.794911,1.790279
1,100,100,0.8,fro,S,0.468781,0.423442,0.468781,0.419056,,0.449804,0.365138,0.276803,0.277256,Linear Shrink,0.419056,0.931642,1.147666,1.513913,1.511442
2,100,100,0.8,2,R,0.419901,0.411377,0.348096,0.455421,,0.363267,0.298962,0.237871,0.238219,Hard Threshold,0.348096,0.958236,1.164347,1.463378,1.461242


### Sort the computed ratio

In [47]:
# Quick check of df's (rows, columns) after the ratio columns were added.
df.shape

(72, 20)

In [53]:
# Columns identifying a parameter setting, and the per-eta ratio column labels.
param_cols = ['N', 'T', 'rho']
ratio_cols = list(map('ratio_eta={}'.format, eta_range))
print(ratio_cols)

['ratio_eta=0.5', 'ratio_eta=0.8', 'ratio_eta=0.95', 'ratio_eta=1']


In [59]:
# For every eta, norm and matrix type, print the two parameter settings with
# the largest ratio. The loop variables are deliberately not called `ord` and
# `type`: at notebook top level those names would replace the builtins for
# every cell executed afterwards.
for eta in eta_range:
    sort_col = 'ratio_eta={}'.format(eta)
    for norm_ord in ['fro', '2']:
        for mat_type in ['R', 'S']:
            print(norm_ord, mat_type, eta, sep = ' ')
            tmp = df[(df['ord'] == norm_ord) & (df['type'] == mat_type)]
            tmp = tmp.sort_values(by = sort_col, ascending = False)[param_cols + ratio_cols]
            print(tmp.head(2))

fro R 0.5
      N    T   rho  ratio_eta=0.5  ratio_eta=0.8  ratio_eta=0.95  ratio_eta=1
48  500  100  0.80       1.087016       1.412228        1.847649     1.800520
20  100  500  0.95       0.979090       0.979090        0.795588     0.762526
fro S 0.5
      N    T  rho  ratio_eta=0.5  ratio_eta=0.8  ratio_eta=0.95  ratio_eta=1
49  500  100  0.8       1.058358       1.312218        1.598181     1.570540
41  300  500  0.8       0.942082       1.335859        2.216500     2.361762
2 R 0.5
      N    T   rho  ratio_eta=0.5  ratio_eta=0.8  ratio_eta=0.95  ratio_eta=1
50  500  100  0.80       1.467744       1.983244        2.281038     2.208524
54  500  100  0.95       1.397420       1.984727        2.492834     2.607874
2 S 0.5
      N    T  rho  ratio_eta=0.5  ratio_eta=0.8  ratio_eta=0.95  ratio_eta=1
35  300  300  0.8       1.415961       1.727682        1.525635     1.477475
27  300  100  0.8       1.368773       1.595019        1.332332     1.332332
fro R 0.8
      N    T  rho  ratio

# Query

In [64]:
# Rows with ord 'fro', type 'S' and N = 500.
tmp = df.loc[df['ord'].eq('fro') & df['type'].eq('S') & df['N'].eq(500)]
tmp

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band eta=0.5,Info Band eta=0.8,Info Band eta=0.95,Info Band eta=1,min_other_key,min_other_val,ratio_eta=0.5,ratio_eta=0.8,ratio_eta=0.95,ratio_eta=1
49,500,100,0.8,fro,S,1.047401,0.41941,0.880104,0.679519,,0.396283,0.319619,0.262429,0.267048,Soft Threshold,0.41941,1.058358,1.312218,1.598181,1.57054
53,500,100,0.95,fro,S,0.518841,0.392081,0.973919,0.450736,,0.47564,0.356634,0.274209,0.251642,Soft Threshold,0.392081,0.824324,1.099393,1.429863,1.558094
57,500,300,0.8,fro,S,0.609856,0.273069,0.883377,0.508406,,0.33524,0.244207,0.17148,0.164404,Soft Threshold,0.273069,0.814547,1.118185,1.592425,1.66096
61,500,300,0.95,fro,S,0.314768,0.2355,0.973713,0.297102,,0.408357,0.295583,0.202708,0.169351,Soft Threshold,0.2355,0.576703,0.796732,1.161774,1.390606
65,500,500,0.8,fro,S,0.47597,0.215561,0.883378,0.419988,,0.320097,0.224181,0.142013,0.131291,Soft Threshold,0.215561,0.673424,0.961548,1.517894,1.641862
69,500,500,0.95,fro,S,0.233995,0.17626,0.973639,0.224958,,0.24084,0.267434,0.172407,0.135612,Soft Threshold,0.17626,0.731856,0.659077,1.022348,1.299736


In [65]:
# Rows with ord 'fro', type 'S' and N = 300.
tmp = df.loc[df['ord'].eq('fro') & df['type'].eq('S') & df['N'].eq(300)]
tmp

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band eta=0.5,Info Band eta=0.8,Info Band eta=0.95,Info Band eta=1,min_other_key,min_other_val,ratio_eta=0.5,ratio_eta=0.8,ratio_eta=0.95,ratio_eta=1
25,300,100,0.8,fro,S,0.839914,0.359178,0.884476,0.581832,,0.433204,0.357403,0.295806,0.295807,Soft Threshold,0.359178,0.829121,1.004968,1.214237,1.214231
29,300,100,0.95,fro,S,0.439947,0.285761,0.97401,0.349928,,0.497606,0.394539,0.327522,0.309901,Soft Threshold,0.285761,0.574272,0.724291,0.872496,0.922104
33,300,300,0.8,fro,S,0.488942,0.230638,0.880438,0.410773,,0.363705,0.26416,0.178208,0.172856,Soft Threshold,0.230638,0.634135,0.8731,1.294208,1.334281
37,300,300,0.95,fro,S,0.236401,0.176506,0.208291,0.218453,,0.240447,0.270614,0.192253,0.160788,Soft Threshold,0.176506,0.734073,0.65224,0.91809,1.097756
41,300,500,0.8,fro,S,0.377207,0.377197,0.377207,0.336998,0.325845,0.345877,0.243921,0.147009,0.137967,Nonlinear Shrink,0.325845,0.942082,1.335859,2.2165,2.361762
45,300,500,0.95,fro,S,0.180067,0.180067,0.153661,0.168953,0.169377,0.180067,0.218724,0.158541,0.122933,Hard Threshold,0.153661,0.853355,0.702531,0.969214,1.249955


In [67]:
# Rows with ord 'fro', type 'S' and N = 100.
tmp = df.loc[df['ord'].eq('fro') & df['type'].eq('S') & df['N'].eq(100)]
tmp

Unnamed: 0,N,T,rho,ord,type,Sample,Soft Threshold,Hard Threshold,Linear Shrink,Nonlinear Shrink,Info Band eta=0.5,Info Band eta=0.8,Info Band eta=0.95,Info Band eta=1,min_other_key,min_other_val,ratio_eta=0.5,ratio_eta=0.8,ratio_eta=0.95,ratio_eta=1
1,100,100,0.8,fro,S,0.468781,0.423442,0.468781,0.419056,,0.449804,0.365138,0.276803,0.277256,Linear Shrink,0.419056,0.931642,1.147666,1.513913,1.511442
5,100,100,0.95,fro,S,0.235507,0.239158,0.236136,0.243439,,0.253561,0.323764,0.272303,0.260194,Sample,0.235507,0.928799,0.727404,0.864871,0.905122
9,100,300,0.8,fro,S,0.274187,0.27405,0.267349,0.256148,0.253258,0.354384,0.258011,0.172573,0.164797,Nonlinear Shrink,0.253258,0.714644,0.98158,1.46754,1.536793
13,100,300,0.95,fro,S,0.131696,0.131696,0.131696,0.119872,0.125182,0.131696,0.131696,0.148052,0.140719,Linear Shrink,0.119872,0.910217,0.910217,0.809659,0.851854
17,100,500,0.8,fro,S,0.207967,0.151988,0.167519,0.197419,0.195888,0.232465,0.217598,0.129592,0.121198,Soft Threshold,0.151988,0.653808,0.69848,1.172814,1.254042
21,100,500,0.95,fro,S,0.099378,0.090217,0.099378,0.09243,0.095949,0.099378,0.099378,0.110485,0.11255,Soft Threshold,0.090217,0.907816,0.907816,0.816552,0.801568
