In [1]:
%pylab inline
import sys,os
sys.path.insert(1,'../../python/')
from utils import * # COMAJE utilities
# Root directory of the QPM covariance outputs. The sub-directories read further down
# ('Mock_%d/', 'HiResMean/' and 'HiResMock_1/') are all appended to this path.
file_root='/mnt/store1/oliverphilcox/QPMCovariance/'

Populating the interactive namespace from numpy and matplotlib


#### Define QPM mock parameter class for matrix read-in

In [2]:
class QPM_Parameters:
    """Settings needed to read one set of QPM covariance matrix files.

    Every attribute is a fixed default except the input directory and the number
    of individual sub-samples, which are selected by the constructor arguments.

    Parameters
    ----------
    mock_no : int
        Mock index substituted into the 'Mock_%d/' directory name. It is not
        used when `mean` is true.
    mean : bool, optional
        If true, read from the 'HiResMean/' directory with n_indiv=100;
        otherwise read a single mock directory with n_indiv=10.
    """
    def __init__(self,mock_no,mean=False):
        # NOTE(review): n and m are presumably the two bin counts (the file names below
        # carry 'n39_m24'); the meaning of `a` is not visible here -- confirm in utils.
        self.n,self.m,self.a=39,24,1
        # Number of individual sub-sample matrices: 100 for the mean run, 10 for a single mock.
        self.n_indiv=100 if mean else 10
        # The mock number is only formatted into the path when a single mock is requested.
        self.infile_root=file_root+('HiResMean/' if mean else 'Mock_%d/'%mock_no)
        # Weight and pair-count files are shared by every mock.
        self.weights_file='/mnt/store1/oliverphilcox/QPM_weights/jackknife_weights_n39_m24_j169.dat'
        self.RR_file = '/mnt/store1/oliverphilcox/QPM_weights/binned_pair_counts_n39_m24_j169.dat'
        # Bin definitions are loaded from disk as soon as the object is created.
        self.r_bins = np.loadtxt('/home/oliverphilcox/COMAJE/python/hybrid_binfile_cut.csv')

## Load covariance with smooth $\xi$

In [3]:
# mean=True selects the 'HiResMean/' directory and n_indiv=100; the mock number (0) is not used in that mode.
p=QPM_Parameters(0,mean=True)
# CovarianceMatrix is not defined in this notebook; presumably it comes from `from utils import *`.
qpm_mean=CovarianceMatrix(p)

In [None]:
# Compute N_eff and the precision matrix for the smooth-xi covariance.
# NOTE(review): compute_precision() presumably populates `qpm_mean.prec`, which the
# KL-divergence cells further down read -- confirm in utils.
# The trailing semicolons suppress the notebook's echo of the return values.
qpm_mean.compute_N_eff();
qpm_mean.compute_precision();

## Load noisy covariance matrices

In [None]:
# Read one CovarianceMatrix per individual mock. Loading stays best-effort (a mock whose
# directory is missing or whose matrix files cannot be read is skipped), but every skip is
# now recorded and reported so the size of the sample used below can be checked.
noisy_cov=[]
skipped_mocks=[]  # (mock index, reason) for each mock that was not loaded
for i in range(40):  # candidate directories are Mock_0 ... Mock_39
    if not os.path.exists(file_root+'Mock_%d'%i):
        skipped_mocks.append((i,'directory not found'))
        continue
    # Built outside the try block, as before: a failure to load the shared bin file is
    # a configuration error and should not be hidden.
    p=QPM_Parameters(i)
    try:
        noisy_cov.append(CovarianceMatrix(p))
    except OSError as exc:
        skipped_mocks.append((i,str(exc)))
print("Read in %d noisy covariance matrices"%len(noisy_cov))
if skipped_mocks:
    print("Skipped %d mocks:"%len(skipped_mocks))
    for mock_no,reason in skipped_mocks:
        print("  Mock_%d: %s"%(mock_no,reason))

## Compare mean determinant per mode

In [None]:
def mean_det(cov_matrix):
    """Return the determinant of the total covariance raised to the power 1/(n*m).

    Parameters
    ----------
    cov_matrix : object
        Must expose `c_tot` (a square matrix) and the integers `n` and `m`.

    Returns
    -------
    float
        exp(log|det(c_tot)| / (n*m)). The sign reported by slogdet is discarded,
        so the result is always non-negative.
    """
    # slogdet avoids the overflow/underflow of forming the determinant directly.
    _sign,log_abs_det=np.linalg.slogdet(cov_matrix.c_tot)
    n_modes=cov_matrix.n*cov_matrix.m
    return np.exp(log_abs_det/n_modes)

# Determinant-per-mode statistic: one value for the smooth-xi matrix and one per noisy matrix.
smooth_mean_det=mean_det(qpm_mean)
noisy_mean_dets=[mean_det(noisy_matrix) for noisy_matrix in noisy_cov]

In [None]:
# Histogram of the noisy-xi statistic, with the smooth-xi value (solid line) and the
# noisy sample mean (dotted line) with its standard error (shaded band) overlaid.
FS=16  # base font size for this figure
counts,bin_edges,patches=plt.hist(noisy_mean_dets,density=True,alpha=0.5,label=r'Noisy $\xi$')
y_top=max(counts)*1.1  # shared upper limit for the vertical markers and the y-axis
plt.vlines(smooth_mean_det,0,y_top,label=r'Smooth $\xi$');
# Sample mean and standard error of the mean. These are deliberately NOT called `mean`/`err`:
# `%pylab` puts numpy's `mean` function into the namespace and a global named `mean`
# would silently replace it for every later cell.
noisy_det_mean=np.mean(noisy_mean_dets)
noisy_det_err=np.std(noisy_mean_dets)/np.sqrt(len(noisy_mean_dets))
plt.vlines(noisy_det_mean,0,y_top,linestyles='dotted')
yvals=np.linspace(0,y_top,100)
plt.ylim(0,y_top)
# fill_betweenx accepts scalar x-limits, so no constant arrays are needed.
plt.fill_betweenx(yvals,noisy_det_mean-noisy_det_err,noisy_det_mean+noisy_det_err,alpha=0.1,color='k')
plt.legend(fontsize=FS-2)
plt.title('Fine Binning Mean Determinant Per Mode',fontsize=FS-2)
plt.ylabel('PDF',fontsize=FS);plt.xlabel(r'$|C_{ab}|^{1/n_\mathrm{bins}}$',fontsize=FS+3);

## Compare Diagonals for a noisy and smooth matrix:

In [None]:
# plotting_tools is not defined in this notebook; presumably it comes from `from utils import *`.
pt=plotting_tools()
# Plot the smooth-xi matrix first and keep the returned figure so the second call can reuse it.
fig=pt.plot_diagonal(qpm_mean,name=r'Smooth $\xi$')
# Overlay a single noisy matrix on the same figure and add the legend.
# NOTE(review): index 1 requires at least two noisy matrices to have been read in.
pt.plot_diagonal(noisy_cov[1],fig=fig,name=r'Noisy $\xi$',legend=True);

In [None]:
# Show the reduced covariance of the smooth-xi matrix and of one noisy matrix.
# The printed labels tell the reader which of the two figures is which.
print('Mean of 100 xi (top)')
pt.plot_reduced_covariance(qpm_mean)
print('Single Matrix xi (bottom)')
# NOTE(review): index 1 requires at least two noisy matrices to have been read in.
pt.plot_reduced_covariance(noisy_cov[1])

## Compute KL divergence and $N_\mathrm{eff}$ estimates
Do this using a single noisy mock $\xi$ and the smooth $\xi$ averaged over 100 mocks.

First compute for the single high-resolution mock

In [None]:
# Matrices returned for each individual sub-sample of the high-resolution mock.
partial_covs=[]
# Start from the default mock-1 parameters, then point them at the 'HiResMock_1/' directory
# and use 100 individual sub-samples instead of the default 10.
p=QPM_Parameters(1)
p.infile_root=file_root+'HiResMock_1/'
p.n_indiv=100
qpm_1=CovarianceMatrix(p)
# Sub-sample counts to test: i_min, i_min+i_step, ... up to (but excluding) i_max.
i_min=5;i_max=p.n_indiv;i_step=3
# NOTE(review): read_all_matrices(root=str(i)) presumably returns the total covariance of
# sub-sample i as a square array (it is averaged and inverted below) -- confirm in utils.
for i in range(i_max):
    partial_covs.append(qpm_1.read_all_matrices(root=str(i)))
samples_indivs=list(np.arange(i_min,i_max,i_step))
# One N_eff estimate and one KL divergence per tested sub-sample count.
N_effs_indivs,KL_div_indivs = [np.zeros(len(samples_indivs)) for _ in range(2)]
for j,ii in enumerate(samples_indivs):
    print("Computing D matrix %d of %d"%(ii,i_max))
    # Only the first ii sub-sample matrices enter this estimate.
    c_tot_mats=partial_covs[:ii]
    nn = len(c_tot_mats)
    summ=0.
    # Leave-one-out loop: invert the mean of all matrices except i and apply it to matrix i.
    for i in range(nn):
        c_excl_i = np.mean(c_tot_mats[:i]+c_tot_mats[i+1:],axis=0)
        summ+=np.matmul(np.linalg.inv(c_excl_i),c_tot_mats[i])
    # D = (nn-1)/nn * ( (1/nn) * sum_i[ C_excl_i^-1 C_i ] - identity )
    D_est = (nn-1.)/nn*(-1.*np.eye(len(c_tot_mats[0]))+1./nn*summ)
    slogdetD=np.linalg.slogdet(D_est)
    n_bins = len(D_est)
    # Signed n_bins-th root of |det D|, built from slogdet's (sign, log|det|) pair.
    D_value = slogdetD[0]*np.exp(slogdetD[1]/n_bins)
    N_effs_indivs[j] = (n_bins+1.)/D_value+1.
    # KL_divergence is defined outside this notebook; it is given the smooth-xi precision
    # matrix and the mean of the first ii sub-sample matrices.
    KL_div_indivs[j] = KL_divergence(qpm_mean.prec,np.mean(c_tot_mats,axis=0))

In [None]:
# Convert each measured KL divergence into an effective sample size through
# N_eff = N_bins*(N_bins+1)/(4*D_KL); the result is kept as a list of NumPy floats.
N_bins=len(qpm_mean.prec)
N_eff_KL_indiv = list(N_bins*(N_bins+1)/(4.*KL_div_indivs))

Plot the true $N_\mathrm{eff}$ (measuring the in-matrix noise level) against $N_\mathrm{eff,KL}$ (the noise level expected if the noisy matrix were a noisy realization of the smooth-$\xi$ covariance matrix).

In [None]:
# Scatter the sub-sample N_eff (x) against the N_eff derived from the KL divergence (y).
# Both axes are divided by 1e5 to match the '/ 10^5' in the axis labels.
plt.scatter(N_effs_indivs/1e5,np.asarray(N_eff_KL_indiv)/1e5,marker='x');
plt.ylabel(r'$N_{eff,KL}\,/\,10^5$',fontsize=18);plt.xlabel(r'$N_\mathrm{eff}\,/\,10^5$',fontsize=18)
plt.title(r'Comparing $N_\mathrm{eff}$ from Subsample Variance and $D_{KL}$',fontsize=16);
# Uncomment to write the figure to the plots directory.
#plt.savefig("../../plots/N_eff_Subsample.pdf",bbox_inches='tight')

In [None]:
# KL divergence implied by each sub-sample N_eff, D = N_bins*(N_bins+1)/(4*N_eff),
# kept as a list of NumPy floats.
D_from_N_eff=list(N_bins*(N_bins+1)/(4.*N_effs_indivs))

Plot the expected KL divergence between the noisy and smooth matrix (if the noisy matrix is a realization from the smooth matrix distribution) and the true KL divergence.

In [None]:
# Against the sub-sample N_eff (divided by 1e5), show the KL divergence implied by N_eff
# ('Estimate') next to the directly computed KL divergence ('True').
plt.scatter(N_effs_indivs/1e5,np.asarray(D_from_N_eff),marker='x',label='Estimate');
plt.scatter(N_effs_indivs/1e5,np.asarray(KL_div_indivs),marker='x',label='True');
plt.xlabel(r'$N_\mathrm{eff}\,/\,10^5$',fontsize=18);plt.ylabel(r'$D_{KL}$',fontsize=18);plt.legend(fontsize=14)
plt.title(r'KL Divergence Estimates',fontsize=16);
# Uncomment to write the figure to the plots directory.
#plt.savefig("../../plots/D_KL_vs_Estimate.pdf",bbox_inches='tight')

Save the dataset:

In [None]:
# Store the four series in one .npz archive, written to the current working directory.
# Keys: N_eff_true / KL_div_true are the directly computed values; N_eff_from_KL /
# KL_div_from_N_eff are each converted from the other quantity.
np.savez('NoisySmooth_KLdiv_N_eff_data.npz',N_eff_true=N_effs_indivs,
        KL_div_true=KL_div_indivs,N_eff_from_KL=N_eff_KL_indiv,
        KL_div_from_N_eff=D_from_N_eff)