Magnification bias analysis of ELG sample using Rongpu Zhao's method.

In [15]:
import numpy as np
import healpy as hp
from astropy.io import fits
from copy import deepcopy
import pickle
import pandas as pd
import fitsio
from scipy.interpolate import interp1d
import sklearn
from joblib import dump, load
import time 

import matplotlib.pyplot as plt 
plt.style.use("seaborn-talk")

In [2]:
# need this since pandas can't read these files 
import pickle5 as pickle
with open("/pscratch/sd/t/tanveerk/final_data_products/elgXplanck/dr9_elg_sv3_selected_features.pkl", "rb") as fh:
    photoz_features = pickle.load(fh)

First we sub-select ELGs from the main sample by subtracting $0.1$ along all magnitudes. We do this to ensure that for a flux-limited sample, we are counting all the possible objects. If we keep our measurement to the flux-limit, then when we magnify, we will not know how many fainter objects will make it to our sample. 

In [3]:
# features that need to be used for sub-selection
features_mag = np.array(['MAG_G', 'MAG_R', 'MAG_Z', 'gfib', 'rfib', 'zfib'])

In [4]:
photoz_features.head()

Unnamed: 0,r-z,g-r,MAG_G,MAG_R,MAG_Z,gfib,rfib,zfib,SHAPE_R,targetid,morphtype
0,0.31367493,0.20127678,23.01813,22.816853,22.503178,23.29049,23.08921,22.775537,0.0,39633547257970726,PSF
1,1.0291519,-0.10413742,23.33529,23.439428,22.410276,24.043419,24.147556,23.118404,0.48742554,39633547257970732,REX
2,0.8038616,-0.38212013,23.738085,24.120205,23.316343,24.00934,24.39146,23.587599,0.0,39633547257970738,PSF
3,0.7879257,0.31860352,23.511257,23.192654,22.404728,23.783386,23.464783,22.676857,0.0,39633547257970792,PSF
4,0.9754276,0.32086945,23.389309,23.06844,22.093012,23.81644,23.495571,22.520144,0.26444468,39633547257970945,REX


First convert relevant features to float from strings.

In [5]:
for feature in features_mag:
    photoz_features[feature] = pd.to_numeric(photoz_features[feature])

In [6]:
# subselect by 0.1
elgs_subselected = deepcopy(photoz_features)

for feature in features_mag:
    elgs_subselected = elgs_subselected[elgs_subselected[feature] <= (elgs_subselected[feature].max() - 0.1)]

In [7]:
photoz_features.max()

r-z                  1.2499428
g-r               9.918213e-05
MAG_G                 23.82878
MAG_R                24.225803
MAG_Z                24.032827
gfib                 24.099998
rfib                 24.499212
zfib                 24.304573
SHAPE_R              1.4998505
targetid     39637366360510011
morphtype                  SER
dtype: object

In [8]:
elgs_subselected.max()

r-z                  1.2499428
g-r               9.918213e-05
MAG_G                23.728779
MAG_R                24.027977
MAG_Z                23.772005
gfib                 23.999996
rfib                 24.298996
zfib                 24.043184
SHAPE_R              1.4998505
targetid     39637366360510006
morphtype                  SER
dtype: object

### Fiber Magnitude Correction Factors
Next we apply fiber magnitude correction factors according to Rongpu's table. 

In [30]:
elgs_subselected['ff_ratio'] = -99.  # FIBERFLUX / FLUX
elgs_subselected['ff_factor'] = -99.  # Fiberflux multiplicative factor for magnification

In [31]:
# flags for type
mask_psf = (elgs_subselected['morphtype']=='PSF')
mask_rex = (elgs_subselected['morphtype']=='REX')
mask_exp = (elgs_subselected['morphtype']=='EXP')
mask_dev = (elgs_subselected['morphtype']=='DEV')

#### PSF

In [33]:
print('PSF')
print(np.sum(mask_psf), np.sum(mask_psf)/len(mask_psf))
elgs_subselected['ff_factor'][mask_psf] = 1. # no correction needed 

PSF
8483380 0.2944791162287173


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


#### Rounded Exponentials (REX)

In [None]:
print('REX')
print(np.sum(mask_rex), np.sum(mask_rex)/len(mask_rex))

data_rex = np.load('/global/homes/t/tanveerk/lselgsXplanck/src/for_tanveer/data/rex.npz')
f_ratio_interp = interp1d(data_rex['shape_r'], data_rex['ratio'], bounds_error=False,
                          fill_value='extrapolate', kind='quadratic')
f_ratio = f_ratio_interp(photoz_features['SHAPE_R'][mask_rex])
f_ratio = np.clip(f_ratio, 0, 1)
f_factor_interp = interp1d(data_rex['shape_r'], data_rex['f_factor'], 
                           bounds_error=False, fill_value='extrapolate', kind='quadratic')
f_factor = f_factor_interp(photoz_features['SHAPE_R'][mask_rex])
f_factor = np.clip(f_factor, 0, 1)
print(np.median(f_factor))

photoz_features['ff_ratio'][mask_rex] = f_ratio
photoz_features['ff_factor'][mask_rex] = f_factor

Now we count the number of objects that fall within our selection. 

In [10]:
# select and order features the same way as RFC was trained
X = elgs_subselected.iloc[:,:-3]
X = X[['MAG_G', 'MAG_R', 'MAG_Z', 'g-r', 'r-z', 'gfib', 'rfib', 'zfib']]

In [14]:
#read in trained pipeline 
pipeline = load('/pscratch/sd/t/tanveerk/final_data_products/elgXplanck/fuji_RandomForestClassifier_single_tomo.joblib') 

In [17]:
start = time.time()
#predict with pipeline
y_prd = pipeline.predict(X)
print(f"It took {time.time() - start} seconds to predict {len(X)} ELGs.")

It took 129.64172554016113 seconds to predict 28808087 ELGs.


In [18]:
28808087/130

221600.66923076924

In [21]:
(130 * 100)/3600

3.611111111111111

In [28]:
np.sum(y_prd == 2)/len(y_prd) * 100

13.221343020798294

PSF
11178212 0.2995113593517259


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


REX
20439919 0.547671481336118
0.7998132504272224


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [7]:
for magnification in [0.99, 1., 1.01]:

    gmag = photoz_features['MAG_G'].astype(np.float) - 2.5*np.log10(magnification)
    rmag = photoz_features['MAG_R'].astype(np.float) - 2.5*np.log10(magnification)
    zmag = photoz_features['MAG_Z'].astype(np.float) - 2.5*np.log10(magnification)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  This is separate from the ipykernel package so we can avoid doing imports until
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  after removing the cwd from sys.path.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """


In [12]:
ff_factor = True
if ff_factor:
    gfibermag = photoz_features['gfib'].astype(np.float) - 2.5*np.log10(1 + (magnification - 1)*photoz_features['ff_factor']) 
    rfibermag = photoz_features['rfib'].astype(np.float) - 2.5*np.log10(1 + (magnification - 1)*photoz_features['ff_factor']) 
    zfibermag = photoz_features['zfib'].astype(np.float) - 2.5*np.log10(1 + (magnification - 1)*photoz_features['ff_factor']) 
else:
    zfibermag = 22.5 - 2.5 * np.log10((cat['FIBERFLUX_Z'] * (1 + (magnification-1) * 1) / cat['MW_TRANSMISSION_Z']).clip(1e-7))

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  This is separate from the ipykernel package so we can avoid doing imports until
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  after removing the cwd from sys.path.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """


In [14]:
magnify_low, magnify_high = 0.99, 1.01
dmag = (-2.5 * np.log10(magnify_low)) - (-2.5 * np.log10(magnify_high))


In [15]:
dmag

0.021715447962731664

In [13]:
magnify_low, magnify_high = 0.99, 1.01
dmag = (-2.5 * np.log10(magnify_low)) - (-2.5 * np.log10(magnify_high))