# This notebook analyzes the Petrillo ML sample with the same LRG cuts from Petrillo and SDSS.
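(i) The low-z (z < 0.4) LRG colour-magnitude selection of
Eisenstein et al. (2001), adapted to include more sources
(fainter and bluer):

$$
\begin{aligned}
r &< 20 \\
|c_{\perp}| &< 0.2 \\
r &< 14 + c_{\parallel}/0.3
\end{aligned}
\qquad (1)
$$

where

$$
\begin{aligned}
c_{\parallel} &= 0.7\,(g - r) + 1.2\,[(r - i) - 0.18] \\
c_{\perp} &= (r - i) - (g - r)/4.0 - 0.18
\end{aligned}
$$

(In the code below, $c_{\parallel}$ is `c_par` and $c_{\perp}$ is `c_perp`.)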

(ii) A source size in the r-band larger than the average
FWHM of the PSF of the respective tiles, times an empirical
factor to maximize the separation between stars and galaxies.
# Edit 06/05/20 - Made a stupid coding error that led to incorrect conclusions. Fixing now.

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial.polynomial import polyfit
import pandas as pd
#from matplotlib_venn import venn3, venn3_circles
import matplotlib.lines as lines
from astropy import constants as const
from astropy.io import fits
import astropy.coordinates as coord
import astropy.units as u
from astropy.table import Table
# import ephem
from astropy.cosmology import FlatLambdaCDM
from scipy import stats as sci

## g-band magnitude (AB)

In [10]:
# Load the GAMA LambdarCat SDSS g-band photometry from G09, G12, and G15 (DR3).
# The context manager closes the FITS file as soon as the needed columns have
# been read, instead of leaving the handle open until `hdul` is rebound by a
# later cell.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSg.fits') as hdul:
    hdul.verify('fix')
    photo_g_data = hdul[1].data  # assume the first extension is a table
    #print(photo_g_data.columns)

    # Take the ID and the g-band magnitude columns.
    # LAMID is GAMA ID where applicable.
    photo_g_id = photo_g_data.LAMID
    photo_g_mag = photo_g_data.ABMagDFA

    # Pair IDs with magnitudes row by row while the file is still open, so the
    # DataFrame below is built from already-materialised Python values.
    photo_g_list = list(zip(photo_g_id, photo_g_mag))

photo_g = pd.DataFrame(photo_g_list, columns = ['GAMA_ID', 'g'])
photo_g.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   g        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [11]:
# Keep only the rows whose GAMA_ID does not contain the substring "New".
photo_g = photo_g.loc[~photo_g['GAMA_ID'].str.contains('New')]

# An earlier attempt decoded GAMA_ID from bytes (e.g. b'8000'.decode("utf-8"));
# those lines are left disabled:
#photo_g_id.decode("utf-8")
#photo_g['GAMA_ID'] = photo_g['GAMA_ID'].str.decode("utf-8")

photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


In [12]:
# Write IDs as integers
# GAMA_ID was loaded with dtype object (see the .info() output above); casting it
# to int gives a numeric key for the merges on GAMA_ID performed further down.
photo_g['GAMA_ID'] = photo_g.GAMA_ID.astype(int)

In [13]:
# Display the g-band frame again after the integer cast of GAMA_ID.
photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


## r-band

In [14]:
# Load the GAMA LambdarCat SDSS r-band photometry from G09, G12, and G15 (DR3).
# The context manager closes the FITS file as soon as the needed columns have
# been read, instead of leaving the handle open until `hdul` is rebound by a
# later cell.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSr.fits') as hdul:
    hdul.verify('fix')
    photo_r_data = hdul[1].data  # assume the first extension is a table
    #print(photo_r_data.columns)

    # Take the ID and the r-band magnitude columns.
    # LAMID is GAMA ID where applicable.
    photo_r_id = photo_r_data.LAMID
    photo_r_mag = photo_r_data.ABMagDFA

    # Pair IDs with magnitudes row by row while the file is still open, so the
    # DataFrame below is built from already-materialised Python values.
    photo_r_list = list(zip(photo_r_id, photo_r_mag))

photo_r = pd.DataFrame(photo_r_list, columns = ['GAMA_ID', 'r'])
photo_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   r        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [8]:
# GAMA_ID is given in bytes, need to decode. #b'8000'.decode("utf-8")
#photo_g_id.decode("utf-8")
#photo_r['GAMA_ID'] = photo_r['GAMA_ID'].str.decode("utf-8")

In [15]:
# Keep only the rows whose GAMA_ID does not contain the substring "New",
# then display the filtered r-band frame.
photo_r = photo_r.loc[~photo_r['GAMA_ID'].str.contains('New')]
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


In [16]:
# Write IDs as integers
# GAMA_ID was loaded with dtype object (see the .info() output above); casting it
# to int gives a numeric key for the merges on GAMA_ID performed further down.
photo_r['GAMA_ID'] = photo_r.GAMA_ID.astype(int)

In [19]:
# Display the r-band frame again after the integer cast of GAMA_ID.
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


## i-band

In [20]:
# Load the GAMA LambdarCat SDSS i-band photometry from G09, G12, and G15 (DR3).
# The context manager closes the FITS file as soon as the needed columns have
# been read, instead of leaving the handle open until `hdul` is rebound by a
# later cell.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSi.fits') as hdul:
    hdul.verify('fix')
    photo_i_data = hdul[1].data  # assume the first extension is a table
    #print(photo_i_data.columns)

    # Take the ID and the i-band magnitude columns.
    # LAMID is GAMA ID where applicable.
    photo_i_id = photo_i_data.LAMID
    photo_i_mag = photo_i_data.ABMagDFA

    # Pair IDs with magnitudes row by row while the file is still open, so the
    # DataFrame below is built from already-materialised Python values.
    photo_i_list = list(zip(photo_i_id, photo_i_mag))

photo_i = pd.DataFrame(photo_i_list, columns = ['GAMA_ID', 'i'])
photo_i.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   i        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [21]:
# GAMA_ID is given in bytes, need to decode. #b'8000'.decode("utf-8")
#photo_g_id.decode("utf-8")
#photo_i['GAMA_ID'] = photo_i['GAMA_ID'].str.decode("utf-8")

In [22]:
# Keep only the rows whose GAMA_ID does not contain the substring "New",
# then display the filtered i-band frame.
photo_i = photo_i.loc[~photo_i['GAMA_ID'].str.contains('New')]
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


In [23]:
# Write IDs as integers
# GAMA_ID was loaded with dtype object (see the .info() output above); casting it
# to int gives a numeric key for the merges on GAMA_ID performed further down.
photo_i['GAMA_ID'] = photo_i.GAMA_ID.astype(int)
# Display the i-band frame after the cast.
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


## Now I create variables to establish criteria for satisfying Petrillo's LRG classification.

## Now to see if Zoo passes the LRG cuts.

In [24]:
# Load the lens-candidate table that will be tested against the LRG cuts.
# Per the displayed output it carries GAMA_ID, ID, RA, DEC, SCORE, STELLAR_MASS,
# Z and theta_e, plus two "Unnamed" columns.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns look like index columns
# written out by earlier to_csv calls - confirm before relying on them.
mac = pd.read_csv('/home/shawn/Desktop/gravitational_lensing_research/Lens_Project_Files/mac_latest.csv')
mac

Unnamed: 0.1,Unnamed: 0,GAMA_ID,ID,RA,DEC,SCORE,STELLAR_MASS,Z,theta_e
0,0,3899011.0,2251.0,131.33397,-0.91563,64.0,329400000000.0,0.4027,1.448577
1,1,376338.0,2660.0,132.98734,1.50468,30.0,303400000000.0,0.3272,1.54231
2,2,323152.0,2967.0,130.54643,1.64325,30.0,827700000000.0,0.7216,1.715374
3,3,663364.0,2669.0,140.35284,-0.97414,28.0,268800000000.0,0.3221,1.463153
4,4,3629152.0,1933.0,135.88864,-0.97487,26.0,212900000000.0,0.4059,1.159976
5,5,3575500.0,1906.0,129.23265,-1.57512,26.0,320900000000.0,0.3652,1.501378
6,6,197972.0,2007.0,137.85838,-0.7528,24.0,355100000000.0,0.3373,1.643379
7,7,3883690.0,2756.0,134.57492,-1.55142,22.0,136200000000.0,0.2481,1.186713
8,8,3911769.0,1704.0,138.66089,-1.11639,20.0,174000000000.0,0.2794,1.263956
9,9,371400.0,2275.0,131.20438,1.00064,18.0,202100000000.0,0.2948,1.326142


In [27]:
# Build the colour terms used by the LRG selection criteria.

# Combine the g, r and i photometry into a single frame keyed on GAMA_ID.
gr = photo_g.merge(photo_r, on = 'GAMA_ID')
gri = gr.merge(photo_i, on = 'GAMA_ID')

# Magnitudes stay in the AB system, which is what we want. The commented-out
# offsets would convert to the Johnson magnitude system if re-enabled.
g = gri['g']# + 0.013
r = gri['r']# + 0.226
i = gri['i']# + 0.296

# Colour combinations:
#   c_par  = 0.7(g - r) + 1.2[(r - i) - 0.18]
#   c_perp = (r - i) - (g - r)/4.0 - 0.18
g_minus_r = g - r
r_minus_i = r - i
c_par = 0.7*g_minus_r + 1.2*(r_minus_i - 0.18)
c_perp = r_minus_i - g_minus_r/4.0 - 0.18

gri = gri.assign(c_par = c_par, c_perp = c_perp)

# 06/05/20: Do NOT add these columns to mac by direct assignment. The disabled
# lines below did exactly that on a brain-dead tired day, which appended the
# first 47 magnitude measurements... which do not correspond to the candidate
# objects. The photometry has to be matched to mac through GAMA_ID instead.
#mac['g'] = g
#mac['r'] = r
#mac['i'] = i
#mac['c_par'] = c_par
#mac['c_perp'] = c_perp
#mac['g_r'] = g - r

gri

Unnamed: 0,GAMA_ID,g,r,i,c_par,c_perp
0,100000,20.594820,19.811590,19.469633,0.742609,-0.033850
1,100001,20.743700,19.662579,19.226625,1.063929,-0.014327
2,100002,18.867937,17.846043,17.358280,1.084641,0.052289
3,100003,20.013288,19.200987,18.706486,0.946012,0.111426
4,100004,19.506737,19.207859,18.875196,0.392410,0.077943
...,...,...,...,...,...,...
219453,99995,21.020639,19.435972,18.752949,1.712895,0.106857
219454,99996,21.404068,19.633066,18.998640,1.785013,0.011676
219455,99997,21.347977,19.507254,18.886461,1.817457,-0.019388
219456,99998,20.773315,19.538841,19.030018,1.258720,0.020205


In [28]:
# Attach the photometry and colour terms to each candidate by joining on GAMA_ID,
# then display the combined table.
mac = mac.merge(gri, on = 'GAMA_ID')
mac

Unnamed: 0.1,Unnamed: 0,GAMA_ID,ID,RA,DEC,SCORE,STELLAR_MASS,Z,theta_e,g,r,i,c_par,c_perp
0,0,3899011.0,2251.0,131.33397,-0.91563,64.0,329400000000.0,0.4027,1.448577,19.552374,18.237883,17.61747,1.448639,0.11179
1,1,376338.0,2660.0,132.98734,1.50468,30.0,303400000000.0,0.3272,1.54231,19.727428,18.198345,17.597242,1.575682,0.038832
2,2,323152.0,2967.0,130.54643,1.64325,30.0,827700000000.0,0.7216,1.715374,20.163202,18.788204,18.186134,1.468982,0.07832
3,3,663364.0,2669.0,140.35284,-0.97414,28.0,268800000000.0,0.3221,1.463153,20.242424,18.590208,18.022409,1.62191,-0.025255
4,4,3629152.0,1933.0,135.88864,-0.97487,26.0,212900000000.0,0.4059,1.159976,20.751757,19.22267,18.575918,1.630463,0.08448
5,5,3575500.0,1906.0,129.23265,-1.57512,26.0,320900000000.0,0.3652,1.501378,20.262245,18.448973,17.908367,1.702017,-0.092713
6,6,197972.0,2007.0,137.85838,-0.7528,24.0,355100000000.0,0.3373,1.643379,20.10787,18.495419,17.897757,1.62991,0.014549
7,7,3883690.0,2756.0,134.57492,-1.55142,22.0,136200000000.0,0.2481,1.186713,19.631536,18.413004,17.965242,1.174287,-0.036872
8,8,3911769.0,1704.0,138.66089,-1.11639,20.0,174000000000.0,0.2794,1.263956,19.214046,17.987906,17.453287,1.283841,0.048083
9,9,371400.0,2275.0,131.20438,1.00064,18.0,202100000000.0,0.2948,1.326142,19.247175,17.947552,17.374062,1.381925,0.068584


In [35]:
# Take Petrillo's 1st set of criteria for LRG (z < 0.4):
#   r < 20,  |c_perp| < 0.2,  r < 14 + c_par/0.3
passes_lrg_cut = (mac.r < 20) & (abs(mac.c_perp) < 0.2) & (mac.r < 14 + mac.c_par/0.3)

# .copy() makes each subset an independent frame rather than a slice of `mac`,
# so adding columns to it later does not raise SettingWithCopyWarning.
mac_LRG = mac[passes_lrg_cut].copy()

print(len(mac_LRG))

# Candidates whose GAMA_ID is not among those that passed the cuts.
mac_not_LRG = mac[~mac.GAMA_ID.isin(mac_LRG.GAMA_ID)].copy()

print(len(mac_not_LRG))

39
8


## Only seven candidates pass the LRG cuts imposed by Petrillo.
This worries me.

# Edit 06/05/20 - Correction: 39 candidates pass (the count of seven above predates the fix of the coding error). Let's look at the other 8.

In [36]:
# Display the candidates that did not pass the LRG cuts.
mac_not_LRG

Unnamed: 0.1,Unnamed: 0,GAMA_ID,ID,RA,DEC,SCORE,STELLAR_MASS,Z,theta_e,g,r,i,c_par,c_perp
7,7,3883690.0,2756.0,134.57492,-1.55142,22.0,136200000000.0,0.2481,1.186713,19.631536,18.413004,17.965242,1.174287,-0.036872
19,19,173045.0,534.0,185.64714,-2.42117,36.0,82070000000.0,0.2556,0.907575,19.837652,18.622545,18.060822,1.308643,0.077947
22,22,583457.0,3357.0,174.8049,-0.00257,28.0,98730000000.0,0.2636,0.980217,20.844198,19.647169,19.134146,1.237548,0.033766
24,24,124486.0,2553.0,179.73003,-2.51895,28.0,85410000000.0,0.1435,1.23566,18.709095,17.625223,17.18202,1.074554,-0.007765
28,28,561967.0,1252.0,185.57948,-0.56586,22.0,336700000000.0,0.4584,1.372682,20.724981,19.161783,18.376511,1.820566,0.214473
34,34,15790.0,1258.0,216.7776,0.7208,42.0,120400000000.0,0.2952,1.022882,19.934919,18.630444,18.063387,1.377601,0.060938
40,40,249703.0,1630.0,212.6116,2.14905,22.0,549400000000.0,0.4673,1.736669,20.298817,18.700047,17.87324,1.895308,0.247114
46,46,250289.0,2730.0,214.36708,1.99311,28.0,189400000000.0,0.4009,1.100886,20.60841,19.140938,18.383068,1.720674,0.211002


In [42]:
# Amount by which each rejected candidate's r magnitude exceeds the sliding
# threshold 14 + c_par/0.3; a positive value means it fails r < 14 + c_par/0.3.
# assign() returns a new frame instead of writing a column into a slice of
# `mac`, which is what raised SettingWithCopyWarning with direct assignment.
mac_not_LRG = mac_not_LRG.assign(r_minus_cpar = mac_not_LRG.r - (14 + mac_not_LRG.c_par/0.3))

# Mean and median excess over the threshold, among the candidates that fail it.
failing_excess = mac_not_LRG.r_minus_cpar[mac_not_LRG.r_minus_cpar > 0]
print(np.mean(failing_excess), np.median(failing_excess))

0.4725881449381511 0.260400822957358


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


### 5 of the 8 candidates that fail the LRG cuts with Lambdar photometry fail the c_par criterion (r < 14 + c_par/0.3), which is intended to set a luminosity threshold as a sliding function of redshift. It is compared to the r-band magnitude to represent an older stellar population. The other three fail the c_perp criterion, which is introduced because colours in the g-r vs r-i plane display a linear locus due to the degeneracy of early- to mid-type galaxies at z < 0.4. c_perp is the distance from that locus, and |c_perp| < 0.2 selects objects close to that locus. These three galaxies are at z > 0.4, so they should not be considered to have failed. (Eisenstein et al. 2001)
### In summary, of the 47 candidates, 39 pass the z < 0.4 LRG cut. 3 are at z > 0.4 and therefore are not subject to this criterion. The remaining five fail the sliding luminosity threshold as a function of redshift by a mean of 0.47 mag and a median of 0.26 mag.