# This notebook analyzes the Zoo sample with the same LRG cuts from Petrillo and SDSS.
(i) The low-z ($z < 0.4$) LRG colour-magnitude selection of
Eisenstein et al. (2001), adapted to include more sources
(fainter and bluer):

$$
\begin{aligned}
r &< 20 \\
|c_\perp| &< 0.2 \\
r &< 14 + c_\parallel / 0.3
\end{aligned}
\qquad (1)
$$

where

$$
\begin{aligned}
c_\parallel &= 0.7\,(g - r) + 1.2\,[(r - i) - 0.18] \\
c_\perp &= (r - i) - (g - r)/4.0 - 0.18
\end{aligned}
$$

(ii) A source size in the r-band larger than the average
FWHM of the PSF of the respective tiles, times an empirical
factor to maximize the separation between stars and galaxies.
# Edit 06/05/20 I made a grave mistake here and am redoing these calculations.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial.polynomial import polyfit
import pandas as pd
#from matplotlib_venn import venn3, venn3_circles
import matplotlib.lines as lines
from astropy import constants as const
from astropy.io import fits
import astropy.coordinates as coord
import astropy.units as u
from astropy.table import Table
# import ephem
from astropy.cosmology import FlatLambdaCDM
from scipy import stats as sci

## g-band magnitude (AB)

In [2]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
# The context manager closes the FITS file when the block exits, so the
# kernel does not keep an open handle for every band that is loaded.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSg.fits') as hdul:
    hdul.verify('fix')
    photo_g_data = hdul[1].data  # assume the first extension is a table
    #print(photo_g_data.columns)

    # take ID and mag in g
    photo_g_id = photo_g_data.LAMID
    # LAMID is GAMA ID where applicable.
    photo_g_mag = photo_g_data.ABMagDFA

    # Materialise the (ID, magnitude) pairs while the file is still open.
    photo_g_list = list(zip(photo_g_id, photo_g_mag))

# Two-column table: GAMA_ID and the g-band magnitude.
photo_g = pd.DataFrame(photo_g_list, columns = ['GAMA_ID', 'g'])
photo_g.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   g        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [3]:
# If GAMA_ID is read in as bytes it needs decoding first, e.g. b'8000'.decode("utf-8").
# The decode steps below are left disabled.
#photo_g_id.decode("utf-8")
#photo_g['GAMA_ID'] = photo_g['GAMA_ID'].str.decode("utf-8")

In [4]:
# Remove IDs which have "New" in the name
# .copy() makes the filtered table an independent frame, so column
# assignments made on it afterwards do not act on a slice of the full table.
photo_g = photo_g[~photo_g.GAMA_ID.str.contains('New')].copy()
photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


In [5]:
# Write IDs as integers
# assign() replaces the GAMA_ID column in its existing position and hands
# back the resulting frame, which is rebound to photo_g.
photo_g = photo_g.assign(GAMA_ID=photo_g.GAMA_ID.astype(int))

In [6]:
# Show the g-band table after GAMA_ID has been cast to integers.
photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


## r-band

In [7]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
# The context manager closes the FITS file when the block exits, so the
# kernel does not keep an open handle for every band that is loaded.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSr.fits') as hdul:
    hdul.verify('fix')
    photo_r_data = hdul[1].data  # assume the first extension is a table
    #print(photo_r_data.columns)

    # take ID and mag in r
    photo_r_id = photo_r_data.LAMID
    # LAMID is GAMA ID where applicable.
    photo_r_mag = photo_r_data.ABMagDFA

    # Materialise the (ID, magnitude) pairs while the file is still open.
    photo_r_list = list(zip(photo_r_id, photo_r_mag))

# Two-column table: GAMA_ID and the r-band magnitude.
photo_r = pd.DataFrame(photo_r_list, columns = ['GAMA_ID', 'r'])
photo_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   r        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [8]:
# If GAMA_ID is read in as bytes it needs decoding first, e.g. b'8000'.decode("utf-8").
# The decode steps below are left disabled.
#photo_r_id.decode("utf-8")
#photo_r['GAMA_ID'] = photo_r['GAMA_ID'].str.decode("utf-8")

In [9]:
# Remove IDs which have "New" in the name
# .copy() makes the filtered table an independent frame, so column
# assignments made on it afterwards do not act on a slice of the full table.
photo_r = photo_r[~photo_r.GAMA_ID.str.contains('New')].copy()
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


In [10]:
# Write IDs as integers
# assign() replaces the GAMA_ID column in its existing position and hands
# back the resulting frame, which is rebound to photo_r.
photo_r = photo_r.assign(GAMA_ID=photo_r.GAMA_ID.astype(int))

In [11]:
# Show the r-band table after GAMA_ID has been cast to integers.
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


## i-band

In [12]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
# The context manager closes the FITS file when the block exits, so the
# kernel does not keep an open handle for every band that is loaded.
with fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSi.fits') as hdul:
    hdul.verify('fix')
    photo_i_data = hdul[1].data  # assume the first extension is a table
    #print(photo_i_data.columns)

    # take ID and mag in i
    photo_i_id = photo_i_data.LAMID
    # LAMID is GAMA ID where applicable.
    photo_i_mag = photo_i_data.ABMagDFA

    # Materialise the (ID, magnitude) pairs while the file is still open.
    photo_i_list = list(zip(photo_i_id, photo_i_mag))

# Two-column table: GAMA_ID and the i-band magnitude.
photo_i = pd.DataFrame(photo_i_list, columns = ['GAMA_ID', 'i'])
photo_i.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   i        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [13]:
# If GAMA_ID is read in as bytes it needs decoding first, e.g. b'8000'.decode("utf-8").
# The decode steps below are left disabled.
#photo_i_id.decode("utf-8")
#photo_i['GAMA_ID'] = photo_i['GAMA_ID'].str.decode("utf-8")

In [14]:
# Remove IDs which have "New" in the name
# .copy() makes the filtered table an independent frame, so column
# assignments made on it afterwards do not act on a slice of the full table.
photo_i = photo_i[~photo_i.GAMA_ID.str.contains('New')].copy()
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


In [15]:
# Write IDs as integers
# assign() replaces the GAMA_ID column in its existing position and hands
# back the resulting frame, which is rebound to photo_i and then displayed.
photo_i = photo_i.assign(GAMA_ID=photo_i.GAMA_ID.astype(int))
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


## Now I create variables to establish criteria for satisfying Petrillo's LRG classification.

## Now to see if Zoo passes the LRG cuts.

In [16]:
# Read the Zoo candidate table from CSV.
zoo_csv_path = '/home/shawn/Desktop/gravitational_lensing_research/Lens_Project_Files/zoo_latest.csv'
zoo = pd.read_csv(zoo_csv_path)

In [17]:
# Build the colour variables used by the LRG selection.

# Join the three single-band tables on GAMA_ID: g with r first, then i.
gr = photo_g.merge(photo_r, on='GAMA_ID')
gri = gr.merge(photo_i, on='GAMA_ID')

# Magnitudes stay in the AB system (we want AB). The Johnson conversion
# would add these offsets instead: g + 0.013, r + 0.226, i + 0.296.
g, r, i = gri['g'], gri['r'], gri['i']

# Both selection colours are built from the same two colour indices.
g_minus_r = g - r
r_minus_i = r - i

c_par = 0.7*g_minus_r + 1.2*(r_minus_i - 0.18)
c_perp = r_minus_i - g_minus_r/4.0 - 0.18

# Attach both colours to the joined photometry table.
gri = gri.assign(c_par=c_par, c_perp=c_perp)

gri

# this was the huge mistake... really stupid.
# add columns c_par and c_perp to zoo
#zoo['r'] = r
#zoo['c_par'] = c_par
#zoo['c_perp'] = c_perp


Unnamed: 0,GAMA_ID,g,r,i,c_par,c_perp
0,100000,20.594820,19.811590,19.469633,0.742609,-0.033850
1,100001,20.743700,19.662579,19.226625,1.063929,-0.014327
2,100002,18.867937,17.846043,17.358280,1.084641,0.052289
3,100003,20.013288,19.200987,18.706486,0.946012,0.111426
4,100004,19.506737,19.207859,18.875196,0.392410,0.077943
...,...,...,...,...,...,...
219453,99995,21.020639,19.435972,18.752949,1.712895,0.106857
219454,99996,21.404068,19.633066,18.998640,1.785013,0.011676
219455,99997,21.347977,19.507254,18.886461,1.817457,-0.019388
219456,99998,20.773315,19.538841,19.030018,1.258720,0.020205


In [18]:
# merge zoo with gri
# zoo is rebound to the merged result, so running this cell a second time
# would merge photometry into a frame that already has it and produce
# suffixed duplicates (g_x, g_y, ...). Dropping any existing photometry
# columns first keeps the cell safe to re-run.
photometry_cols = [col for col in gri.columns if col != 'GAMA_ID']
# pd.merge defaults to an inner join: zoo rows with no GAMA_ID match in gri
# are not kept.
zoo = pd.merge(zoo.drop(columns=photometry_cols, errors='ignore'), gri, on = 'GAMA_ID')
zoo

Unnamed: 0.1,Unnamed: 0,GAMA_ID,RA,DEC,LENS_SCORE,STELLAR_MASS,Z,TOTAL_MASS,theta_e,g,r,i,c_par,c_perp
0,0,511867,216.38846,-1.11396,0.352229,54590000000.0,0.055,139974400000.0,1.59568,15.715085,15.139613,14.774145,0.625392,0.0416
1,1,485873,217.75015,-1.80042,0.349456,49080000000.0,0.0539,125846200000.0,1.52837,16.423378,15.589527,15.164383,0.877869,0.036681
2,2,70022,178.02218,0.07392,0.368109,80460000000.0,0.0599,206307700000.0,1.856296,16.370449,15.496297,15.068813,0.908887,0.028946
3,3,170898,176.46563,-2.33643,0.336096,231900000000.0,0.1215,594615400000.0,2.212752,16.998379,16.009546,15.557212,1.018984,0.025126
4,4,184275,175.91698,-1.44119,0.317262,44110000000.0,0.1237,113102600000.0,0.956432,17.832977,17.15332,16.725416,0.773245,0.07799
5,5,84050,175.79849,0.47814,0.365758,54710000000.0,0.119,140282100000.0,1.086001,17.991753,17.103765,16.721144,0.864737,-0.019375
6,6,7104,175.71213,0.83228,0.333333,17680000000.0,0.0751,45333330000.0,0.777126,18.182354,17.509954,17.084211,0.765571,0.077643
7,7,55245,181.07958,-0.31553,0.314054,127400000000.0,0.0936,326666700000.0,1.868604,15.879565,15.267034,14.955459,0.586662,-0.021558
8,8,70282,179.40308,0.12562,0.655352,42560000000.0,0.1074,109128200000.0,1.008252,18.413406,17.419868,16.975845,1.012304,0.015639
9,9,185451,180.28326,-1.61006,0.302985,82220000000.0,0.1079,210820500000.0,1.398133,17.746004,16.827835,16.36945,0.976781,0.048843


In [19]:
# Take Petrillo's 1st set of criteria for LRG (z < 0.4)
# A candidate is kept only if all three hold:
#   r < 20,  |c_perp| < 0.2,  r < 14 + c_par/0.3
# .copy() makes each subset an independent frame rather than a slice of zoo.
zoo_LRG = zoo[(zoo.r < 20) & (zoo.c_perp.abs() < 0.2) & (zoo.r < 14 + zoo.c_par/0.3)].copy()
#zoo_LRG_1 = zoo_LRG_1[abs(zoo_LRG_1.r) < 100]

print(len(zoo_LRG))

# Every zoo row whose GAMA_ID is not in the LRG subset. Because this is a
# copy, columns can be added to it later without SettingWithCopyWarning.
zoo_not_LRG = zoo[~zoo.GAMA_ID.isin(zoo_LRG.GAMA_ID)].copy()

print(len(zoo_not_LRG))

17
19


## ~~Only six candidates pass the LRG cuts imposed by Petrillo!~~ (superseded by the edit below)
# Edit 06/05/20 - 17 pass the LRG cuts. Let's look at the other 19.

In [20]:
# The selection cut is |c_perp| < 0.2, so a candidate fails it when the
# absolute value reaches 0.2. Testing only c_perp > 0.2 would miss failures
# on the negative side (c_perp <= -0.2).
zoo_not_LRG.c_perp.abs() >= 0.2
# True marks a candidate that fails the c_perp aspect; all False means every
# rejected candidate passes it.

4     False
5     False
6     False
8     False
10    False
13    False
15    False
17    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
32    False
33    False
34    False
Name: c_perp, dtype: bool

In [21]:
# Margin by which each rejected candidate misses the cut r < 14 + c_par/0.3.
# assign() returns a new frame instead of setting a column on a slice of zoo,
# which is what triggers pandas' SettingWithCopyWarning.
zoo_not_LRG = zoo_not_LRG.assign(
    r_minus_cpar=zoo_not_LRG.r - (14 + zoo_not_LRG.c_par/0.3)
)
zoo_not_LRG.r_minus_cpar
# A positive value means r is larger (fainter) than the threshold, i.e. the cut fails.
# all have r > calculation... Meaning they are not red?

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


4     0.575838
5     0.221308
6     0.958050
8     0.045521
10    0.043377
13    0.894571
15    0.978109
17    0.618755
23    0.979543
24    0.106565
25    0.820980
26    0.878458
27    0.918456
28    0.842679
29    0.195914
30    1.030883
32    0.360994
33    1.334277
34    0.824342
Name: r_minus_cpar, dtype: float64

In [22]:
# Mean and median of the positive margins (rows where r exceeds 14 + c_par/0.3).
positive_margin = zoo_not_LRG.r_minus_cpar[zoo_not_LRG.r_minus_cpar > 0]
print(np.mean(positive_margin), np.median(positive_margin))

0.6646641353138707 0.8243415069580067


## All 19 candidates that fail LRG cuts with Lambdar photometry fail the c_par aspect, which is intended to set a luminosity threshold as a sliding function of redshift. It is compared to the r-band magnitude to represent an older stellar population. However, (Eisenstein-2001) "strongly advise[s] the reader that the LRG flags cannot be used to select a volume-weighted sample at z < 0.15... However, the linear color-magnitude cut (eq. [4]) is not a good approximation to the locus of an early-type galaxy at lower redshifts. At z < 0.15, Cut I is too permissive, allowing lower luminosity sources to enter the LRG sample. Hence, to extract LRGs from the MAIN sample at z < 0.15, one must make additional post-spectroscopic cuts." They do not give a "clean prescription for this".
## All 19 fail the sliding luminosity threshold as a function of redshift by a mean of 0.66 and a median of 0.82 mag.

In [24]:
# Z column of the candidates that did not pass the LRG cuts.
zoo_not_LRG['Z']

4     0.1237
5     0.1190
6     0.0751
8     0.1074
10    0.1435
13    0.1081
15    0.1192
17    0.1377
23    0.0779
24    0.0876
25    0.0985
26    0.1436
27    0.1202
28    0.1115
29    0.1046
30    0.1194
32    0.1221
33    0.1344
34    0.0898
Name: Z, dtype: float64