# This notebook analyzes the Spec sample with the same LRG cuts from Petrillo and SDSS.
(i) The low-z (z < 0.4) LRG colour-magnitude selection of
Eisenstein et al. (2001), adapted to including more sources
(fainter and bluer):
r < 20
|c perp | < 0.2
r < 14 + c par /0.3
where
(1)
c par = 0.7(g − r) + 1.2[(r − i) − 0.18)]
c perp = (r − i) − (g − r)/4.0 − 0.18

(ii) A source size in the r -band larger than the average
FWHM of the PSF of the respective tiles, times a empir-
ical factor to maximize the separation between stars and galaxies.
# Edit 06/05/20 I made a grave mistake here and am redoing these calculations.
# Edit 06/09/20 - Redoing with final number of 16 candidates.
# Edit 07/1/20 - Redoing with 13 candidates. (final? haha)

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.polynomial.polynomial import polyfit
import pandas as pd
#from matplotlib_venn import venn3, venn3_circles
import matplotlib.lines as lines
from astropy import constants as const
from astropy.io import fits
import astropy.coordinates as coord
import astropy.units as u
from astropy.table import Table
# import ephem
from astropy.cosmology import FlatLambdaCDM
from scipy import stats as sci

## g-band magnitude (AB)

In [2]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
hdul = fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSg.fits')  # open a FITS file
hdul.verify('fix')
photo_g_data = hdul[1].data  # assume the first extension is a table
#print(photo_g_data.columns)

# take ID and mag in g
photo_g_id = photo_g_data.LAMID
# LAMID is GAMA ID where applicable.
photo_g_mag = photo_g_data.ABMagDFA

photo_g_list = list(zip(photo_g_id, photo_g_mag))

photo_g = pd.DataFrame(photo_g_list, columns = ['GAMA_ID', 'g'])
photo_g.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   g        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [3]:
# GAMA_ID is given in bytes, need to decode. #b'8000'.decode("utf-8")
#photo_g_id.decode("utf-8")
#photo_g['GAMA_ID'] = photo_g['GAMA_ID'].str.decode("utf-8")

In [4]:
# Remove IDs which have "New" in the name
photo_g = photo_g[~photo_g.GAMA_ID.str.contains('New')]
photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


In [5]:
# Write IDs as integers
photo_g['GAMA_ID'] = photo_g.GAMA_ID.astype(int)

In [6]:
photo_g

Unnamed: 0,GAMA_ID,g
0,100000,20.594820
1,100001,20.743700
2,100002,18.867937
3,100003,20.013288
4,100004,19.506737
...,...,...
219453,99995,21.020639
219454,99996,21.404068
219455,99997,21.347977
219456,99998,20.773315


## r-band

In [7]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
hdul = fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSr.fits')  # open a FITS file
hdul.verify('fix')
photo_r_data = hdul[1].data  # assume the first extension is a table
#print(photo_g_data.columns)

# take ID and mag in g
photo_r_id = photo_r_data.LAMID
# LAMID is GAMA ID where applicable.
photo_r_mag = photo_r_data.ABMagDFA

photo_r_list = list(zip(photo_r_id, photo_r_mag))

photo_r = pd.DataFrame(photo_r_list, columns = ['GAMA_ID', 'r'])
photo_r.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   r        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [8]:
# GAMA_ID is given in bytes, need to decode. #b'8000'.decode("utf-8")
#photo_g_id.decode("utf-8")
#photo_r['GAMA_ID'] = photo_r['GAMA_ID'].str.decode("utf-8")

In [9]:
# Remove IDs which have "New" in the name
photo_r = photo_r[~photo_r.GAMA_ID.str.contains('New')]
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


In [10]:
# Write IDs as integers
photo_r['GAMA_ID'] = photo_r.GAMA_ID.astype(int)

In [11]:
photo_r

Unnamed: 0,GAMA_ID,r
0,100000,19.811590
1,100001,19.662579
2,100002,17.846043
3,100003,19.200987
4,100004,19.207859
...,...,...
219453,99995,19.435972
219454,99996,19.633066
219455,99997,19.507254
219456,99998,19.538841


## i-band

In [12]:
# Opening GAMA LambdarCat data from G09, G12, and G15 (DR3)
hdul = fits.open('/home/shawn/Desktop/gravitational_lensing_research/FITS_Files/LambdarSDSSi.fits')  # open a FITS file
hdul.verify('fix')
photo_i_data = hdul[1].data  # assume the first extension is a table
#print(photo_g_data.columns)

# take ID and mag in g
photo_i_id = photo_i_data.LAMID
# LAMID is GAMA ID where applicable.
photo_i_mag = photo_i_data.ABMagDFA

photo_i_list = list(zip(photo_i_id, photo_i_mag))

photo_i = pd.DataFrame(photo_i_list, columns = ['GAMA_ID', 'i'])
photo_i.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220158 entries, 0 to 220157
Data columns (total 2 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   GAMA_ID  220158 non-null  object 
 1   i        220158 non-null  float64
dtypes: float64(1), object(1)
memory usage: 3.4+ MB


In [13]:
# GAMA_ID is given in bytes, need to decode. #b'8000'.decode("utf-8")
#photo_g_id.decode("utf-8")
#photo_i['GAMA_ID'] = photo_i['GAMA_ID'].str.decode("utf-8")

In [14]:
# Remove IDs which have "New" in the name
photo_i = photo_i[~photo_i.GAMA_ID.str.contains('New')]
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


In [15]:
# Write IDs as integers
photo_i['GAMA_ID'] = photo_i.GAMA_ID.astype(int)
photo_i

Unnamed: 0,GAMA_ID,i
0,100000,19.469633
1,100001,19.226625
2,100002,17.358280
3,100003,18.706486
4,100004,18.875196
...,...,...
219453,99995,18.752949
219454,99996,18.998640
219455,99997,18.886461
219456,99998,19.030018


## Now I create variables to establish criteria for satisfying Petrillo's LRG classification.

## Now to see if Spec passes the LRG cuts.

In [18]:
spec = pd.read_csv('/home/shawn/Desktop/gravitational_lensing_research/Lens_Project_Files/spec_latest.csv')

In [19]:
len(spec)

47

In [20]:
# create variables for the criteria

# join all photometry measurements to GAMA_ID
gr = pd.merge(photo_g, photo_r, on = 'GAMA_ID')
gri = pd.merge(gr, photo_i, on = 'GAMA_ID')

# convert to Johnson magnitude system (or # out addition for AB)
# We want AB.
g = gri.g# + 0.013
r = gri.r# + 0.226
i = gri.i# + 0.296

c_par = 0.7*(g - r) + 1.2*((r - i) - 0.18)
c_perp = (r - i) - (g - r)/4.0 - 0.18

gri['c_par'] = c_par
gri['c_perp'] = c_perp

gri

# this was the huge mistake... really stupid.
# add columns c_par and c_perp to zoo
#zoo['r'] = r
#zoo['c_par'] = c_par
#zoo['c_perp'] = c_perp


Unnamed: 0,GAMA_ID,g,r,i,c_par,c_perp
0,100000,20.594820,19.811590,19.469633,0.742609,-0.033850
1,100001,20.743700,19.662579,19.226625,1.063929,-0.014327
2,100002,18.867937,17.846043,17.358280,1.084641,0.052289
3,100003,20.013288,19.200987,18.706486,0.946012,0.111426
4,100004,19.506737,19.207859,18.875196,0.392410,0.077943
...,...,...,...,...,...,...
219453,99995,21.020639,19.435972,18.752949,1.712895,0.106857
219454,99996,21.404068,19.633066,18.998640,1.785013,0.011676
219455,99997,21.347977,19.507254,18.886461,1.817457,-0.019388
219456,99998,20.773315,19.538841,19.030018,1.258720,0.020205


In [21]:
# merge zoo with gri
spec = pd.merge(spec, gri, on = 'GAMA_ID')
spec

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,GAMA_ID,RA,DEC,z_lens,D_lens,D_source,magphys_mstar,lambdar_log_mstar,...,lambdar_log_mstar_error,m_half_re,sigma_star,theta_e_pm,theta_e_sis,g,r,i,c_par,c_perp
0,0,0,198082,138.2815,-0.66673,0.163,550,920,30250000000.0,10.882681,...,0.097977,44131000000.0,180.071717,0.51215,0.376946,19.486719,18.456551,18.012224,1.03831,0.006784
1,1,1,202448,129.69546,-0.38179,0.418,1091,1453,,,...,,,,,,20.294607,19.274935,18.779585,1.092191,0.060432
2,2,2,204140,136.63883,-0.35203,0.282,841,1139,85780000000.0,11.181007,...,0.12889,104148200000.0,213.699288,0.513185,0.345361,20.264303,18.882341,18.404697,1.324546,-0.047846
3,3,3,209222,132.36771,0.1636,0.128,449,1331,14250000000.0,10.628404,...,0.214815,21227420000.0,151.684875,0.504627,0.440706,19.781664,18.7132,18.210352,1.135342,0.055732
4,4,4,209295,132.61013,0.11972,0.313,905,1337,84560000000.0,11.387013,...,0.114744,188434100000.0,244.169142,0.739489,0.556811,20.30125,18.945938,18.377218,1.415182,0.049892
5,5,5,279956,140.14187,0.97341,0.336,949,1313,,,...,,,,,,20.974964,19.708279,19.097034,1.404173,0.114573
6,6,6,301818,135.32929,1.22984,0.247,763,1193,121500000000.0,11.491164,...,0.147386,254300900000.0,261.190357,0.988156,0.710749,19.951376,18.864307,18.386555,1.118251,0.025986
7,7,7,302719,138.94058,1.33144,0.404,1069,1321,147400000000.0,11.507252,...,0.132535,266352800000.0,263.923303,0.621568,0.384085,21.115643,19.766256,19.200258,1.407768,0.048652
8,8,8,371208,130.50925,1.02071,0.077,285,1422,4812000000.0,9.972547,...,0.166724,3214222000.0,90.605856,0.270736,0.189735,19.597534,18.7897,18.420143,0.792952,-0.012402
9,9,9,377486,137.90642,1.62916,0.169,566,945,,,...,,,,,,20.378325,19.203476,18.698872,1.211919,0.030892


In [22]:
# Take Petrillo's 1st set of criteria for LRG (z < 0.4)
spec_LRG = spec[(spec.r < 20) & (abs(spec.c_perp) < 0.2) & (spec.r < 14 + spec.c_par/0.3)]
#spec_LRG_1 = spec_LRG_1[abs(spec_LRG_1.r) < 100]

print(len(spec_LRG))

spec_not_LRG = spec[~spec.GAMA_ID.isin(spec_LRG.GAMA_ID)]

print(len(spec_not_LRG))

3
44


# Only three of 47 pass the cuts.

In [24]:
spec_not_LRG.c_perp > 0.2
# All 19 pass the c_perp aspect
# two fail that

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
15    False
16     True
17    False
18    False
19    False
20     True
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
34    False
35    False
37    False
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
Name: c_perp, dtype: bool

In [26]:
spec_not_LRG['r_minus_cpar'] = spec_not_LRG.r - (14 + spec_not_LRG.c_par/0.3)
spec_not_LRG.r_minus_cpar
# 43 of 44 have r > calculation... Meaning they are not red?

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


0     0.995518
1     1.634300
2     0.467188
3     0.928726
4     0.228663
5     1.027702
6     1.136803
7     1.073696
8     2.146526
9     1.163745
10    1.548754
11    0.917409
12    1.799138
13    1.055962
15    1.554171
16   -0.037809
17    0.837357
18    0.084832
19    0.210812
20    0.470097
21    2.945150
22    2.339935
23    3.625186
24    1.562719
25    0.964294
26    1.437876
27    0.319214
28    1.908637
29    1.293670
30    0.722480
31    2.120699
32    1.129878
34    1.144898
35    0.386009
37    0.554321
38    1.270962
39    2.101851
40    1.407706
41    2.863751
42    1.190544
43    1.079398
44    1.659848
45    1.337293
46    1.633603
Name: r_minus_cpar, dtype: float64

In [27]:
print(np.mean(spec_not_LRG.r_minus_cpar[spec_not_LRG.r_minus_cpar > 0]),
np.median(spec_not_LRG.r_minus_cpar[spec_not_LRG.r_minus_cpar > 0]))

1.3088680007106577 1.1637452952067058


## All 19 candidates that fail LRG cuts with Lambdar photometry fail the c_par aspect, which is intended to set a luminosity threshold as a sliding function of redshift. It is compared to the r-band magnitude to represent an older stellar population. However, (Eisenstein-2001) "strongly advise[s] the reader that the LRG flags cannot be used to select a volume-weighted sample at z < 0.15... However, the linear color-magnitude cut (eq. [4]) is not a good approximation to the locus of an early-type galaxy at lower redshifts. At z < 0.15, Cut I is too permissive, allowing lower luminosity sources to enter the LRG sample. Hence, to extract LRGs from the MAIN sample at z < 0.15, one must make additional post-spectroscopic cuts." They do not give a "clean prescription for this".
## All 19 fail the the sliding luminosity threshold as a function of redshift by an mean of 0.66 and median of 0.82 mag.
# EDIT. New number is 8 pass, 8 fail.
## All 8 fail the the sliding luminosity threshold as a function of redshift by an mean of 0.39 and median of 0.29 mag.

In [23]:
zoo_not_LRG.Z

1     0.1190
3     0.1074
5     0.1435
7     0.1377
12    0.0876
13    0.0985
14    0.1436
15    0.1221
Name: Z, dtype: float64