In [None]:
import numpy as np
from astropy.io import fits as fits
import matplotlib.pyplot as plt
from astropy.coordinates import SkyCoord
from astropy.coordinates import match_coordinates_sky
from astropy.coordinates import search_around_sky
from astropy import units as u
from astropy.coordinates import Angle, SkyCoord
import matplotlib.cm as cm
from scipy.ndimage import gaussian_filter as gf
from astropy.nddata import Cutout2D
import astropy.visualization as viz
from astropy.wcs import WCS
from astropy.visualization import ZScaleInterval
import astropy.coordinates as coords
from astropy.visualization.wcsaxes import WCSAxes
import astropy.visualization.wcsaxes.frame as frame
from astropy.table import Table
import pandas as pd
from scipy.stats import kde
import time
import pickle
import os
import reproject
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, Normalizer
from IPython.display import Image
import seaborn as sns

%matplotlib notebook
%matplotlib inline

## Getting RA and DEC of cross-matched sources

In [None]:
# Catalogue of the sources without counterparts.
df = pd.read_csv("INVISIBLE.csv")
folder_path = "/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Restart/FITS_FILES/"

# hscMap only needs the coordinates, so write a two-column table (RA, dec).
hscmap_input = pd.DataFrame({"RA": df["ra"], "dec": df["dec"]})
hscmap_input.to_csv("hscmap_input.csv")

# Show both RA columns side by side for a visual comparison.
df.RA_x, df.ra

## Making folders to add respective fits files

When we get files from hscMap, the cutouts' FITS file names contain no information about the source. Because of that, we need a way to organize the FITS files in case we want to analyze files with particular properties. The only way I found to address this problem is to create a series of folders, named by the RAs of the sources we are concerned with. Through the file paths I can then trace each FITS file back to its original source. Crude, but a good fix for the moment.

In [None]:
# Root directory that holds one sub-folder per source.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
folder_path = "/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Analysis/FITS_FILES/"

# RA of every source as a string; these strings are the sub-folder names.
ra = (df["ra"]).astype(str)

# Folder creation is disabled below; os.mkdir raises if the folder already
# exists, so presumably this was run once and then commented out — confirm.
# for folder in ra:
#     os.mkdir(os.path.join(folder_path,folder))


## Organizing FITS files

Now I have to go through all of these RA-named folders to pull out the files for specific bands (i.e. G, R, I, Z, Y).

In [None]:
# One list of FITS paths per HSC band. Only the G-band entries also carry the
# name of the parent (RA-named) folder, which is used later to re-order the
# paths so they line up with the rows of the source catalogue.
g_fits = []
r_fits = []
i_fits = []
y_fits = []
z_fits = []
folder_path_name = "/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Analysis/FITS_FILES"

for subdir, dirs, files in os.walk(folder_path_name):
    # Sorting in place makes os.walk visit the sub-folders in a reproducible
    # order; every band list is filled in that same order.
    dirs.sort()
    for file in files:
        full_path = os.path.join(subdir, file)
        if file.startswith("cutout-HSC-G"):
            g_fits.append([full_path.split(os.sep)[-2], full_path])
        elif file.startswith("cutout-HSC-R"):
            r_fits.append([full_path])
        elif file.startswith("cutout-HSC-I"):
            i_fits.append([full_path])
        elif file.startswith("cutout-HSC-Y"):
            # Y-band files belong in y_fits (they were previously put in z_fits).
            y_fits.append([full_path])
        elif file.startswith("cutout-HSC-Z"):
            # Z-band files belong in z_fits (they were previously put in y_fits).
            z_fits.append([full_path])

# Convert to arrays: g_fits is (n, 2) [folder name, path]; the others are (n, 1).
g_fits = np.array(g_fits)
r_fits = np.array(r_fits)
i_fits = np.array(i_fits)
y_fits = np.array(y_fits)
z_fits = np.array(z_fits)

We had an original csv that contained all of the information for the sources with no counterparts, and here I'm converting it to a dataframe so we can add the FITS file paths to the csv to reference in further analysis.

In [None]:
# Columns from the visual-inspection bookkeeping that are not needed downstream.
excluded_columns = {
    'Unnamed: 0', 'Unnamed: 0.1', 'GRI/IZY', 'Visible', 'Invisible', 'Saturated',
    'Visible, Near Saturated Object', 'Not Visible, Near Saturated Object',
    'Visible, but near an object', 'Not Visible, but near an object', 'Unclear',
}

# Re-read the catalogue, keeping every column that is not in the set above.
updated_df = pd.read_csv(
    "INVISIBLE.csv",
    usecols=lambda column: column not in excluded_columns,
)
updated_df.to_csv("invisible_sources_updated.csv")

# Quick look at the first ten values of both RA columns.
updated_df.RA_x.values[0:10], updated_df.ra.values[0:10]

## Indexing to Update CSV

The fun part is that when we go through the folders containing the FITS files and pull out their file names, the arrays are NOT in the same order as the RAs within the dataframe, so here I'm building an index that re-orders the FITS file paths so each one lands in the correct row of the csv.

In [None]:
# For every catalogue RA (in catalogue order), record the position in g_fits
# of each entry whose folder name parses to exactly that RA.
indexing = [
    position
    for catalogue_ra in updated_df.ra.values
    for position, entry in enumerate(g_fits)
    if catalogue_ra == float(entry[0])
]

## After further inspection, more sources to ignore...

In [None]:
# RA values (degrees) of sources excluded after further visual inspection.
sources_to_reject = [
    149.99096995989999,
    150.31280999290001,
    150.014007568359,
    150.0368600085,
    150.044163961,
    149.870119959,
    150.466509985,
    150.30997643,
    150.3327999931,
    150.01580999979998,
    149.838489959,
    150.22795999049998,
    149.924705867,
    149.71426655,
    149.74107997229999,
    149.8705499936,
    149.908649983,
    150.0629500023,
    150.1785799884,
    150.490958282,
    150.190563658,
    150.606725205,
]

rejected_count = len(sources_to_reject)
print("Number of sources rejected: %i" % (rejected_count))

## Making the compiled csv

In [None]:
# Every catalogue row must have been matched to exactly one FITS folder,
# otherwise the path columns below would be attached to the wrong sources.
if len(indexing) != len(updated_df):
    raise ValueError(
        "Matched %i FITS folders for %i catalogue rows; cannot align paths with sources"
        % (len(indexing), len(updated_df))
    )

# Side-by-side table of paths, one column per band. reshape(-1, 1) turns the
# G-band path column into an (n, 1) array for any number of sources.
x = np.concatenate((g_fits[:,1].reshape(-1,1),r_fits,i_fits,z_fits,y_fits), axis = 1)

# Re-order the rows into catalogue order; labels follow the concatenation order above.
df2 = pd.DataFrame(data = x[indexing], columns=['gfits', 'rfits', 'ifits', 'zfits', 'yfits'])

# Attach the paths to the catalogue, then drop the manually rejected sources.
result = pd.concat([updated_df, df2], axis=1, sort=False)
result = result[~result['ra'].isin(sources_to_reject)]
result.to_csv("everything_you_need.csv")

In [None]:
# Coordinates-only table of the surviving sources, saved without the index column.
checking_chandra = pd.DataFrame({"ra": result.ra, "dec": result.dec})
checking_chandra.to_csv("checking_chandra.csv", index = False)

# Peek at the first ten RA values.
checking_chandra.ra.values[0:10]

## Patch of sky where sources are located

In [None]:
# Scatter plot of the source positions, drawn through the explicit Axes interface.
plt.style.use("seaborn-white")
sky_fig, sky_ax = plt.subplots(figsize = (10,7))
sky_ax.set_title("Sources: %i \n (w/o counterpart)" % (result.shape[0]), weight = "bold", size = 25)
sky_ax.scatter(result.ra, result.dec, color = "c")
sky_ax.set_xlabel("RA", weight = "bold", size = 20)
sky_ax.set_ylabel("Dec", weight = "bold", size = 20)
sky_ax.tick_params("both", labelsize = 15)


## Function for making 3" x 3" cutouts

We need to make further cutouts within Python (in this case 3" x 3"), because when we do the cutouts from hscMap, the center of the cutout is NOT necessarily the location of the source (as much as I tried!). Also, the hscMap cutouts are not necessarily all the same size, so stacking without doing one more trim would not make sense.

## Note: 🤔
FITS Info:

In [None]:
#just downloading image
# hdulist_ = fits.open("/Users/linaflorez/Downloads/cutout-HSC-Y-9813-pdr2_dud-200228-070200.fits")
# hdulist_.info()

In [None]:
#downloading image and mask
# hdulist_ = fits.open("/Users/linaflorez/Downloads/cutout-HSC-Y-9813-pdr2_dud-200228-070412.fits")
# hdulist_.info()

In [None]:
#downloading image, mask, and variance
# hdulist_ = fits.open("/Users/linaflorez/Downloads/cutout-HSC-Y-9813-pdr2_dud-200228-070637.fits")
# hdulist_.info()

In [None]:
def cutout_func(band):
    """Stack 3" x 3" cutouts of every source in one band and plot the result.

    Reads "everything_you_need.csv" and, for each row, opens the FITS file
    listed in column ``band``, cuts out a 3" x 3" region centred on the row's
    (ra, dec) and accumulates an inverse-variance weighted mean per pixel:

        stacked_j = sum_i(flux_ij / sigma_ij^2) / sum_i(1 / sigma_ij^2)

    The stacked image is shown with contours and saved to "stacked_<band>.pdf".

    Parameters
    ----------
    band : str
        Name of the CSV column that holds the FITS paths, e.g. "gfits".

    Returns
    -------
    None
        The central flux, its error and the magnitude are computed but only
        the magnitude is reported (in the plot title).
    """
    #contains all of the relevant fits files
    result = pd.read_csv("everything_you_need.csv")

    size = 3 #arcsecs
    cutout_size = np.array([size, size]) * u.arcsec

    # Accumulators for the weighted sum. They are fixed at 18 x 18 pixels, so
    # a cutout of any other shape (e.g. one trimmed at an image edge) will
    # raise a broadcasting error rather than being stacked.
    num = np.zeros((18,18))
    denom = np.zeros((18,18))

    flux_mag0 = []
    for i in range(len(result)):
        #Getting RA and Dec of source for cutout
        respective_band = result[band].iloc[i]
        RA = result["ra"].iloc[i]
        DEC = result["dec"].iloc[i]

        # Open inside a context manager so each file is closed before the next
        # one is opened; all pixel access happens while the file is still open.
        with fits.open(respective_band) as hdulist:
            flux_mag0.append(hdulist[0].header["FLUXMAG0"])

            # Courtesy of hscMap: HDU 1 is the image, HDU 3 the variance plane
            image = hdulist[1].data
            variance = hdulist[3].data #for inverse variance

            # Prepping for cutout
            wcs = WCS(hdulist[1].header)
            cut_center = SkyCoord(ra=RA*u.degree, dec=DEC*u.degree)

            # Same footprint on the image and on the variance plane
            cutout = Cutout2D(image, cut_center, cutout_size, wcs)
            variance_cutout = Cutout2D(variance, cut_center, cutout_size, wcs=wcs).data

            # Σ_i of (flux_ij/sigma_ij^2)
            num += cutout.data/variance_cutout

            # Σ_i of (1/sigma_ij^2)
            denom += 1/variance_cutout

    #stacked cutout array
    cutouts = num/denom

    #Determining the magnitude of the "source at the center"
    # NOTE(review): uses the zero point of the first file only — assumes every
    # cutout shares the same FLUXMAG0; confirm.
    smaller_section = cutouts[6:12,6:12]
    flux = np.sum(smaller_section)
    magnitude = -2.5 * np.log10(flux/flux_mag0[0])

    #Determining error
    # Each stacked pixel has variance 1/denom. The error on the summed flux is
    # the square root of the summed variances (treating pixels as independent),
    # not the square root of the summed standard deviations.
    flux_err = np.sqrt(np.sum(1/denom[6:12,6:12]))

    # Plotting stacked image!
    plt.style.use("dark_background")
    fig,(ax1) = plt.subplots(1,1, figsize = (15,10))
    title = ("Stacking %i %s sources w/o counterparts \n Magnitude of central source/pixels: %.3f" % (result.shape[0], band, magnitude))
    plt.title(title, weight = "bold", size = 20)
    extent = [-size/2 ,size/2,-size/2 ,size/2]
    # origin="lower" on both calls puts array row 0 at the bottom for the image
    # and for the contours; with the defaults the two were drawn upside-down
    # relative to each other.
    plot = plt.imshow(cutouts, cmap = "plasma", extent=extent, aspect='auto', origin="lower")
    plt.contour(cutouts, 5, extent=extent, cmap='Greys', origin="lower")
    plt.xlabel("Arcsecs", weight = "bold", size = 20)
    plt.ylabel("Arcsecs", weight = "bold", size = 20)
    plt.tick_params("both", labelsize = 20)
    cbar = fig.colorbar(plot)
    cbar.ax.tick_params(labelsize=20)
    plt.savefig("stacked_%s.pdf" % band)

    return
    


## Performing cutouts

In [None]:
# Stack each band in turn. cutout_func returns nothing, so loop for the side
# effects (figure + PDF) instead of building a tuple of None values to display.
for band in ("gfits", "rfits", "ifits", "zfits", "yfits"):
    cutout_func(band)

In [None]:
# Display the first two RA values of the coordinates-only table.
checking_chandra.ra.iloc[0:2].values

In [None]:
# Display the in-memory `result` table (catalogue columns plus per-band FITS paths).
result

### 1. Make sure that the stacking code is working correctly

In [None]:
# Load Color_COSMOS_DD.csv into a DataFrame.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
COSMOS_info = pd.read_csv("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/venv/Color_COSMOS_DD.csv")

In [None]:
# List the column names available in the COSMOS table.
print("The contents of the COSMOS file: \n",COSMOS_info.columns)

In [None]:
# Load the visually inspected sources and keep the rows flagged Visible == "Y"
# whose Unclear flag is not "Y".
# Both conditions are combined into one mask: chaining two [] filters applies a
# full-length mask to an already filtered frame, which makes pandas warn that
# the boolean key is being reindexed.
match_df = pd.read_csv("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/venv/VISIBLE.csv")
match_df = match_df[(match_df.Unclear != "Y") & (match_df.Visible == "Y")]
match_df.columns

In [None]:
# True if at least one `dec` value in match_df also appears (exact equality)
# in the `dec` column of COSMOS_info.
match_df["dec"].isin(COSMOS_info["dec"]).any()

https://hsc-release.mtk.nao.ac.jp/das_cutout/pdr2/manual.html#list-columns

In [None]:
# Inspect the G-band FITS file of the first source in `result`.
respective_band = result["gfits"].iloc[0]
RA = result["ra"].iloc[0]
DEC = result["dec"].iloc[0]

# The context manager closes the file once the inspection is done.
with fits.open(respective_band) as hdulist:
    # info() prints its summary itself and returns None, so call it on its own
    # instead of passing its return value to print().
    hdulist.info()
    print("FITS INFO:", hdulist[0].header, hdulist[1].data)
    flux_mag0 = hdulist[0].header["FLUXMAG0"]

    #(1)Do this for sources with counterparts
    #(2)Check to make sure that the magnitudes in the center pixels are what we want them to be: ~26mag
    # Pixels with zero or negative flux have no defined magnitude; they become
    # inf/NaN, and the accompanying numpy warnings are silenced here.
    with np.errstate(divide="ignore", invalid="ignore"):
        magnitudes = -2.5 * np.log10(hdulist[1].data/flux_mag0) # flux units to magnitudes, flux units on colorbar

In [None]:
# Render a saved screenshot inline.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
Image(filename='/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Readings/Screen Shot 2020-02-26 at 1.46.21 AM.png')

## Getting visible source data

In [None]:
# Sources without a catalogue match that are flagged Visible == "Y" and whose
# Unclear flag is not "Y".
# A single combined mask avoids indexing an already filtered frame with a
# full-length boolean Series (which triggers pandas' reindexing warning).
match_df = pd.read_csv("no_match_stats.csv")
visible_sources_no_match = match_df[(match_df.Unclear != "Y") & (match_df.Visible == "Y")]

## Comparing flux of visible and invisible sources

In [None]:
# 3 x 2 grid of flux histograms: one row per flux column (F, S, H), with the
# invisible sources on the left and the visible sources on the right.
plt.style.use('seaborn-white')
fig, ((ax1,ax2),(ax3,ax4),(ax5,ax6)) = plt.subplots(3,2, figsize = (20,25))

bins_F=np.linspace(1e-15,1e-14,20) #BIN MADE HERE!!!

# NOTE(review): every panel carries the same "0.5–10 keV" x-label, including
# the Flux_S and Flux_H panels — confirm that is the intended band for each.
flux_axis_label = "(0.5–10 keV Flux ${[erg cm^{−2} s^{−1}]}$)"

# (axes, sample, flux column, title template) for each panel, in drawing order.
histogram_panels = [
    (ax1, result, "flux_F", "Flux_F: %i Invisible sources"),
    (ax2, visible_sources_no_match, "flux_F", "Flux_F: %i Visible sources"),
    (ax3, result, "flux_S", "Flux_S: %i Invisible sources"),
    (ax4, visible_sources_no_match, "flux_S", "Flux_S: %i Visible sources"),
    (ax5, result, "flux_H", "Flux_H: %i Invisible sources"),
    (ax6, visible_sources_no_match, "flux_H", "Flux_H: %i Visible sources"),
]

for panel, sample, flux_column, title_template in histogram_panels:
    panel.set_title(title_template % (np.shape(sample)[0]), weight = "bold", size = 27)
    panel.hist(sample[flux_column], bins = bins_F, ec='black')
    panel.set_xlabel(flux_axis_label, weight = "bold", size = 25)
    panel.tick_params("both", labelsize = 25)

plt.tight_layout()

${\Sigma}$

### Same plots but with qq plots added on

In [None]:
import statsmodels.api as sm
plt.style.use('seaborn-white')
fig, ((ax1,ax2), (ax3,ax4),(ax5,ax6)) = plt.subplots(3,2,figsize = (20,20))

bins_F=np.linspace(1e-15,1e-14,20) #BIN MADE HERE!!!

# NOTE(review): every histogram carries the same "0.5–10 keV" x-label,
# including the Flux_S and Flux_H rows — confirm that is the intended band.
flux_axis_label = "(0.5–10 keV Flux ${[erg cm^{−2} s^{−1}]}$)"

# One row per flux column: histogram on the left, QQ plot of the SAME column
# on the right. Driving both from one table keeps them in sync (the Flux_S row
# previously showed the QQ plot of flux_F).
qq_rows = [
    ("flux_F", "Flux_F: %i \n Invisible sources", ax1, ax2),
    ("flux_S", "Flux_S: %i Invisible sources", ax3, ax4),
    ("flux_H", "Flux_H: %i Invisible sources", ax5, ax6),
]

for flux_column, title_template, hist_ax, qq_ax in qq_rows:
    hist_ax.set_title(title_template % (np.shape(result)[0]), weight = "bold", size = 27)
    hist_ax.hist(result[flux_column], bins = bins_F, ec='black')
    hist_ax.set_xlabel(flux_axis_label, weight = "bold", size = 25)
    hist_ax.tick_params("both", labelsize = 25)
    # line="q" draws the reference line through the quartiles.
    sm.qqplot(np.asarray(result[flux_column]), line = "q", ax = qq_ax)

plt.tight_layout()