In [None]:
import numpy as np
from astropy.io import fits as fits
import matplotlib.pyplot as plt
import matplotlib.patches as patches 
from astropy.coordinates import SkyCoord
from astropy.coordinates import match_coordinates_sky
from astropy.coordinates import search_around_sky
from astropy import units as u
from astropy.coordinates import Angle, SkyCoord
import matplotlib.cm as cm
from scipy.ndimage import gaussian_filter as gf
from astropy.nddata import Cutout2D
import astropy.visualization as viz
from astropy.wcs import WCS
from astropy.visualization import ZScaleInterval
import astropy.coordinates as coords
from astropy.visualization.wcsaxes import WCSAxes
import astropy.visualization.wcsaxes.frame as frame
from astropy.table import Table
import pandas as pd
import scipy.stats as st
from scipy.stats import kde, sigmaclip
import time
import pickle
import os
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, Normalizer
from IPython.display import Image
import seaborn as sns

%matplotlib notebook
%matplotlib inline

### Gathering ALL HSC and Chandra Data

In [None]:
# Chandra catalogue: read the FITS table with astropy, then convert it to a pandas DataFrame.
Chandra_Data = (
    Table.read('Chandra_COSMOS_Legacy_20151120_4d.fits', format='fits')
    .to_pandas()
)

# HSC catalogue: parse every column as float64 and discard any row that contains a NaN.
HSC_Data = (
    pd.read_csv("Color_COSMOS_DD.csv", dtype=np.float64)
    .dropna(axis=0, how="any")
)

### Determining counterparts

In [None]:
# Cross-match the Chandra and HSC positions and keep only the catalogue rows that take
# part in at least one match.

# The pickles contain the ra & dec of each catalogue in a more manageable way.
# NOTE(review): pickle.load can execute arbitrary code -- only load pickles you created yourself.
# NOTE(review): absolute, machine-specific paths -- adjust for your environment.
with open('/Users/linaflorez/Desktop/ObsCos/COSMOS_research/venv/moredataHSC_skycoords.pkl', 'rb') as f:
    HSC_catalog = pickle.load(f)
with open('/Users/linaflorez/Desktop/ObsCos/COSMOS_research/venv/Chandra_skycoords.pkl', 'rb') as f:
    Chandra_catalog = pickle.load(f)


# Determining counterparts: every (Chandra, HSC) pair closer than max_sep on the sky.
max_sep = 2.5 * u.arcsec
# idx1 indexes Chandra, idx2 indexes HSC
idx1, idx2, sep2d, dist3d = search_around_sky(Chandra_catalog, HSC_catalog, max_sep)


# Applying the matches to the full dataframes (from 5e6 points and 4e3 to 3e3 dataframes).
# Rows are selected by exact RA equality with the matched positions, so a row that appears
# in several pairs is still selected only once.
chandra_mask = Chandra_Data["RA_x"].isin(Chandra_catalog.ra[idx1].value)
hsc_mask = HSC_Data["# ra"].isin(HSC_catalog.ra[idx2].value)

# sort_values returns a new, sorted frame. Sorting a boolean-mask selection in place
# (as was done before) raises SettingWithCopyWarning and mutates a derived frame.
HSC_matches = HSC_Data[hsc_mask].sort_values(by=['# ra'])
Chandra_matches = Chandra_Data[chandra_mask].sort_values(by=['RA_x'])


# Combining the dataframes.
# NOTE(review): the 3rd and 4th RA-sorted HSC rows are removed by position -- presumably
# extra HSC sources matched to an already-paired Chandra source so that both tables end up
# with the same length; confirm this still holds whenever the inputs or max_sep change.
# NOTE(review): the later side-by-side pairing relies on both tables being in the same
# RA order; search_around_sky can return several HSC sources per Chandra source.
HSC_matches = HSC_matches.drop(HSC_matches.index[[2,3]])
# reset_index() keeps the original row labels in a new "index" column.
HSC_matches = HSC_matches.reset_index()
Chandra_matches = Chandra_matches.reset_index()

print("Size of HSC catalog:", np.shape(HSC_Data), "&", "HSC length:", np.shape(HSC_matches)) 
print("Size of Chandra catalog:", np.shape(Chandra_Data),"Chandra length:", np.shape(Chandra_matches))

### Combining data from Chandra and HSC of the matches into one dataframe

In [None]:
# Put the Chandra columns and the HSC columns next to each other (rows are aligned on
# their index labels), then keep exactly the row labels of Chandra_matches.
matches = (
    pd.concat([Chandra_matches, HSC_matches], axis=1)
    .reindex(Chandra_matches.index)
)

# Quick look: RA of the first row according to each catalogue.
(matches["RA_x"].values[0], matches["# ra"].values[0])

In [None]:
# Dimensions (rows, columns) of the combined Chandra + HSC table.
matches.shape

### Comparing the 84 Chandra sources without optical counterparts to the whole Chandra catalog

In [None]:
# Load the table stored in no_optical_counterpart.csv; the plots below read its
# flux_F, flux_S, flux_H and HR columns.
not_matched = pd.read_csv("no_optical_counterpart.csv")
# Uncomment to list the available columns:
# not_matched.columns

In [None]:
# 0.5-10 keV flux (flux_F): sources from no_optical_counterpart.csv (left panel) next to
# the whole Chandra catalogue (right panel).
# NOTE(review): sns.distplot and the "seaborn" matplotlib style name are deprecated in newer
# seaborn / matplotlib releases -- confirm against the installed versions before upgrading.
plt.style.use("seaborn")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (15,5))

# Left panel: raw counts (no KDE, no normalisation) plus one rug mark per source.
hist = sns.distplot(not_matched.flux_F, color = "tomato", rug = True, bins = np.arange(0,1e-13,1e-15),
                    kde = False, norm_hist = False, hist_kws={'edgecolor':'black'},ax = ax1)
# Raw string: "\:" is a mathtext spacing command, not a Python escape sequence.
# The subscript reads "not matched" because this panel plots not_matched, not matches.
hist.set_xlabel(r"flux_F${_{not\:matched}(0.5 - 10keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist.set_ylabel("Counts", fontsize=15, fontweight='bold')
hist.set_xlim([0,.15e-13])
# hist.set_yscale("log")
ax1.tick_params("both", labelsize = 20)


# Right panel: the same quantity for every row of Chandra_Data, same x-range as the left panel.
hist2 = sns.distplot(Chandra_Data.flux_F, color = "magenta", rug = False,bins = np.arange(0,1e-13,0.1e-14),
                     kde = False, norm_hist = False, hist_kws={'edgecolor':'black'},ax = ax2)
# hist2.set_yscale("log")
hist2.set_xlim([0,1.5e-14])
hist2.set_xlabel(r"flux_F${_{Chandra\:Data}(0.5 - 10keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist2.set_ylabel("Counts", fontsize=15, fontweight='bold')
ax2.tick_params("both", labelsize = 20)
plt.tight_layout()
plt.savefig("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Plots/Comparison/flux_F.png")

In [None]:
# 0.5-2 keV flux (flux_S): sources from no_optical_counterpart.csv (left panel) next to
# the whole Chandra catalogue (right panel). Both panels share the same bins and x-range.
plt.style.use("seaborn")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (15,5))

# Left panel: raw counts (no KDE, no normalisation) plus one rug mark per source.
hist = sns.distplot(not_matched.flux_S, color = "tomato", rug = True, bins = np.arange(0,1e-13, 1e-16),
                    kde = False, norm_hist = False, hist_kws={'edgecolor':'black'},ax = ax1)
# Raw string: "\:" is a mathtext spacing command, not a Python escape sequence.
hist.set_xlabel(r"flux_S${_{not\:matches}(0.5 - 2keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist.set_ylabel("Counts", fontsize=15, fontweight='bold')
# hist.set_yscale("log")
hist.set_xlim([0,1.5e-15])


# Right panel: the same quantity for every row of Chandra_Data.
hist2 = sns.distplot(Chandra_Data.flux_S, color = "magenta", rug = False, bins = np.arange(0,1e-13, 1e-16),
                     kde = False, norm_hist = False,hist_kws={'edgecolor':'black'}, ax = ax2)
# hist2.set_yscale("log")
hist2.set_xlim([0,1.5e-15])
hist2.set_xlabel(r"flux_S${_{Chandra\:Data}(0.5 - 2keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist2.set_ylabel("Counts", fontsize=15, fontweight='bold')

# Tick styling is applied once per axis (ax1 used to be styled twice).
ax1.tick_params("both", labelsize = 20)
ax2.tick_params("both", labelsize = 20)
plt.tight_layout()
plt.savefig("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Plots/Comparison/flux_S.png")

In [None]:
# 2-10 keV flux (flux_H) on a logarithmic count axis: sources from
# no_optical_counterpart.csv (left panel) next to the whole Chandra catalogue (right panel).
plt.style.use("seaborn")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (15,5))

# Left panel: raw counts (no KDE, no normalisation) plus one rug mark per source.
hist = sns.distplot(not_matched.flux_H, color = "tomato", rug = True, bins = np.arange(0,1e-13, 1e-15),
                    kde = False, norm_hist = False, hist_kws={'edgecolor':'black'},ax = ax1)
# Raw string: "\:" is a mathtext spacing command, not a Python escape sequence.
# The subscript reads "not matched" because this panel plots not_matched, not matches.
hist.set_xlabel(r"flux_H${_{not\:matched}(2 - 10keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist.set_ylabel("Counts", fontsize=15, fontweight='bold')
hist.set_yscale("log")
hist.set_xlim([0,2e-14])


# Right panel: the same quantity for every row of Chandra_Data.
# NOTE(review): unlike the left panel, no x-limit is set here, so the two panels span
# different flux ranges -- confirm whether that is intended.
hist2 = sns.distplot(Chandra_Data.flux_H, color = "magenta", rug = False, bins = np.arange(0,1e-13, 1e-15),
                     kde = False, norm_hist = False, hist_kws={'edgecolor':'black'},ax = ax2)
hist2.set_yscale("log")
# The band in this label used to read "0.5 - 2keV"; flux_H is labelled 2 - 10keV on the
# left panel, so the same band is used here.
hist2.set_xlabel(r"flux_H${_{Chandra\:Data}(2 - 10keV\:[erg*cm^{-2}*s^{-1}])}$", fontsize=15, fontweight='bold')
hist2.set_ylabel("Counts", fontsize=15, fontweight='bold')

# Tick styling is applied once per axis (ax1 used to be styled twice).
ax1.tick_params("both", labelsize = 20)
ax2.tick_params("both", labelsize = 20)
plt.tight_layout()
plt.savefig("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Plots/Comparison/flux_H.png")

#### Matched bin based on HR

The curve is a kernel density estimate (KDE), which is essentially a smoothed version of the histogram. The y-axis is in units of density: when the KDE is drawn, the histogram is normalized by default so that it shares the same y-scale as the density curve.

In [None]:
# Hardness-ratio (HR) distributions, histogram + KDE, for three samples:
# matched sources, sources from no_optical_counterpart.csv, and the whole Chandra catalogue.
# Each sample is first cut to drop strongly negative HR values
# (presumably an "undefined" sentinel near -99 -- TODO confirm); bins cover [-1, 1).
plt.style.use("seaborn")
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15,5))


# Panel 1: matched sources.
hist2 = sns.distplot(matches.HR[matches.HR > -60], color = "blue", bins = np.arange(-1,1,.05),
                     rug = False, hist=True, kde=True, hist_kws={'edgecolor':'black'}, ax = ax1)
hist2.set_xlabel("HR$_{matched}$", fontsize=15, fontweight='bold')
hist2.set_ylabel("Density", fontsize=15, fontweight='bold')


# Panel 2: sources without a match (rug marks show the individual values).
hist = sns.distplot(not_matched.HR[not_matched.HR > -20], color = "tomato", bins = np.arange(-1,1,.05),
                    rug = True, hist=True, kde=True,hist_kws={'edgecolor':'black'}, ax = ax2)
# Raw string: "\:" is a mathtext spacing command, not a Python escape sequence.
hist.set_xlabel(r"HR$_{not\:matched}$", fontsize=15, fontweight='bold')
hist.set_ylabel("Density", fontsize=15, fontweight='bold')


# Panel 3: every row of Chandra_Data.
hist3 = sns.distplot(Chandra_Data.HR[Chandra_Data.HR > -60], color = "magenta", bins = np.arange(-1,1,.05),
                     rug = False, hist=True, kde=True, hist_kws={'edgecolor':'black'}, ax = ax3)
hist3.set_xlabel(r"HR$_{Chandra\:Data}$", fontsize=15, fontweight='bold')
hist3.set_ylabel("Density", fontsize=15, fontweight='bold')

# Tick styling is applied once per axis (ax1 used to be styled twice).
ax1.tick_params("both", labelsize = 20)
ax2.tick_params("both", labelsize = 20)
ax3.tick_params("both", labelsize = 20)

plt.tight_layout()
plt.savefig("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Plots/Comparison/HR_notmatchvall.png")

# Sample sizes behind the three panels (before the HR cuts).
len(matches), len(not_matched), len(Chandra_Data)

In [None]:
# Counts of sources whose HR is below -50, zoomed in on the region around -99:
# sources from no_optical_counterpart.csv (left) vs. the whole Chandra catalogue (right).
plt.style.use("seaborn")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (15,5))

# Left panel: raw counts plus one rug mark per source.
hist = sns.distplot(not_matched.HR[not_matched.HR < -50], color = "tomato",
                    rug = True, hist=True, kde=False, hist_kws={'edgecolor':'black'}, ax = ax1)
# Raw string: "\:" is a mathtext spacing command, not a Python escape sequence.
hist.set_xlabel(r"HR$_{not\:matched}$", fontsize=15, fontweight='bold')
hist.set_ylabel("Count", fontsize=15, fontweight='bold')
# set_xlim must be called; assigning a list to it (as before) silently replaced the
# method and left the axis limits untouched.
hist.set_xlim([-99.1, -98.9])


# Right panel: the same selection applied to every row of Chandra_Data.
hist2 = sns.distplot(Chandra_Data.HR[Chandra_Data.HR < -50],color = "magenta",
                     rug = False, hist=True, kde=False, hist_kws={'edgecolor':'black'}, ax = ax2)
hist2.set_xlim([-99.1, -98.9])
# This panel previously had no x label; use the same wording as the other HR figure.
hist2.set_xlabel(r"HR$_{Chandra\:Data}$", fontsize=15, fontweight='bold')
hist2.set_ylabel("Count", fontsize=15, fontweight='bold')

# Tick styling is applied once per axis (ax1 used to be styled twice).
ax1.tick_params("both", labelsize = 20)
ax2.tick_params("both", labelsize = 20)
plt.tight_layout()
plt.savefig("/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Plots/Comparison/HR_notmatchvall2.png")

In [None]:
# Count every 50th entry of the rmag_psf column and scale back up by 50, i.e. the number
# of rows in HSC_Data rounded up to the next multiple of 50.
len(HSC_Data["rmag_psf"][0::50]) * 50

In [None]:
# List the column names available in the HSC catalogue.
HSC_Data.columns

### Creating separate HSC and Chandra dataframes containing only sources with counterparts
(i.e. sources that have matches)

In [None]:
# NOTE: both names were bound to different objects in the counterpart-matching cell;
# from here on they refer to the objects built below.

# Entries of Chandra_catalog picked out by idx1 (one per matched pair).
Chandra_matches = Chandra_catalog[idx1]

# Rows of HSC_Data whose RA equals the RA of any HSC position picked out by idx2.
HSC_matches = HSC_Data[HSC_Data["# ra"].isin(HSC_catalog.ra[idx2].value)]

### Getting sources with a certain G magnitude
I wanted to focus on HSC sources that have counterparts and a `gcmodel_mag` of roughly 26 (within 0.05 of 26).

In [None]:
# Keep the position columns of matched HSC sources whose gcmodel_mag lies within 0.05 of 26,
# then take every 4th of the first 40 such rows (at most 10 sources).
gcmodel_mag26ish = (
    HSC_matches
    .loc[(HSC_matches["gcmodel_mag"] - 26).abs() < 0.05, ["# ra", "dec"]]
    .iloc[0:40:4]
)

# Written without the index column; this file is going to be input into hscmap.
gcmodel_mag26ish.to_csv("gcmodel_mag26ish.csv", index=False)

In [None]:
# Find the HSC G-band cutout of every selected source and store its path in a new
# "gfits" column of gcmodel_mag26ish.
# Layout expected by the walk below: <folder_path>/<RA as string>/cutout-HSC-G*
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
folder_path = "/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Restart/CheckingStackingG/"
ra = (gcmodel_mag26ish["# ra"]).astype(str)

## Making folders for all of the sources (one-off setup step)
# for folder in ra:
#     os.mkdir(os.path.join(folder_path,folder))

# folder_path_name = "/Users/linaflorez/Desktop/ObsCos/COSMOS_research/Restart/FITS_FILES"

# Collect [parent folder name, full path] for every G-band cutout below folder_path.
g_fits = []
for subdir, dirs, files in os.walk(folder_path):
    for file in files:
        if file.startswith("cutout-HSC-G"):
            full_path = os.path.join(subdir, file)
            g_fits.append([full_path.split(os.sep)[-2], full_path])

# reshape keeps the array two-dimensional even when no cutout was found, so the column
# indexing below cannot fail on an empty result.
g_fits = np.array(g_fits, dtype=str).reshape(-1, 2)

# Organizing the sources for handling later on: map each folder name to the rows of
# g_fits found in it (one dictionary lookup per source instead of a nested scan).
rows_by_folder = {}
for row, folder in enumerate(g_fits[:, 0]):
    rows_by_folder.setdefault(str(folder), []).append(row)

# Rows of g_fits in the same order as the sources in `ra`.
indices = [row for source in ra.values for row in rows_by_folder.get(source, [])]

# Every source needs exactly one cutout; otherwise the new column cannot line up with
# the table. Report which sources are affected instead of a bare length-mismatch error.
missing = [source for source in ra.values if source not in rows_by_folder]
ambiguous = [source for source in ra.values if len(rows_by_folder.get(source, [])) > 1]
if missing or ambiguous:
    raise ValueError(
        "Expected exactly one 'cutout-HSC-G*' file per source under "
        f"{folder_path!r}; missing: {missing}; more than one: {ambiguous}"
    )

# assign() returns a new frame, which avoids writing a column into a slice of HSC_matches
# (SettingWithCopyWarning) and keeps the cell safe to re-run.
gcmodel_mag26ish = gcmodel_mag26ish.assign(gfits=g_fits[indices][:, 1])