In [1]:
import os
# Switch the working directory to the project's code folder before any other cell runs.
# NOTE(review): presumably this is what makes the local `SHARC` package importable
# in the imports cell below -- confirm.
# NOTE(review): this is an absolute, machine-specific path; os.chdir raises
# FileNotFoundError on any machine where this directory does not exist.
os.chdir("/Users/users/lourens/RDMP/master_research/code/SGQ_classification/")

# Preprocessing for the CPz catalog
This notebook imports the CPz catalog of Logan & Fotopoulou (2020), from the paper "Unsupervised star, galaxy, QSO classification: Application of HDBSCAN", and generates 5 different datasets, each with a different set of colours constructed from the magnitudes listed in the CPz catalog. The 5 datasets are defined as follows:
- STAR: Contains the colours listed as "*best_star_colours*" in Table 3 of Logan & Fotopoulou (2020).
- GAL: Contains the colours listed as "*best_gal_colours*" in Table 3 of Logan & Fotopoulou (2020).
- QSO: Contains the colours listed as "*best_qso_colours*" in Table 3 of Logan & Fotopoulou (2020).
- ALL: Contains the colours listed in the "ALL" column of Table 2 of Logan & Fotopoulou (2020).
- SDSS: Contains all pairwise colours that can be formed from the total and 3 arcsecond aperture magnitudes as measured in the ugriz filters by SDSS that are present in the CPz catalog.

Each of these datasets is saved as a separate gzip-compressed FITS file, stored in its own folder named after the dataset type.

## Imports

In [2]:
import os
from astropy.table import Table

import itertools as iter

from SHARC.utils import insertColors, writeDataset

# Names of the total-magnitude columns used to build colours.
total_magnitudes = ["umag", "gmag", "rmag", "imag", "zmag",
                    "Ymag", "Jmag", "Hmag", "Kmag",
                    "W1mag", "W2mag"]
# Names of the aperture-magnitude columns used to build colours.
aperture_magnitudes = ["u3mag", "g3mag", "r3mag", "i3mag", "Z3mag",
                       "Y3mag", "J3mag", "H3mag", "K3mag"]

# Colour sets used for the different classifiers in the HDBSCAN paper.
# Every entry is written as "<magnitude column>-<magnitude column>".

# STAR: from Table 3 (italics, i.e. colours only)
colors_STAR = ["Kmag-Y3mag", "Kmag-J3mag", "Kmag-Z3mag", "Kmag-H3mag", "J3mag-K3mag",
               "Y3mag-K3mag", "J3mag-W1mag", "Y3mag-W1mag", "Jmag-Kmag", "H3mag-K3mag",
               "H3mag-W1mag", "Ymag-Kmag", "Hmag-Y3mag", "Y3mag-W2mag", "J3mag-W2mag",
               "imag-g3mag", "Z3mag-W1mag", "Z3mag-K3mag", "zmag-u3mag", "Hmag-J3mag"]

# GAL: from Table 3 (italics, i.e. colours only)
colors_GAL = ["gmag-Jmag", "Ymag-W1mag", "J3mag-W1mag", "Y3mag-W1mag", "J3mag-W2mag",
              "H3mag-W2mag", "Y3mag-W2mag", "Z3mag-W2mag", "Kmag-J3mag", "H3mag-W1mag",
              "Z3mag-W1mag", "Kmag-H3mag", "Hmag-W2mag", "Kmag-W2mag", "W1mag-W2mag",
              "imag-W2mag", "gmag-Kmag", "gmag-Hmag", "imag-W1mag", "rmag-Hmag",
              "g3mag-i3mag", "rmag-Z3mag", "rmag-imag", "r3mag-i3mag", "K3mag-W2mag",
              "rmag-zmag", "rmag-Y3mag", "Hmag-J3mag", "imag-u3mag"]

# QSO: from Table 3 (italics, i.e. colours only)
colors_QSO = ["J3mag-W1mag", "Y3mag-W1mag", "J3mag-W2mag", "H3mag-W2mag", "Y3mag-W2mag",
              "Z3mag-W2mag", "Kmag-J3mag", "H3mag-W1mag", "Z3mag-W1mag", "Kmag-H3mag",
              "Hmag-W2mag", "Kmag-W2mag", "W1mag-W2mag", "gmag-Jmag", "imag-W2mag",
              "gmag-Kmag", "gmag-Hmag", "imag-W1mag", "rmag-Hmag", "g3mag-i3mag",
              "rmag-Z3mag", "rmag-imag", "r3mag-i3mag", "K3mag-W2mag", "rmag-zmag",
              "rmag-Y3mag", "Hmag-J3mag", "imag-u3mag"]

# ALL: from Table 2 ("ALL")
colors_ALL = ["Kmag-Y3mag", "Kmag-J3mag", "Kmag-H3mag", "J3mag-W1mag", "J3mag-K3mag",
              "Y3mag-W1mag", "H3mag-W1mag", "H3mag-K3mag", "Jmag-Kmag", "Y3mag-K3mag"]

# SDSS: every unordered pair drawn from the first five total and the first five
# aperture magnitudes, in list order. `iter` here is the file's alias for the
# itertools module (see the imports cell), not the builtin iter().
colors_SDSS = ["-".join(pair)
               for pair in iter.combinations(total_magnitudes[:5] + aperture_magnitudes[:5], 2)]

# root directory holding the CPz catalog files (cpz.dat and its ReadMe);
# the per-dataset output folders are also resolved relative to this path
data_path = "/Users/users/lourens/RDMP/master_research/data/HDBSCAN2"

# read the CPz catalog: the data file is parsed with astropy's "ascii.cds"
# reader, which is handed the accompanying ReadMe file
catalog_file = os.path.join(data_path, "cpz.dat")
readme_file = os.path.join(data_path, "ReadMe")
table = Table.read(catalog_file, readme=readme_file, format="ascii.cds")

# show the loaded table as this cell's rich output
display(table)

id,RAdeg,DEdeg,z,Hclass,umag,e_umag,gmag,e_gmag,rmag,e_rmag,imag,e_imag,zmag,e_zmag,Ymag,e_Ymag,Jmag,e_Jmag,Hmag,e_Hmag,Kmag,e_Kmag,W1mag,e_W1mag,W2mag,e_W2mag,u3mag,e_u3mag,g3mag,e_g3mag,r3mag,e_r3mag,i3mag,e_i3mag,Z3mag,e_Z3mag,Y3mag,e_Y3mag,J3mag,e_J3mag,H3mag,e_H3mag,K3mag,e_K3mag,Yhlr,Jhlr,Hhlr,Khlr,PCAs1c,PCAs2c,PCAs3c,PCAg1c,PCAg2c,PCAg3c,PCAq1c,PCAq2c,PCAq3c,ClasscO,CassscA,dpc,Poutc,Pstarc,Pgalc,Pqsoc,Labelc,PCAs1cHLR,PCAs2cHLR,PCAs3cHLR,PCAg1cHLR,PCAg2cHLR,PCAg3cHLR,PCAq1cHLR,PCAq2cHLR,PCAq3cHLR,ClasscHLRO,ClasscHLRA,dpcHLR,PoutcHLR,PstarcHLR,PgalcHLR,PqsocHLR,LabelcHLR
Unnamed: 0_level_1,deg,deg,Unnamed: 3_level_1,Unnamed: 4_level_1,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,mag,arcsec,arcsec,arcsec,arcsec,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1
str24,float64,float64,float64,int64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,int64,int64,int64,float64,float64,float64,float64,int64,float64,float64,float64,float64,float64,float64,float64,float64,float64,int64,int64,int64,float64,float64,float64,float64,int64
0104-00119,34.41787,-4.72127,0.72559,1,23.4122,0.0505,22.8534,0.0211,22.2006,0.0206,21.3921,0.0125,20.9957,0.0205,20.942,0.1872,21.307,0.8141,20.6116,0.4225,20.0746,0.2015,19.2583,0.071,20.1558,0.305,23.6573,0.0413,23.1494,0.0175,22.5104,0.017,21.6177,0.0097,21.3693,0.0174,20.9169,0.1349,21.2552,0.6533,20.4513,0.3581,19.7707,0.1487,0.75589,0.64208,0.66781,0.62961,-1.15732,-2.11252,0.26002,-1.11161,-0.27282,-0.44797,-1.06384,-0.26909,-0.42575,2,2,0,0.0,0.0,1.0,0.0,2,-1.45773,-0.82213,0.18082,-1.4632,-0.16978,0.00834,0.97244,-0.09133,-0.75841,2,2,0,0.0,0.0,0.99,0.01,2
0104-00184,34.37192,-4.59308,0.44925,1,22.018,0.0229,21.3995,0.0088,20.4452,0.0065,19.9624,0.0057,19.8319,0.0107,19.6647,0.0713,19.6658,0.2254,19.1885,0.1735,19.0394,0.1412,19.3925,0.08,19.8699,0.244,22.5703,0.0175,21.9737,0.0065,21.0491,0.005,20.5636,0.0044,20.4156,0.0074,19.931,0.0653,19.8437,0.1613,19.5163,0.1552,19.3704,0.1149,1.04066,0.92622,1.00097,1.0041,-0.19838,-0.40535,0.55346,-0.03172,-0.20366,0.13045,-0.02409,-0.20295,0.13825,2,2,0,0.0,0.0,1.0,0.0,2,-0.02468,0.10831,0.1801,-0.03298,0.12811,0.17348,0.07296,0.04991,-0.26422,2,2,0,0.0,0.0,1.0,0.0,2
0104-00227,34.41814,-4.5809,0.699164,1,22.4891,0.0389,21.7992,0.0141,21.0215,0.012,20.0603,0.0068,19.7914,0.0115,19.5855,0.0688,19.35,0.1776,19.0366,0.1412,18.6493,0.0827,18.4685,0.047,19.0239,0.12,23.336,0.0328,22.562,0.0109,21.7882,0.0091,20.8264,0.0053,20.5733,0.0084,19.9825,0.0631,19.616,0.1312,19.251,0.122,18.9551,0.0708,0.971,0.95445,0.98895,0.94643,-0.93719,-0.72131,0.39908,-1.03261,-0.04297,-0.12212,-1.02196,-0.04914,-0.13224,2,2,0,0.0,0.0,1.0,0.0,2,-1.0812,-0.7045,0.3711,-1.13309,-0.3724,0.2693,0.95536,0.1676,-0.21571,2,2,0,0.0,0.0,1.0,0.0,2
0104-00250,34.28431,-4.5688,0.700158,1,25.1815,0.2072,24.5456,0.0827,23.2131,0.0465,22.047,0.0197,21.5288,0.0272,21.3728,0.2525,21.0723,0.5306,20.5104,0.4139,20.1193,0.2532,19.6805,0.1,20.3989,0.407,25.5502,0.2235,24.7088,0.0728,23.4384,0.0425,22.2685,0.0179,21.7897,0.0253,21.2665,0.2362,20.9326,0.4821,20.4252,0.327,20.1081,0.2153,0.62192,0.66663,0.66207,0.61048,-0.9556,-1.09699,-0.55277,-1.43977,0.88886,-0.79804,-1.41286,0.88817,-0.79365,2,2,0,0.04,0.0,0.96,0.0,2,-1.33912,-1.57864,-0.21313,-1.58641,-1.1678,-0.39228,1.6379,0.78371,-0.66597,2,2,0,0.0,0.0,1.0,0.0,2
0104-00318,34.41644,-4.45668,0.8911,1,23.408,0.0784,23.1214,0.0391,22.5987,0.045,21.7409,0.0255,21.3541,0.038,20.9172,0.2132,21.0162,0.6402,20.3631,0.3498,20.0327,0.2235,19.4532,0.09,20.4447,0.441,23.6476,0.0552,23.3975,0.0277,22.8581,0.0311,22.036,0.0179,21.6268,0.0255,21.0259,0.1785,21.0532,0.549,20.3611,0.3037,19.9644,0.183,-99.0,0.79629,0.77516,0.60253,-0.94072,-1.54501,0.49898,-1.01558,0.19126,-0.47487,-0.97909,0.19416,-0.4553,2,2,0,0.0,0.0,1.0,0.0,2,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,--,--,--,-99.0,-99.0,-99.0,-99.0,--
0104-00332,34.29936,-4.55849,0.881673,1,22.933,0.0402,22.5699,0.0202,22.1956,0.0247,21.2474,0.014,21.1201,0.0274,20.6719,0.1849,20.455,0.525,20.758,0.526,20.043,0.2879,19.7255,0.105,20.2979,0.385,23.3593,0.0323,23.049,0.0164,22.6399,0.019,21.6584,0.0105,21.426,0.0179,20.6922,0.1636,20.616,0.3585,20.7357,0.4616,19.9892,0.1783,0.85328,0.725,0.89078,0.62758,-0.41181,-1.96153,0.23514,-0.74379,-0.13601,-0.97529,-0.72423,-0.13372,-0.9891,2,2,0,0.03,0.0,0.95,0.02,2,-1.01724,-0.82098,-0.37191,-1.02814,-0.37939,-0.48628,0.84192,-0.26775,-0.42765,2,2,0,0.0,0.0,0.91,0.09,2
0104-00341,34.48687,-4.46736,0.696882,1,25.765,0.366,24.6372,0.0854,23.0942,0.0422,21.8836,0.018,21.4975,0.0255,20.7233,0.1647,20.6791,0.5839,20.0319,0.2901,19.9739,0.2192,19.4583,0.086,19.6408,0.194,25.9623,0.3771,24.7553,0.0807,23.2543,0.0405,22.0203,0.0169,21.6621,0.0241,20.8264,0.1277,20.5578,0.3488,20.0443,0.2279,19.8419,0.1856,-99.0,0.64148,0.65005,0.60408,-0.77551,-0.90651,-0.36548,-1.68566,0.67689,-1.2136,-1.69876,0.66487,-1.27516,2,2,0,0.0,0.0,1.0,0.0,2,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,--,--,--,-99.0,-99.0,-99.0,-99.0,--
0104-00344,34.55656,-4.41331,0.583802,1,24.1408,0.1337,23.4812,0.0477,22.4308,0.0262,21.8121,0.0238,21.2051,0.0314,21.1025,0.1868,21.2958,0.8372,20.4332,0.2802,20.0682,0.1514,19.8322,0.119,20.4818,0.434,24.2714,0.0996,23.6495,0.036,22.6888,0.0209,22.018,0.0185,21.5583,0.0277,20.9636,0.1719,21.1572,0.5935,20.2633,0.2817,19.8856,0.1667,-99.0,0.61444,0.6567,0.54677,-0.87459,-1.41045,-0.33783,-0.83957,0.15256,-0.50151,-0.8067,0.156,-0.48578,2,2,0,0.0,0.0,1.0,0.0,2,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,--,--,--,-99.0,-99.0,-99.0,-99.0,--
0104-00382,34.59877,-4.77366,0.878214,1,24.1627,0.1384,23.3826,0.0477,22.2485,0.0309,21.0215,0.0123,20.2851,0.0157,20.1854,0.1155,19.7425,0.2642,19.4599,0.1936,18.8423,0.1003,18.4743,0.046,18.8308,0.097,25.0516,0.1449,24.1337,0.0429,22.8717,0.0249,21.606,0.0098,20.9743,0.0131,20.3438,0.095,20.0334,0.1964,19.5266,0.1527,19.0264,0.0773,0.85328,0.8615,0.79449,0.79162,-1.4647,-0.93995,-0.27206,-1.9234,0.39116,-0.59397,-1.92114,0.37759,-0.63421,2,2,0,0.0,0.0,1.0,0.0,2,-1.92322,-1.12588,-0.08182,-2.03128,-0.67844,-0.19294,2.04412,0.3123,0.00664,2,2,0,0.0,0.0,1.0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


## Write datasets

In [3]:
# Names of the datasets to generate; each name is also used as the output
# sub-folder and as part of the output file name.
datasets = ["STAR", "GAL", "QSO", "ALL", "SDSS"]

# Explicit lookup from dataset name to its colour list. This replaces the
# original eval(f"colors_{dataset}"): it resolves to exactly the same lists,
# but without evaluating a dynamically built string as code.
colors_by_dataset = {
    "STAR": colors_STAR,
    "GAL": colors_GAL,
    "QSO": colors_QSO,
    "ALL": colors_ALL,
    "SDSS": colors_SDSS,
}

for dataset in datasets:
    # work on a copy so the catalog loaded above is left untouched
    table_tmp = table.copy()
    # NOTE(review): insertColors' return value is not used, so it presumably
    # adds the colour columns to table_tmp in place -- confirm in SHARC.utils.
    insertColors(table_tmp, colors_by_dataset[dataset])
    # one gzip-compressed FITS file per dataset, inside <data_path>/<dataset>/
    writeDataset(table_tmp, filename=os.path.join(data_path, dataset, f"CPz_{dataset}_dataset.fits.gz"), verbose=True, overwrite=True)
    # drop the copy before the next iteration creates a new one
    del table_tmp

Writing data to /Users/users/lourens/RDMP/master_research/data/HDBSCAN2/STAR/CPz_STAR_dataset.fits.gz ...
Write successful!
Writing data to /Users/users/lourens/RDMP/master_research/data/HDBSCAN2/GAL/CPz_GAL_dataset.fits.gz ...
Write successful!
Writing data to /Users/users/lourens/RDMP/master_research/data/HDBSCAN2/QSO/CPz_QSO_dataset.fits.gz ...
Write successful!
Writing data to /Users/users/lourens/RDMP/master_research/data/HDBSCAN2/ALL/CPz_ALL_dataset.fits.gz ...
Write successful!
Writing data to /Users/users/lourens/RDMP/master_research/data/HDBSCAN2/SDSS/CPz_SDSS_dataset.fits.gz ...
Write successful!
