diff --git a/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_alter_table_pkey.sql b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_alter_table_pkey.sql
new file mode 100644
index 00000000..d093cdbc
--- /dev/null
+++ b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_alter_table_pkey.sql
@@ -0,0 +1,4 @@
+-- Make source_id the primary key of catalogdb.catwise2020.
+\o catwise2020_alter_table_pkey.out
+ALTER TABLE catalogdb.catwise2020 ADD PRIMARY KEY (source_id);
+\o
diff --git a/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_indexes.sql b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_indexes.sql
new file mode 100644
index 00000000..fbf84ddf
--- /dev/null
+++ b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_indexes.sql
@@ -0,0 +1,10 @@
+-- Expression (q3c_ang2ipix) and plain column indexes on catalogdb.catwise2020.
+\o catwise2020_create_indexes.out
+CREATE INDEX ON catalogdb.catwise2020 (q3c_ang2ipix(ra, dec));
+CREATE INDEX ON catalogdb.catwise2020 (q3c_ang2ipix(ra_pm, dec_pm));
+CREATE INDEX ON catalogdb.catwise2020 (source_name);
+CREATE INDEX ON catalogdb.catwise2020 (w1mpro);
+CREATE INDEX ON catalogdb.catwise2020 (w2mpro);
+CREATE INDEX ON catalogdb.catwise2020 (w1sigmpro);
+CREATE INDEX ON catalogdb.catwise2020 (w2sigmpro);
+\o
diff --git a/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_table.sql b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_table.sql
new file mode 100644
index 00000000..ff280be1
--- /dev/null
+++ b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_create_table.sql
@@ -0,0 +1,192 @@
+-- The columns and types below are based on the information at this link:
+-- https://portal.nersc.gov/project/cosmo/data/CatWISE/2020cwcat.sis20200318.txt
+
+CREATE TABLE catalogdb.catwise2020 (
+    source_name CHAR(21),
+    source_id CHAR(25),
+    ra DOUBLE PRECISION,
+    dec DOUBLE PRECISION,
+    sigra REAL,
+    sigdec REAL,
+    sigradec REAL,
+    wx REAL,
+    wy REAL,
+    w1sky REAL,
+    w1sigsk REAL,
+    w1conf REAL,
+    w2sky REAL,
+    w2sigsk REAL,
+    w2conf REAL,
+    w1fitr REAL,
+    w2fitr REAL,
+    w1snr REAL,
+    w2snr REAL,
+    w1flux REAL,
+    w1sigflux REAL,
+    w2flux REAL,
+    w2sigflux REAL,
+    w1mpro REAL,
+    w1sigmpro REAL,
+    w1rchi2 REAL,
+    w2mpro REAL,
+    w2sigmpro REAL,
+    w2rchi2 REAL,
+    rchi2 REAL,
+    nb INTEGER,
+    na INTEGER,
+    w1Sat REAL,
+    w2Sat REAL,
+    w1mag REAL,
+    w1sigm REAL,
+    w1flg INTEGER,
+    w1Cov REAL,
+    w2mag REAL,
+    w2sigm REAL,
+    w2flg INTEGER,
+    w2Cov REAL,
+    w1mag_1 REAL,
+    w1sigm_1 REAL,
+    w1flg_1 INTEGER,
+    w2mag_1 REAL,
+    w2sigm_1 REAL,
+    w2flg_1 INTEGER,
+    w1mag_2 REAL,
+    w1sigm_2 REAL,
+    w1flg_2 INTEGER,
+    w2mag_2 REAL,
+    w2sigm_2 REAL,
+    w2flg_2 INTEGER,
+    w1mag_3 REAL,
+    w1sigm_3 REAL,
+    w1flg_3 INTEGER,
+    w2mag_3 REAL,
+    w2sigm_3 REAL,
+    w2flg_3 INTEGER,
+    w1mag_4 REAL,
+    w1sigm_4 REAL,
+    w1flg_4 INTEGER,
+    w2mag_4 REAL,
+    w2sigm_4 REAL,
+    w2flg_4 INTEGER,
+    w1mag_5 REAL,
+    w1sigm_5 REAL,
+    w1flg_5 INTEGER,
+    w2mag_5 REAL,
+    w2sigm_5 REAL,
+    w2flg_5 INTEGER,
+    w1mag_6 REAL,
+    w1sigm_6 REAL,
+    w1flg_6 INTEGER,
+    w2mag_6 REAL,
+    w2sigm_6 REAL,
+    w2flg_6 INTEGER,
+    w1mag_7 REAL,
+    w1sigm_7 REAL,
+    w1flg_7 INTEGER,
+    w2mag_7 REAL,
+    w2sigm_7 REAL,
+    w2flg_7 INTEGER,
+    w1mag_8 REAL,
+    w1sigm_8 REAL,
+    w1flg_8 INTEGER,
+    w2mag_8 REAL,
+    w2sigm_8 REAL,
+    w2flg_8 INTEGER,
+    w1NM INTEGER,
+    w1M INTEGER,
+    w1magP REAL,
+    w1sigP1 REAL,
+    w1sigP2 REAL,
+    w1k REAL,
+    w1Ndf INTEGER,
+    w1mLQ REAL,
+    w1mJDmin DOUBLE PRECISION,
+    w1mJDmax DOUBLE PRECISION,
+    w1mJDmean DOUBLE PRECISION,
+    w2NM INTEGER,
+    w2M INTEGER,
+    w2magP REAL,
+    w2sigP1 REAL,
+    w2sigP2 REAL,
+    w2k REAL,
+    w2Ndf INTEGER,
+    w2mLQ REAL,
+    w2mJDmin DOUBLE PRECISION,
+    w2mJDmax DOUBLE PRECISION,
+    w2mJDmean DOUBLE PRECISION,
+    rho12 INTEGER,
+    q12 INTEGER,
+    nIters INTEGER,
+    nSteps INTEGER,
+    mdetID INTEGER,
+    p1 REAL,
+    p2 REAL,
+    MeanObsMJD DOUBLE PRECISION,
+    ra_pm DOUBLE PRECISION,
+    dec_pm DOUBLE PRECISION,
+    sigra_pm REAL,
+    sigdec_pm REAL,
+    sigradec_pm REAL,
+    PMRA REAL,
+    PMDec REAL,
+    sigPMRA REAL,
+    sigPMDec REAL,
+    w1snr_pm REAL,
+    w2snr_pm REAL,
+    w1flux_pm REAL,
+    w1sigflux_pm REAL,
+    w2flux_pm REAL,
+    w2sigflux_pm REAL,
+    w1mpro_pm REAL,
+    w1sigmpro_pm REAL,
+    w1rchi2_pm REAL,
+    w2mpro_pm REAL,
+    w2sigmpro_pm REAL,
+    w2rchi2_pm REAL,
+    rchi2_pm REAL,
+    pmcode CHAR(7),
+    nIters_pm INTEGER,
+    nSteps_pm INTEGER,
+    dist REAL,
+    dw1mag REAL,
+    rch2w1 REAL,
+    dw2mag REAL,
+    rch2w2 REAL,
+    elon_avg DOUBLE PRECISION,
+    elonSig REAL,
+    elat_avg DOUBLE PRECISION,
+    elatSig REAL,
+    Delon REAL,
+    DelonSig REAL,
+    Delat REAL,
+    DelatSig REAL,
+    DelonSNR REAL,
+    DelatSNR REAL,
+    chi2pmra REAL,
+    chi2pmdec REAL,
+    ka INTEGER,
+    k1 INTEGER,
+    k2 INTEGER,
+    km INTEGER,
+    par_pm REAL,
+    par_pmSig REAL,
+    par_stat REAL,
+    par_sigma REAL,
+    dist_x REAL,
+    cc_flags CHAR(16),
+    w1cc_map INTEGER,
+    w1cc_map_str CHAR(20),
+    w2cc_map INTEGER,
+    w2cc_map_str CHAR(20),
+    n_aw INTEGER,
+    ab_flags CHAR(9),
+    w1ab_map INTEGER,
+    w1ab_map_str CHAR(13),
+    w2ab_map INTEGER,
+    w2ab_map_str CHAR(13),
+    glon DOUBLE PRECISION,
+    glat DOUBLE PRECISION,
+    elon DOUBLE PRECISION,
+    elat DOUBLE PRECISION,
+    unwise_objid CHAR(20)
+);
diff --git a/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_load.py b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_load.py
new file mode 100644
index 00000000..73265b83
--- /dev/null
+++ b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_load.py
@@ -0,0 +1,87 @@
+# Program: catwise2020_load.py
+# Aim: load catwise2020 csv files into the PostgreSQL sdss5db database.
+#
+# The program checks so that it does not reload csv files which already
+# have a csv.out file.
+
+import glob
+import os.path
+
+DEBUG = False
+
+# Note that csv_dir and csvout_dir must end with /
+
+csv_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/CatWISE/2020csv/"  # noqa E501
+csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/CatWISE/2020csvout/"  # noqa E501
+
+list_of_csv_files = glob.glob(csv_dir + "*ab_v5_cat*.csv")
+list_of_csv_files.sort()
+
+fout = open(csvout_dir + "load.csv.out", "a")
+
+if DEBUG is True:
+    list_of_csv_files = [csv_dir + "0000m016_opt1_20191208_213403_ab_v5_cat_b0.csv"]
+
+for full_csv_file in list_of_csv_files:
+    csv_file = os.path.basename(full_csv_file)
+    csvout_file = csv_file.replace('.csv', '.csv.out')
+    csvsql_file = csv_file.replace('.csv', '.csv.sql')
+
+    # both csvout_file and csvsql_file are put in csvout_dir directory
+    full_csvout_file = csvout_dir + csvout_file
+    full_csvsql_file = csvout_dir + csvsql_file
+    # if csv.out file exists then skip this csv file and goto next csv file
+    if os.path.isfile(full_csvout_file):
+        print("skipping loading csv file since csv.out file already exists:",
+              csvout_file)
+        print("skipping loading csv file since csv.out file already exists:",
+              csvout_file, file=fout, flush=True)
+        continue
+
+    # write the psql script that copies this csv file into the table
+    with open(full_csvsql_file, "w") as fpgscript:
+        print("\\o " + full_csvout_file, file=fpgscript)
+        print("\\copy catalogdb.catwise2020 ", file=fpgscript, end='')
+        print("from '" + full_csv_file + "' ", file=fpgscript, end='')
+        print(" with csv header null 'NULL'; ", file=fpgscript)
+        print("\\o", file=fpgscript)
+        print("\\q", file=fpgscript)
+
+    print("load start:", csv_file)
+    print("load start:", csv_file, file=fout, flush=True)
+
+    # read() blocks until psql has finished running the script
+    os.popen("psql -U postgres sdss5db " +
+             " -a -f " + full_csvsql_file).read()
+
+    wc_output = os.popen("wc -l " + full_csv_file).read()
+    num_lines_str, file_name = wc_output.split()
+    # do not count the csv header line, so reduce the count by one
+    num_lines = int(num_lines_str) - 1
+    print(csv_file, ":contains:", num_lines)
+    print(csv_file, ":contains:", num_lines, file=fout, flush=True)
+
+    # expect "<text> <nrows>" on line one; otherwise report and go on
+    with open(full_csvout_file, "r") as fcsvout:
+        fields = fcsvout.readline().split()
+    if len(fields) != 2 or not fields[1].isdigit():
+        print("load error:cannot parse csv.out file", csvout_file)
+        print("load error:cannot parse csv.out file",
+              csvout_file, file=fout, flush=True)
+        continue
+    num_rows_loaded = int(fields[1])
+    print(csvout_file, ":loaded:", num_rows_loaded)
+    print(csvout_file, ":loaded:", num_rows_loaded, file=fout, flush=True)
+
+    if(num_lines != num_rows_loaded):
+        print("load error:num_lines!=num_rows_loaded", csv_file)
+        print("load error:num_lines!=num_rows_loaded",
+              csv_file, file=fout, flush=True)
+
+    print("load end:", csv_file)
+    print("load end:", csv_file, file=fout, flush=True)
+
+print("loaded all csv files")
+print("loaded all csv files", file=fout, flush=True)
+
+fout.close()
diff --git a/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_tbl2csv.py b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_tbl2csv.py
new file mode 100644
index 00000000..1103f7e3
--- /dev/null
+++ b/schema/sdss5db/catalogdb/CatWISE2020/catwise2020_tbl2csv.py
@@ -0,0 +1,125 @@
+# Program: catwise2020_tbl2csv.py
+# Aim: convert catwise2020 tbl files to CSV.
+#
+# The program checks so that it does not convert tbl files which already
+# have a corresponding csv file.
+
+import glob
+import os.path
+
+import sys
+
+import astropy.table
+from astropy.io import ascii
+
+DEBUG = False
+
+# input_dir and output_dir must end with /
+input_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/CatWISE/2020/"  # noqa: E501
+output_dir = "/uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/catalogs/CatWISE/2020csv/"  # noqa: E501
+
+list_of_tbl_files = glob.glob(input_dir + "*ab_v5_cat*.tbl")
+list_of_tbl_files.sort()
+
+if (len(sys.argv) != 2):
+    print("usage:")
+    print("catwise2020_tbl2csv.py n")
+    print("where n = 0 or 1 or 2")
+    sys.exit(1)
+
+n = int(sys.argv[1])
+if(n == 0):
+    out_file = "tbl2csv.out"
+elif(n == 1):
+    out_file = "tbl2csv.1.out"
+elif(n == 2):
+    out_file = "tbl2csv.2.out"
+else:
+    print("tbl2csv:error:n =", n)
+    sys.exit(1)
+
+fout = open(output_dir + out_file, "a")
+
+# n picks the slice of the sorted tbl file list handled by this run:
+# 0 = all files, 1 = first half, 2 = second half.
+# n was validated above, so the final else can only be n == 2.
+num_tbl_files = len(list_of_tbl_files)
+if(n == 0):
+    lower_limit = 0
+    upper_limit = num_tbl_files
+elif(n == 1):
+    lower_limit = 0
+    upper_limit = num_tbl_files // 2
+else:
+    lower_limit = num_tbl_files // 2
+    upper_limit = num_tbl_files
+
+if DEBUG is True:
+    list_of_tbl_files = [input_dir +
+                         "0000m016_opt1_20191208_213403_ab_v5_cat_b0.tbl"]
+    lower_limit = 0
+    upper_limit = 1
+
+for full_tbl_file in list_of_tbl_files[lower_limit:upper_limit]:
+    tbl_file = os.path.basename(full_tbl_file)
+    csv_file = tbl_file.replace('.tbl', '.csv')
+    full_csv_file = output_dir + csv_file
+    # if csv file exists then skip this tbl file and goto the next tbl file
+    if os.path.isfile(full_csv_file):
+        print("tbl2csv:info:skipping tbl file since csv file already exists:",
+              csv_file)
+        print("tbl2csv:info:skipping tbl file since csv file already exists:",
+              csv_file, file=fout, flush=True)
+        continue
+
+    wc_output = os.popen("wc -l " + full_tbl_file).read()
+    num_lines_tbl_str, file_name = wc_output.split()
+    num_lines_tbl = int(num_lines_tbl_str)
+    # Most catwise2020 .tbl files have 19 header lines e.g.
+    # 0791m137_opt1_20191128_221825_ab_v5_cat_b0.tbl
+    # Some catwise2020 .tbl files have 20 line header e.g.
+    # 0791m682_opt0_20200110085825_ab_v5_cat_b0.tbl
+    #
+    # Do not count the 19 header lines.
+    # So reduce num_lines_tbl by 19.
+    num_lines_tbl = num_lines_tbl - 19
+    print(tbl_file, ":", num_lines_tbl)
+    print(tbl_file, ":", num_lines_tbl, file=fout, flush=True)
+
+    # read the tbl file as ascii.ipac and clear its metadata before writing
+    table = astropy.table.Table.read(full_tbl_file, format='ascii.ipac')
+    table.meta = {}
+    # fill_values replaces null in the tbl file with NULL
+    # See the section on NULL in the below link:
+    # https://www.postgresql.org/docs/12/sql-copy.html
+    #
+    # format='csv' will print a one line header with the column names
+    table.write(full_csv_file,
+                format='csv',
+                fill_values=[(ascii.masked, 'NULL')],
+                overwrite=True)
+
+    wc_output = os.popen("wc -l " + full_csv_file).read()
+    num_lines_csv_str, file_name = wc_output.split()
+    num_lines_csv = int(num_lines_csv_str)
+    # Do not count the CSV header line.
+    # So reduce num_lines_csv by one.
+    num_lines_csv = num_lines_csv - 1
+    print(csv_file, ":", num_lines_csv)
+    print(csv_file, ":", num_lines_csv, file=fout, flush=True)
+    if(num_lines_tbl != num_lines_csv):
+        # Some .tbl files have one more header line.
+        # See the above comment about 19 header lines.
+        if(num_lines_tbl == num_lines_csv + 1):
+            print("tbl2csv:info:num_lines_tbl==num_lines_csv+1", csv_file)
+            print("tbl2csv:info:num_lines_tbl==num_lines_csv+1",
+                  csv_file, file=fout, flush=True)
+        else:
+            print("tbl2csv:error:num_lines_tbl!=num_lines_csv", csv_file)
+            print("tbl2csv:error:num_lines_tbl!=num_lines_csv",
+                  csv_file, file=fout, flush=True)
+
+print("tbl2csv:info:converted all tbl files to csv")
+print("tbl2csv:info:converted all tbl files to csv", file=fout, flush=True)
+
+fout.close()