diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_alter_table_pkey.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_alter_table_pkey.sql
new file mode 100644
index 00000000..69c49d89
--- /dev/null
+++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_alter_table_pkey.sql
@@ -0,0 +1,3 @@
+\o supercosmos_alter_table_pkey.out
+alter table catalogdb.supercosmos add primary key (objID);
+\o
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c b/schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c
new file mode 100644
index 00000000..e7e1f60d
--- /dev/null
+++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c
@@ -0,0 +1,186 @@
+// Program supercosmos_binary.c
+// Aim: Read a SuperCOSMOS binary data file and output a CSV file
+//
+// Python struct module format from readSSABinary.py:
+// '<6q f 7d 5f b 12f 5b 4f 8i 8f'
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char *argv[]) {
+
+    long long x6q[6];
+    float xf[1];
+    double x7d[7];
+    float x5f[5];
+    signed char xb[1];
+    float x12f[12];
+    signed char x5b[5];
+    float x4f[4];
+    int x8i[8];
+    float x8f[8];
+
+    int nx6q;
+    int nxf;
+    int nx7d;
+    int nx5f;
+    int nxb;
+    int nx12f;
+    int nx5b;
+    int nx4f;
+    int nx8i;
+    int nx8f;
+
+    FILE *fptr;
+    FILE *fptr_try;
+    FILE *fptr_out;
+    int i;
+    long long num_rows;
+
+    if (argc != 3) {
+        printf("usage: supercosmos_binary input_file output_file\n");
+        exit(1);
+    }
+
+    fptr = fopen(argv[1], "rb");
+    if (fptr == NULL) {
+        printf("error: could not open input file %s\n", argv[1]);
+        exit(1);
+    }
+
+    printf("info:input_file=%s\n", argv[1]);
+
+    fptr_try = fopen(argv[2], "r");
+    if (fptr_try != NULL) {
+        printf("error: output file exists %s\n", argv[2]);
+        fclose(fptr_try);
+        exit(1);
+    }
+
+    fptr_out = fopen(argv[2], "w");
+    if (fptr_out == NULL) {
+        printf("error: could not open output file %s\n", argv[2]);
+        exit(1);
+    }
+
+    printf("info:output_file=%s\n", argv[2]);
+
+    num_rows = 0;
+    // start loop to read rows
+    while (1) 
{ + + // read binary data + nx6q = fread(x6q, sizeof(long long), 6, fptr); + nxf = fread(xf, sizeof(float), 1, fptr); + nx7d = fread(x7d, sizeof(double), 7, fptr); + nx5f = fread(x5f, sizeof(float), 5, fptr); + nxb = fread(xb, sizeof(signed char), 1, fptr); + nx12f = fread(x12f, sizeof(float), 12, fptr); + nx5b = fread(x5b, sizeof(signed char), 5, fptr); + nx4f = fread(x4f, sizeof(float), 4, fptr); + nx8i = fread(x8i, sizeof(int), 8, fptr); + nx8f = fread(x8f, sizeof(float), 8, fptr); + + // Reached EOF so break out of loop + if (nx6q == 0) { + printf("info:converted %lld rows to CSV.\n", num_rows); + break; + } + + // if incomplete row then exit + if (nx6q != 6) { + printf("error:incomplete row:expected nx6q=6:got nx6q=%d\n", nx6q); + exit(1); + } + if (nxf != 1) { + printf("error:incomplete row:expected nxf=1:got nxf=%d\n", nxf); + exit(1); + } + if (nx7d != 7) { + printf("error:incomplete row:expected nx7d=7:got nx7d=%d\n", nx7d); + exit(1); + } + if (nx5f != 5) { + printf("error:incomplete row:expected nx5f=5:got nx5f=%d\n", nx5f); + exit(1); + } + if (nxb != 1) { + printf("error:incomplete row:expected nxb=1:got nxb=%d\n", nxb); + exit(1); + } + if (nx12f != 12) { + printf("error:incomplete row:expected nx12f=12:got nx12f=%d\n", + nx12f); + exit(1); + } + if (nx5b != 5) { + printf("error:incomplete row:expected nx5b=5:got nx5b=%d\n", nx5b); + exit(1); + } + if (nx4f != 4) { + printf("error:incomplete row:expected nx4f=4:got nx4f=%d\n", nx4f); + exit(1); + } + if (nx8i != 8) { + printf("error:incomplete row:expected nx8i=8:got nx8i=%d\n", nx8i); + exit(1); + } + if (nx8f != 8) { + printf("error:incomplete row:expected nx8f=8:got nx8f=%d\n", nx8f); + exit(1); + } + + // complete row read so increment num_rows + num_rows = num_rows + 1; + + // write CSV data + for (i = 0; i < 6; i++) { + fprintf(fptr_out, "%lld,", x6q[i]); + } + + fprintf(fptr_out, "%.8e,", xf[0]); + + for (i = 0; i < 7; i++) { + fprintf(fptr_out, "%.16e,", x7d[i]); + } + + for (i = 0; i < 5; 
i++) { + fprintf(fptr_out, "%.8e,", x5f[i]); + } + + fprintf(fptr_out, "%hhd,", xb[0]); + + for (i = 0; i < 12; i++) { + fprintf(fptr_out, "%.8e,", x12f[i]); + } + + for (i = 0; i < 5; i++) { + fprintf(fptr_out, "%hhd,", x5b[i]); + } + + for (i = 0; i < 4; i++) { + fprintf(fptr_out, "%.8e,", x4f[i]); + } + + for (i = 0; i < 8; i++) { + fprintf(fptr_out, "%d,", x8i[i]); + } + + for (i = 0; i < 8; i++) { + if (i < 7) { + fprintf(fptr_out, "%.8e,", x8f[i]); + } else { + fprintf(fptr_out, "%.8e", x8f[i]); + } + } + + fprintf(fptr_out, "\n"); + } + // end loop to read data + + fclose(fptr); + fclose(fptr_out); + return 0; +} diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_indexes.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_indexes.sql new file mode 100644 index 00000000..08f1fa57 --- /dev/null +++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_indexes.sql @@ -0,0 +1,11 @@ +\o supercosmos_create_indexes.out +create index on catalogdb.supercosmos(q3c_ang2ipix(ra,dec)); +create index on catalogdb.supercosmos(classB); +create index on catalogdb.supercosmos(classR1); +create index on catalogdb.supercosmos(classR2); +create index on catalogdb.supercosmos(classI); +create index on catalogdb.supercosmos(classMagB); +create index on catalogdb.supercosmos(classMagR1); +create index on catalogdb.supercosmos(classMagR2); +create index on catalogdb.supercosmos(classMagI); +\o diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql new file mode 100644 index 00000000..10f75991 --- /dev/null +++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql @@ -0,0 +1,66 @@ +-- The create table statement is based on +-- the column names in the SuperCOSMOS README and +-- the column types in readSSABinary.py +-- +-- The below link has more details +-- http://ssa.roe.ac.uk/www/SSA_TABLE_SourceSchema.html#Source + +create table 
catalogdb.supercosmos( +objID bigint, +objIDB bigint, +objIDR1 bigint, +objIDR2 bigint, +objIDI bigint, +htmId bigint, +epoch real, +ra double precision, +dec double precision, +sigRA double precision, +sigDec double precision, +cx double precision, +cy double precision, +cz double precision, +muAcosD real, +muD real, +sigMuAcosD real, +sigMuD real, +chi2 real, +Nplates smallint, +classMagB real, +classMagR1 real, +classMagR2 real, +classMagI real, +gCorMagB real, +gCorMagR1 real, +gCorMagR2 real, +gCorMagI real, +sCorMagB real, +sCorMagR1 real, +sCorMagR2 real, +sCorMagI real, +meanClass smallint, +classB smallint, +classR1 smallint, +classR2 smallint, +classI smallint, +ellipB real, +ellipR1 real, +ellipR2 real, +ellipI real, +qualB integer, +qualR1 integer, +qualR2 integer, +qualI integer, +blendB integer, +blendR1 integer, +blendR2 integer, +blendI integer, +prfStatB real, +prfStatR1 real, +prfStatR2 real, +prfStatI real, +l real, +b real, +d real, +Ebmv real +); diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py b/schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py new file mode 100644 index 00000000..f255a805 --- /dev/null +++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py @@ -0,0 +1,86 @@ +# Program: supercosmos_load.py +# Aim: load supercosmos csv files into the postgreSQL sdss5db database. +# +# The program checks so that it does not reload csv files which already +# have a csv.load.out file. 
+# + +import glob +import os.path + + +DEBUG = False + +# Note that csv_dir and csvout_dir must end with / + +csv_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/" # noqa E501 +csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csvout/" # noqa E501 + +list_of_csv_files = glob.glob(csv_dir + "ssaSource*ra*.csv") + +list_of_csv_files.sort() + +fout = open(csvout_dir + "load.csv.out", "a") + +if DEBUG is True: + list_of_csv_files = [csv_dir + "ssaSource000ra030.bin"] + +for i in range(len(list_of_csv_files)): + full_csv_file = list_of_csv_files[i] + csv_file = os.path.basename(full_csv_file) + csvout_file = csv_file.replace('.csv', '.csv.out') + csvsql_file = csv_file.replace('.csv', '.csv.sql') + + # both csvout_file and csvsql_file are put in csvout_dir directory + full_csvout_file = csvout_dir + csvout_file + full_csvsql_file = csvout_dir + csvsql_file + # if csv file exists then skip this csv file and goto next csv file + if os.path.isfile(full_csvout_file): + print("skipping loading csv file since csv.out file already exists:", + csvout_file) + print("skipping loading csv file since csv.out file already exists:", + csvout_file, + file=fout, flush=True) + continue + + fpgscript = open(full_csvsql_file, "w") + print("\\o " + full_csvout_file, file=fpgscript) + print("\\copy catalogdb.supercosmos ", file=fpgscript, end='') + print("from '" + full_csv_file + "' ", file=fpgscript, end='') + print("delimiter ',' ; ", file=fpgscript) + print("\\o", file=fpgscript) + print("\\q", file=fpgscript) + fpgscript.close() + + print("load start:", csv_file) + print("load start:", csv_file, file=fout, flush=True) + + pgcopy_output = os.popen("psql -U postgres sdss5db " + + " -a -f " + full_csvsql_file).read() + + wc_output = os.popen("wc -l " + full_csv_file).read() + num_lines_str, file_name = wc_output.split() + num_lines = int(num_lines_str) + print(csv_file, ":contains:", num_lines) + print(csv_file, 
":contains:", num_lines, file=fout, flush=True) + + fcsvout = open(full_csvout_file, "r") + line = fcsvout.readline() + copytext, num_rows_loaded_str = line.split() + num_rows_loaded = int(num_rows_loaded_str) + print(csvout_file, ":loaded:", num_rows_loaded) + print(csvout_file, ":loaded:", num_rows_loaded, file=fout, flush=True) + fcsvout.close() + + if(num_lines != num_rows_loaded): + print("load error:num_lines!=num_rows_loaded", csv_file) + print("load error:num_lines!=num_rows_loaded", + csv_file, file=fout, flush=True) + + print("load end:", csv_file) + print("load end:", csv_file, file=fout, flush=True) + +print("loaded all csv files") +print("loaded all csv files", file=fout, flush=True) + +fout.close() diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py b/schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py new file mode 100644 index 00000000..3bb41467 --- /dev/null +++ b/schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py @@ -0,0 +1,37 @@ +# Program: supercosmos_make_bin2csv.py +# Aim: make bash script to convert supercosmos .bin files to csv. +# +# Usage: +# python supercosmos_make_bin2csv.py > supercosmos_bin2csv.sh +# and then run +# bash supercosmos_bin2csv.sh & +# + +# input_dir and output_dir must end with / +input_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/" # noqa: E501 +output_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/" # noqa: E501 + +# This list is from the SuperCOSMOS filelist. 
+list_of_bin_files = ["ssaSource000ra030.bin", + "ssaSource030ra060.bin", + "ssaSource060ra090.bin", + "ssaSource090ra120.bin", + "ssaSource120ra150.bin", + "ssaSource150ra180.bin", + "ssaSource180ra210.bin", + "ssaSource210ra240.bin", + "ssaSource240ra270.bin", + "ssaSource270ra300.bin", + "ssaSource300ra330.bin", + "ssaSource330ra360.bin"] + +for i in range(len(list_of_bin_files)): + bin_file = list_of_bin_files[i] + base_file, extension = bin_file.split('.') + csv_file = base_file + ".csv" + full_bin_file = input_dir + bin_file + full_csv_file = output_dir + csv_file + print("./supercosmos_binary " + + full_bin_file + " " + full_csv_file + " > " + + full_csv_file + ".out") + print("")