Merge pull request #42 from sdss/supercosmos

Supercosmos
sdss · Oct 21, 2020 · c944dfa · c944dfa
2 parents a124bf0 + e75a7b5
commit c944dfa
Show file tree

Hide file tree

Showing 6 changed files with 389 additions and 0 deletions.
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_alter_table_pkey.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_alter_table_pkey.sql
@@ -0,0 +1,3 @@
+\o supercosmos_alter_table_pkey.out
+alter table catalogdb.supercosmos add primary key (objID);  -- objID is the first (bigint) column declared in supercosmos_create_table.sql
+\o
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c b/schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c
@@ -0,0 +1,186 @@
+// Program supercosmos_binary.c
+// Aim: Read a SuperCOSMOS binary data file and output a CSV file
+//
+// Python struct module format from readSSABinary.py:
+// '<6q f 7d 5f b 12f 5b 4f 8i 8f'
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main(int argc, char *argv[]) {  // argv[1] = input binary file, argv[2] = output CSV file (must not exist yet)
+    // Buffers for one row: one array per group of the struct format string in the file header.
+    long long x6q[6];  // '6q'  -> CSV fields 1-6   (by position: objID..htmId in supercosmos_create_table.sql)
+    float xf[1];  // 'f'   -> CSV field 7      (by position: epoch)
+    double x7d[7];  // '7d'  -> CSV fields 8-14  (by position: ra..cz)
+    float x5f[5];  // '5f'  -> CSV fields 15-19 (by position: muAcosD..chi2)
+    signed char xb[1];  // 'b'   -> CSV field 20     (by position: Nplates)
+    float x12f[12];  // '12f' -> CSV fields 21-32 (by position: classMagB..sCorMagI)
+    signed char x5b[5];  // '5b'  -> CSV fields 33-37 (by position: meanClass..classI)
+    float x4f[4];  // '4f'  -> CSV fields 38-41 (by position: ellipB..ellipI)
+    int x8i[8];  // '8i'  -> CSV fields 42-49 (by position: qualB..blendI)
+    float x8f[8];  // '8f'  -> CSV fields 50-57 (by position: prfStatB..Ebmv)
+    // Item counts returned by the fread call for each buffer above.
+    int nx6q;
+    int nxf;
+    int nx7d;
+    int nx5f;
+    int nxb;
+    int nx12f;
+    int nx5b;
+    int nx4f;
+    int nx8i;
+    int nx8f;
+
+    FILE *fptr;  // input stream (binary)
+    FILE *fptr_try;  // used only to probe whether the output file already exists
+    FILE *fptr_out;  // output stream (CSV text)
+    int i;
+    long long num_rows;  // number of complete rows converted so far
+    // Exactly two arguments are required. All messages, including errors, go to stdout via printf.
+    if (argc != 3) {
+        printf("usage: supercosmos_binary input_file output_file\n");
+        exit(1);
+    }
+
+    fptr = fopen(argv[1], "rb");
+    if (fptr == NULL) {
+        printf("error: could not open input file %s\n", argv[1]);
+        exit(1);
+    }
+
+    printf("info:input_file=%s\n", argv[1]);
+    // Refuse to overwrite: if the output path can be opened for reading it already exists, so exit.
+    fptr_try = fopen(argv[2], "r");
+    if (fptr_try != NULL) {
+        printf("error: output file exists %s\n", argv[2]);
+        fclose(fptr_try);
+        exit(1);
+    }
+
+    fptr_out = fopen(argv[2], "w");
+    if (fptr_out == NULL) {
+        printf("error: could not open output file %s\n", argv[2]);
+        exit(1);
+    }
+
+    printf("info:output_file=%s\n", argv[2]);
+
+    num_rows = 0;
+    // Convert one row per iteration; the loop ends when the first read of a row returns no items.
+    while (1) {
+
+        // Read the ten field groups of one row in file order; the counts are validated after all ten reads.
+        nx6q = fread(x6q, sizeof(long long), 6, fptr);  // fread returns size_t; the count is narrowed to int
+        nxf = fread(xf, sizeof(float), 1, fptr);
+        nx7d = fread(x7d, sizeof(double), 7, fptr);
+        nx5f = fread(x5f, sizeof(float), 5, fptr);
+        nxb = fread(xb, sizeof(signed char), 1, fptr);
+        nx12f = fread(x12f, sizeof(float), 12, fptr);
+        nx5b = fread(x5b, sizeof(signed char), 5, fptr);
+        nx4f = fread(x4f, sizeof(float), 4, fptr);
+        nx8i = fread(x8i, sizeof(int), 8, fptr);  // NOTE(review): raw reads use host byte order and host type sizes; the '<' format implies little-endian data with 8-byte q and 4-byte i -- confirm for the target host
+        nx8f = fread(x8f, sizeof(float), 8, fptr);
+
+        // Zero items in the first group is treated as end of input. NOTE(review): ferror() is never called, so a read error here looks the same as EOF.
+        if (nx6q == 0) {
+            printf("info:converted %lld rows to CSV.\n", num_rows);
+            break;
+        }
+
+        // A short count in any group means a truncated row: report the first short group and exit(1).
+        if (nx6q != 6) {
+            printf("error:incomplete row:expected nx6q=6:got nx6q=%d\n", nx6q);
+            exit(1);
+        }
+        if (nxf != 1) {
+            printf("error:incomplete row:expected nxf=1:got nxf=%d\n", nxf);
+            exit(1);
+        }
+        if (nx7d != 7) {
+            printf("error:incomplete row:expected nx7d=7:got nx7d=%d\n", nx7d);
+            exit(1);
+        }
+        if (nx5f != 5) {
+            printf("error:incomplete row:expected nx5f=5:got nx5f=%d\n", nx5f);
+            exit(1);
+        }
+        if (nxb != 1) {
+            printf("error:incomplete row:expected nxb=1:got nxb=%d\n", nxb);
+            exit(1);
+        }
+        if (nx12f != 12) {
+            printf("error:incomplete row:expected nx12f=12:got nx12f=%d\n",
+                   nx12f);
+            exit(1);
+        }
+        if (nx5b != 5) {
+            printf("error:incomplete row:expected nx5b=5:got nx5b=%d\n", nx5b);
+            exit(1);
+        }
+        if (nx4f != 4) {
+            printf("error:incomplete row:expected nx4f=4:got nx4f=%d\n", nx4f);
+            exit(1);
+        }
+        if (nx8i != 8) {
+            printf("error:incomplete row:expected nx8i=8:got nx8i=%d\n", nx8i);
+            exit(1);
+        }
+        if (nx8f != 8) {
+            printf("error:incomplete row:expected nx8f=8:got nx8f=%d\n", nx8f);
+            exit(1);
+        }
+
+        // All ten groups were read in full, so count the row.
+        num_rows = num_rows + 1;
+
+        // Write the row as one CSV line: fields in read order, comma-separated, no header line and no quoting.
+        for (i = 0; i < 6; i++) {
+            fprintf(fptr_out, "%lld,", x6q[i]);
+        }
+
+        fprintf(fptr_out, "%.8e,", xf[0]);  // floats: 8 digits after the point, i.e. 9 significant digits
+
+        for (i = 0; i < 7; i++) {
+            fprintf(fptr_out, "%.16e,", x7d[i]);  // doubles: 17 significant digits
+        }
+
+        for (i = 0; i < 5; i++) {
+            fprintf(fptr_out, "%.8e,", x5f[i]);
+        }
+
+        fprintf(fptr_out, "%hhd,", xb[0]);  // signed byte written as a decimal integer
+
+        for (i = 0; i < 12; i++) {
+            fprintf(fptr_out, "%.8e,", x12f[i]);
+        }
+
+        for (i = 0; i < 5; i++) {
+            fprintf(fptr_out, "%hhd,", x5b[i]);
+        }
+
+        for (i = 0; i < 4; i++) {
+            fprintf(fptr_out, "%.8e,", x4f[i]);
+        }
+
+        for (i = 0; i < 8; i++) {
+            fprintf(fptr_out, "%d,", x8i[i]);
+        }
+
+        for (i = 0; i < 8; i++) {
+            if (i < 7) {
+                fprintf(fptr_out, "%.8e,", x8f[i]);
+            } else {
+                fprintf(fptr_out, "%.8e", x8f[i]);  // last field of the row: no trailing comma
+            }
+        }
+
+        fprintf(fptr_out, "\n");  // end of CSV row
+    }
+    // Only the EOF break reaches this point; every error path above leaves through exit(1).
+    // NOTE(review): the results of fprintf and of fclose(fptr_out) are never checked, so a failed write would go unreported.
+    fclose(fptr);
+    fclose(fptr_out);
+    return 0;
+}
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_indexes.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_indexes.sql
@@ -0,0 +1,11 @@
+\o supercosmos_create_indexes.out
+create index on catalogdb.supercosmos(q3c_ang2ipix(ra,dec));  -- expression index over (ra, dec); q3c_ang2ipix is not defined in these scripts (presumably from the Q3C extension; confirm it is installed)
+create index on catalogdb.supercosmos(classB);  -- the remaining statements each index one plain column; no index names are given
+create index on catalogdb.supercosmos(classR1);
+create index on catalogdb.supercosmos(classR2);
+create index on catalogdb.supercosmos(classI);
+create index on catalogdb.supercosmos(classMagB);
+create index on catalogdb.supercosmos(classMagR1);
+create index on catalogdb.supercosmos(classMagR2);
+create index on catalogdb.supercosmos(classMagI);
+\o
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql b/schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql
@@ -0,0 +1,66 @@
+-- The create table statement is based on 
+-- the column names in the SuperCOSMOS README and 
+-- the column types in readSSABinary.py
+--
+-- The below link has more details
+-- http://ssa.roe.ac.uk/www/SSA_TABLE_SourceSchema.html#Source
+
+create table catalogdb.supercosmos(  -- 57 columns; by position they follow the groups of the struct format '<6q f 7d 5f b 12f 5b 4f 8i 8f' quoted in supercosmos_binary.c
+objID bigint,  -- '6q' group: six integers stored as bigint (objID .. htmId); objID is made the primary key by supercosmos_alter_table_pkey.sql
+objIDB bigint,
+objIDR1 bigint,
+objIDR2 bigint,
+objIDI bigint,
+htmId bigint,
+epoch real,  -- 'f' group: one float stored as real
+ra double precision,  -- '7d' group: seven doubles (ra .. cz); ra and dec are used by the q3c_ang2ipix index
+dec double precision,
+sigRA double precision,
+sigDec double precision,
+cx double precision,
+cy double precision,
+cz double precision,
+muAcosD real,  -- '5f' group: five floats (muAcosD .. chi2)
+muD real,
+sigMuAcosD real,
+sigMuD real,
+chi2 real,
+Nplates smallint,  -- 'b' group: one signed byte, stored as smallint
+classMagB real,  -- '12f' group: twelve floats (classMagB .. sCorMagI)
+classMagR1 real,
+classMagR2 real,
+classMagI real,
+gCorMagB real,
+gCorMagR1 real,
+gCorMagR2 real,
+gCorMagI real,
+sCorMagB real,
+sCorMagR1 real,
+sCorMagR2 real,
+sCorMagI real,
+meanClass smallint,  -- '5b' group: five signed bytes stored as smallint (meanClass .. classI)
+classB smallint,
+classR1 smallint,
+classR2 smallint,
+classI smallint,
+ellipB real,  -- '4f' group: four floats (ellipB .. ellipI)
+ellipR1 real,
+ellipR2 real,
+ellipI real,
+qualB integer,  -- '8i' group: eight integers (qualB .. blendI)
+qualR1 integer,
+qualR2 integer,
+qualI integer,
+blendB integer,
+blendR1 integer,
+blendR2 integer,
+blendI integer,
+prfStatB real,  -- '8f' group: eight floats (prfStatB .. Ebmv)
+prfStatR1 real,
+prfStatR2 real,
+prfStatI real,
+l real,
+b real,
+d real,
+Ebmv real
+);
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py b/schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py
@@ -0,0 +1,86 @@
+# Program: supercosmos_load.py
+# Aim: load supercosmos csv files into the postgreSQL sdss5db database.
+#
+# The program skips any csv file that already has a matching
+# <name>.csv.out file in csvout_dir, so it is not loaded twice.
+#
+
+import glob
+import os.path
+
+
+DEBUG = False  # when True, the globbed file list is replaced by the single file named below
+
+# Note that csv_dir and csvout_dir must end with / (paths are built by plain string concatenation)
+
+csv_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/"  # noqa E501
+csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csvout/"  # noqa E501
+
+list_of_csv_files = glob.glob(csv_dir + "ssaSource*ra*.csv")
+
+list_of_csv_files.sort()  # process the files in sorted name order
+
+fout = open(csvout_dir + "load.csv.out", "a")  # append-mode log; every message below is printed to stdout and to this log
+
+if DEBUG is True:
+    list_of_csv_files = [csv_dir + "ssaSource000ra030.bin"]  # NOTE(review): ".bin" here makes the .replace('.csv', ...) calls below no-ops; looks like it should be ".csv" - confirm
+
+for i in range(len(list_of_csv_files)):
+    full_csv_file = list_of_csv_files[i]
+    csv_file = os.path.basename(full_csv_file)
+    csvout_file = csv_file.replace('.csv', '.csv.out')  # psql output file for this csv
+    csvsql_file = csv_file.replace('.csv', '.csv.sql')  # generated psql script for this csv
+
+    # both csvout_file and csvsql_file are put in csvout_dir directory
+    full_csvout_file = csvout_dir + csvout_file
+    full_csvsql_file = csvout_dir + csvsql_file
+    # if the csv.out file already exists then skip this csv file and go to the next csv file
+    if os.path.isfile(full_csvout_file):
+        print("skipping loading csv file since csv.out file already exists:",
+              csvout_file)
+        print("skipping loading csv file since csv.out file already exists:",
+              csvout_file,
+              file=fout, flush=True)
+        continue
+    # Write the psql script: send output to the csv.out file, \copy the csv into the table, restore output, quit.
+    fpgscript = open(full_csvsql_file, "w")
+    print("\\o " + full_csvout_file, file=fpgscript)
+    print("\\copy catalogdb.supercosmos ", file=fpgscript, end='')
+    print("from '" + full_csv_file + "' ", file=fpgscript, end='')
+    print("delimiter ',' ; ", file=fpgscript)
+    print("\\o", file=fpgscript)
+    print("\\q", file=fpgscript)
+    fpgscript.close()
+
+    print("load start:", csv_file)
+    print("load start:", csv_file, file=fout, flush=True)
+    # Run the generated script with psql; the captured output is not used afterwards.
+    pgcopy_output = os.popen("psql -U postgres sdss5db " +
+                             " -a -f " + full_csvsql_file).read()
+    # Count the lines of the csv file with wc -l to compare against the number of rows loaded.
+    wc_output = os.popen("wc -l " + full_csv_file).read()
+    num_lines_str, file_name = wc_output.split()
+    num_lines = int(num_lines_str)
+    print(csv_file, ":contains:", num_lines)
+    print(csv_file, ":contains:", num_lines, file=fout, flush=True)
+    # The first line of the csv.out file must be exactly two tokens, the second being the row count (presumably psql's "COPY <n>").
+    fcsvout = open(full_csvout_file, "r")  # NOTE(review): a missing or empty csv.out (e.g. after a failed load) raises here or at the unpacking below and stops the script
+    line = fcsvout.readline()
+    copytext, num_rows_loaded_str = line.split()
+    num_rows_loaded = int(num_rows_loaded_str)
+    print(csvout_file, ":loaded:", num_rows_loaded)
+    print(csvout_file, ":loaded:", num_rows_loaded, file=fout, flush=True)
+    fcsvout.close()
+    # A mismatch is only logged and the loop continues. NOTE(review): the csv.out file exists by now, so a later run will skip this file - confirm that is intended.
+    if(num_lines != num_rows_loaded):
+        print("load error:num_lines!=num_rows_loaded", csv_file)
+        print("load error:num_lines!=num_rows_loaded",
+              csv_file, file=fout, flush=True)
+
+    print("load end:", csv_file)
+    print("load end:", csv_file, file=fout, flush=True)
+
+print("loaded all csv files")
+print("loaded all csv files", file=fout, flush=True)
+
+fout.close()
diff --git a/schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py b/schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py
@@ -0,0 +1,37 @@
+# Program: supercosmos_make_bin2csv.py
+# Aim: make bash script to convert supercosmos .bin files to csv.
+#
+# Usage:
+# python supercosmos_make_bin2csv.py > supercosmos_bin2csv.sh
+# and then run
+# bash supercosmos_bin2csv.sh &
+#
+
+# input_dir and output_dir must end with /
+input_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/"  # noqa: E501
+output_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/"  # noqa: E501
+
+# This list is from the SuperCOSMOS filelist.
+list_of_bin_files = ["ssaSource000ra030.bin",
+                     "ssaSource030ra060.bin",
+                     "ssaSource060ra090.bin",
+                     "ssaSource090ra120.bin",
+                     "ssaSource120ra150.bin",
+                     "ssaSource150ra180.bin",
+                     "ssaSource180ra210.bin",
+                     "ssaSource210ra240.bin",
+                     "ssaSource240ra270.bin",
+                     "ssaSource270ra300.bin",
+                     "ssaSource300ra330.bin",
+                     "ssaSource330ra360.bin"]
+# Print one shell command per .bin file to stdout; nothing is executed by this script.
+for i in range(len(list_of_bin_files)):
+    bin_file = list_of_bin_files[i]
+    base_file, extension = bin_file.split('.')  # expects exactly one '.' in the file name
+    csv_file = base_file + ".csv"
+    full_bin_file = input_dir + bin_file
+    full_csv_file = output_dir + csv_file
+    print("./supercosmos_binary " +  # emitted command: ./supercosmos_binary <bin> <csv> > <csv>.out
+          full_bin_file + " " + full_csv_file + " > " +
+          full_csv_file + ".out")  # the converter's stdout (info and error messages) goes to <csv>.out
+    print("")  # blank line between commands in the generated script