Skip to content

Commit

Permalink
Merge pull request #42 from sdss/supercosmos
Browse files Browse the repository at this point in the history
Supercosmos
  • Loading branch information
astronomygupta committed Oct 21, 2020
2 parents a124bf0 + e75a7b5 commit c944dfa
Show file tree
Hide file tree
Showing 6 changed files with 389 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Send query output to a log file while the primary key is added.
\o supercosmos_alter_table_pkey.out
ALTER TABLE catalogdb.supercosmos
    ADD PRIMARY KEY (objID);
-- A bare \o stops writing to the log file.
\o
186 changes: 186 additions & 0 deletions schema/sdss5db/catalogdb/supercosmos/supercosmos_binary.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Program supercosmos_binary.c
// Aim: Read a SuperCOSMOS binary data file and output a CSV file
//
// Python struct module format from readSSABinary.py:
// '<6q f 7d 5f b 12f 5b 4f 8i 8f'
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Exit with the "incomplete row" error unless a group was read in full.
// name is the counter label used in the message, e.g. "nx6q".
static void require_count(const char *name, size_t expected, size_t got) {
    if (got != expected) {
        printf("error:incomplete row:expected %s=%zu:got %s=%zu\n",
               name, expected, name, got);
        exit(1);
    }
}

// Write n floats to out, each one followed by a comma.
static void write_floats(FILE *out, const float *values, int n) {
    int i;

    for (i = 0; i < n; i++) {
        fprintf(out, "%.8e,", values[i]);
    }
}

// Return 1 when the native C types have the sizes and byte order of the
// '<' (little-endian, standard size) struct format, so that fread() can
// fill the row buffers directly. Return 0 otherwise.
static int host_layout_ok(void) {
    const int one = 1;

    return sizeof(long long) == 8 && sizeof(int) == 4 &&
           sizeof(float) == 4 && sizeof(double) == 8 &&
           *(const unsigned char *)&one == 1;
}

// Convert the binary file argv[1] to the CSV file argv[2].
//
// Each row is read group by group in the order of the struct format
// '<6q f 7d 5f b 12f 5b 4f 8i 8f' and written as one comma-separated
// line. The program refuses to overwrite an existing output file.
// All messages go to stdout. Returns 0 on success and exits with
// status 1 on any error (bad usage, unopenable file, incomplete row,
// read error or write error).
int main(int argc, char *argv[]) {
    // One buffer per group of the struct format, in file order.
    long long x6q[6];
    float xf[1];
    double x7d[7];
    float x5f[5];
    signed char xb[1];
    float x12f[12];
    signed char x5b[5];
    float x4f[4];
    int x8i[8];
    float x8f[8];

    FILE *fptr;
    FILE *fptr_try;
    FILE *fptr_out;
    size_t nbytes;
    int i;
    long long num_rows;

    if (argc != 3) {
        printf("usage: supercosmos_binary input_file output_file\n");
        exit(1);
    }

    // The buffers are filled with raw fread() calls, so a host with a
    // different type layout would silently produce wrong numbers.
    if (!host_layout_ok()) {
        printf("error: host is not little-endian with 8-byte long long "
               "and double and 4-byte int and float\n");
        exit(1);
    }

    fptr = fopen(argv[1], "rb");
    if (fptr == NULL) {
        printf("error: could not open input file %s\n", argv[1]);
        exit(1);
    }

    printf("info:input_file=%s\n", argv[1]);

    // Refuse to overwrite: if the output can be opened for reading,
    // it already exists.
    fptr_try = fopen(argv[2], "r");
    if (fptr_try != NULL) {
        printf("error: output file exists %s\n", argv[2]);
        fclose(fptr_try);
        exit(1);
    }

    fptr_out = fopen(argv[2], "w");
    if (fptr_out == NULL) {
        printf("error: could not open output file %s\n", argv[2]);
        exit(1);
    }

    printf("info:output_file=%s\n", argv[2]);

    num_rows = 0;
    // start loop to read rows
    while (1) {

        // The first group is read byte-wise so that a few stray
        // trailing bytes (less than one 8-byte value) are reported as
        // an incomplete row instead of being mistaken for a clean EOF.
        nbytes = fread(x6q, 1, sizeof(x6q), fptr);
        if (nbytes == 0) {
            if (ferror(fptr)) {
                printf("error: could not read input file %s\n", argv[1]);
                exit(1);
            }
            // Reached EOF on a row boundary so break out of loop
            break;
        }
        require_count("nx6q", 6, nbytes / sizeof(long long));

        // Remaining groups: an incomplete group means an incomplete
        // row, so exit at the first short read.
        require_count("nxf", 1, fread(xf, sizeof(float), 1, fptr));
        require_count("nx7d", 7, fread(x7d, sizeof(double), 7, fptr));
        require_count("nx5f", 5, fread(x5f, sizeof(float), 5, fptr));
        require_count("nxb", 1, fread(xb, sizeof(signed char), 1, fptr));
        require_count("nx12f", 12, fread(x12f, sizeof(float), 12, fptr));
        require_count("nx5b", 5, fread(x5b, sizeof(signed char), 5, fptr));
        require_count("nx4f", 4, fread(x4f, sizeof(float), 4, fptr));
        require_count("nx8i", 8, fread(x8i, sizeof(int), 8, fptr));
        require_count("nx8f", 8, fread(x8f, sizeof(float), 8, fptr));

        // complete row read so increment num_rows
        num_rows = num_rows + 1;

        // write CSV data
        for (i = 0; i < 6; i++) {
            fprintf(fptr_out, "%lld,", x6q[i]);
        }

        write_floats(fptr_out, xf, 1);

        for (i = 0; i < 7; i++) {
            fprintf(fptr_out, "%.16e,", x7d[i]);
        }

        write_floats(fptr_out, x5f, 5);

        fprintf(fptr_out, "%hhd,", xb[0]);

        write_floats(fptr_out, x12f, 12);

        for (i = 0; i < 5; i++) {
            fprintf(fptr_out, "%hhd,", x5b[i]);
        }

        write_floats(fptr_out, x4f, 4);

        for (i = 0; i < 8; i++) {
            fprintf(fptr_out, "%d,", x8i[i]);
        }

        // The last value of the row ends the line instead of being
        // followed by a comma.
        write_floats(fptr_out, x8f, 7);
        fprintf(fptr_out, "%.8e\n", x8f[7]);

        // Stop at the first failed write (for example a full disk)
        // rather than carrying on with a truncated CSV file.
        if (ferror(fptr_out)) {
            printf("error: could not write output file %s\n", argv[2]);
            exit(1);
        }
    }
    // end loop to read data

    fclose(fptr);

    // Buffered data is flushed by fclose(), so a failure here also
    // means the CSV file is incomplete.
    if (fclose(fptr_out) != 0) {
        printf("error: could not write output file %s\n", argv[2]);
        exit(1);
    }

    // Report success only after the output has been written and closed.
    printf("info:converted %lld rows to CSV.\n", num_rows);
    return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Send query output to a log file while the indexes are built.
\o supercosmos_create_indexes.out

-- Index on the value of q3c_ang2ipix(ra, dec).
CREATE INDEX ON catalogdb.supercosmos (q3c_ang2ipix(ra, dec));

-- One index per class column.
CREATE INDEX ON catalogdb.supercosmos (classB);
CREATE INDEX ON catalogdb.supercosmos (classR1);
CREATE INDEX ON catalogdb.supercosmos (classR2);
CREATE INDEX ON catalogdb.supercosmos (classI);

-- One index per classMag column.
CREATE INDEX ON catalogdb.supercosmos (classMagB);
CREATE INDEX ON catalogdb.supercosmos (classMagR1);
CREATE INDEX ON catalogdb.supercosmos (classMagR2);
CREATE INDEX ON catalogdb.supercosmos (classMagI);

-- A bare \o stops writing to the log file.
\o
66 changes: 66 additions & 0 deletions schema/sdss5db/catalogdb/supercosmos/supercosmos_create_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
-- The create table statement is based on
-- the column names in the SuperCOSMOS README and
-- the column types in readSSABinary.py
--
-- The below link has more details
-- http://ssa.roe.ac.uk/www/SSA_TABLE_SourceSchema.html#Source

-- Columns are listed in the order in which the values appear in each
-- CSV row, grouped here by column type.
CREATE TABLE catalogdb.supercosmos (
    -- 6 x bigint
    objID       BIGINT,
    objIDB      BIGINT,
    objIDR1     BIGINT,
    objIDR2     BIGINT,
    objIDI      BIGINT,
    htmId       BIGINT,
    -- 1 x real
    epoch       REAL,
    -- 7 x double precision
    ra          DOUBLE PRECISION,
    dec         DOUBLE PRECISION,
    sigRA       DOUBLE PRECISION,
    sigDec      DOUBLE PRECISION,
    cx          DOUBLE PRECISION,
    cy          DOUBLE PRECISION,
    cz          DOUBLE PRECISION,
    -- 5 x real
    muAcosD     REAL,
    muD         REAL,
    sigMuAcosD  REAL,
    sigMuD      REAL,
    chi2        REAL,
    -- 1 x smallint
    Nplates     SMALLINT,
    -- 12 x real
    classMagB   REAL,
    classMagR1  REAL,
    classMagR2  REAL,
    classMagI   REAL,
    gCorMagB    REAL,
    gCorMagR1   REAL,
    gCorMagR2   REAL,
    gCorMagI    REAL,
    sCorMagB    REAL,
    sCorMagR1   REAL,
    sCorMagR2   REAL,
    sCorMagI    REAL,
    -- 5 x smallint
    meanClass   SMALLINT,
    classB      SMALLINT,
    classR1     SMALLINT,
    classR2     SMALLINT,
    classI      SMALLINT,
    -- 4 x real
    ellipB      REAL,
    ellipR1     REAL,
    ellipR2     REAL,
    ellipI      REAL,
    -- 8 x integer
    qualB       INTEGER,
    qualR1      INTEGER,
    qualR2      INTEGER,
    qualI       INTEGER,
    blendB      INTEGER,
    blendR1     INTEGER,
    blendR2     INTEGER,
    blendI      INTEGER,
    -- 8 x real
    prfStatB    REAL,
    prfStatR1   REAL,
    prfStatR2   REAL,
    prfStatI    REAL,
    l           REAL,
    b           REAL,
    d           REAL,
    Ebmv        REAL
);
86 changes: 86 additions & 0 deletions schema/sdss5db/catalogdb/supercosmos/supercosmos_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Program: supercosmos_load.py
# Aim: load supercosmos csv files into the postgreSQL sdss5db database.
#
# The program skips any csv file that already has a matching .csv.out
# file in csvout_dir, so that a csv file is not loaded twice.
#

import glob
import os.path


# Set to True to load only the single csv file named below.
DEBUG = False

# Note that csv_dir and csvout_dir must end with /

csv_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/"  # noqa: E501
csvout_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csvout/"  # noqa: E501


def _log(log_file, *args):
    """Print args to stdout and write the same line to log_file."""
    print(*args)
    print(*args, file=log_file, flush=True)


def _read_rows_loaded(path):
    """Return the row count from the first line of a csv.out file.

    The first line is expected to hold exactly two fields, the second
    being the number of rows loaded. Return None when the file cannot
    be read or its first line does not have that form, so that the
    caller can log a load error instead of crashing.
    """
    try:
        with open(path, "r") as fcsvout:
            fields = fcsvout.readline().split()
    except OSError:
        return None
    if len(fields) != 2 or not fields[1].isdigit():
        return None
    return int(fields[1])


list_of_csv_files = glob.glob(csv_dir + "ssaSource*ra*.csv")
list_of_csv_files.sort()

if DEBUG:
    # The name must end in .csv: the .csv.out and .csv.sql names below
    # are derived by replacing '.csv', and would otherwise both be
    # identical to the input file name.
    list_of_csv_files = [csv_dir + "ssaSource000ra030.csv"]

with open(csvout_dir + "load.csv.out", "a") as fout:
    for full_csv_file in list_of_csv_files:
        csv_file = os.path.basename(full_csv_file)
        csvout_file = csv_file.replace('.csv', '.csv.out')
        csvsql_file = csv_file.replace('.csv', '.csv.sql')

        # both csvout_file and csvsql_file are put in csvout_dir directory
        full_csvout_file = csvout_dir + csvout_file
        full_csvsql_file = csvout_dir + csvsql_file

        # if the csv.out file exists then skip this csv file and go to
        # the next csv file. NOTE(review): this also skips a csv file
        # whose earlier load left a csv.out file behind but failed, so
        # that csv.out file has to be removed before retrying.
        if os.path.isfile(full_csvout_file):
            _log(
                fout,
                "skipping loading csv file since csv.out file already exists:",
                csvout_file,
            )
            continue

        # Write the psql script which copies this csv file into the
        # table and sends the query output to the csv.out file.
        with open(full_csvsql_file, "w") as fpgscript:
            fpgscript.write("\\o " + full_csvout_file + "\n")
            fpgscript.write("\\copy catalogdb.supercosmos "
                            "from '" + full_csv_file + "' "
                            "delimiter ',' ; \n")
            fpgscript.write("\\o\n")
            fpgscript.write("\\q\n")

        _log(fout, "load start:", csv_file)

        # read() returns once psql has closed its output, so the
        # csv.out file is inspected only after psql is done.
        psql_pipe = os.popen("psql -U postgres sdss5db " +
                             " -a -f " + full_csvsql_file)
        pgcopy_output = psql_pipe.read()
        # close() returns None when the command exited successfully.
        psql_status = psql_pipe.close()
        if psql_status is not None:
            _log(fout, "load error:psql exit status", psql_status, csv_file)

        wc_output = os.popen("wc -l " + full_csv_file).read()
        num_lines_str, file_name = wc_output.split()
        num_lines = int(num_lines_str)
        _log(fout, csv_file, ":contains:", num_lines)

        # Compare the number of lines in the csv file with the number
        # of rows reported in the csv.out file.
        num_rows_loaded = _read_rows_loaded(full_csvout_file)
        if num_rows_loaded is None:
            _log(fout, "load error:could not read row count from",
                 csvout_file)
        else:
            _log(fout, csvout_file, ":loaded:", num_rows_loaded)
            if num_lines != num_rows_loaded:
                _log(fout, "load error:num_lines!=num_rows_loaded", csv_file)

        _log(fout, "load end:", csv_file)

    _log(fout, "loaded all csv files")
37 changes: 37 additions & 0 deletions schema/sdss5db/catalogdb/supercosmos/supercosmos_make_bin2csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Program: supercosmos_make_bin2csv.py
# Aim: make bash script to convert supercosmos .bin files to csv.
#
# Usage:
# python supercosmos_make_bin2csv.py > supercosmos_bin2csv.sh
# and then run
# bash supercosmos_bin2csv.sh &
#

# File names are appended to these directories by plain string
# concatenation, so input_dir and output_dir must end with /
input_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/"  # noqa: E501
output_dir = "/uufs/chpc.utah.edu/common/home/sdss10/sdss5/target/catalogs/SuperCOSMOS/csv/"  # noqa: E501

# The .bin file names, taken from the SuperCOSMOS filelist.
list_of_bin_files = [
    "ssaSource000ra030.bin",
    "ssaSource030ra060.bin",
    "ssaSource060ra090.bin",
    "ssaSource090ra120.bin",
    "ssaSource120ra150.bin",
    "ssaSource150ra180.bin",
    "ssaSource180ra210.bin",
    "ssaSource210ra240.bin",
    "ssaSource240ra270.bin",
    "ssaSource270ra300.bin",
    "ssaSource300ra330.bin",
    "ssaSource330ra360.bin",
]

# Print one conversion command per .bin file, each followed by an
# empty line. The messages of each command are redirected to a
# <csv file>.out file next to the csv file.
for bin_name in list_of_bin_files:
    stem, _suffix = bin_name.split('.')
    source_path = input_dir + bin_name
    target_path = output_dir + stem + ".csv"
    print("./supercosmos_binary {} {} > {}.out".format(
        source_path, target_path, target_path))
    print()

0 comments on commit c944dfa

Please sign in to comment.