# GKGZ Finalise Notebook

This notebook takes the final debiased output catalogue, merges it with the extra information columns from the GAMA database, and adds convenience lookup columns (image file paths and URLs).

In [1]:
# imports
import os
import sys
import copy
import glob
import gzip
import time
import shutil
import warnings
import functools
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.anchored_artists import (
    AnchoredSizeBar as scalebar
)
from mpl_toolkits.axes_grid1 import make_axes_locatable
from collections import Counter
from scipy.ndimage import gaussian_filter
import astropy.units as u
from astropy.table import Table, join
from astropy.cosmology import FlatLambdaCDM
from astropy.coordinates import (SkyCoord, match_coordinates_sky)

# startup
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Hand facecolor "w" to the inline backend's print_figure call so
# that rendered figures get a white background.
%config InlineBackend.print_figure_kwargs={'facecolor':"w"}
def timecheck(start = None, timefmt = '%Y-%m-%d %H:%M:%S %Z'):
    """Return the current time as ``[epoch_seconds, label]``.

    Parameters
    ----------
    start : float, optional
        An earlier ``time.time()`` value. When given (and truthy),
        the time elapsed since ``start`` is appended to the label
        as `` (elapsed: HH:MM:SS)``.
    timefmt : str
        ``time.strftime`` format used for the local-time label.

    Returns
    -------
    list
        ``[now, label]`` where ``now`` comes from ``time.time()``.
    """
    now = time.time()
    out = time.strftime(timefmt, time.localtime(now))
    if start:
        # Build HH:MM:SS by hand: strftime on gmtime() wraps the
        # hour field at 24, so a run longer than a day used to be
        # reported as if it had taken only the remainder.
        total = max(int(now - start), 0)
        hours, rest = divmod(total, 3600)
        mins, secs = divmod(rest, 60)
        out += f' (elapsed: {hours:02d}:{mins:02d}:{secs:02d})'
    return [now, out]
# Record the notebook start time (epoch seconds plus formatted
# label) and print the label. nbt0 is handed back to timecheck in
# the finish-up cell to report the elapsed run time.
nbt0, nbt0str = timecheck()
print(f'Notebook start time:\n   {nbt0str}')
#--------------------------------68-------------------------------->

# small helpers
def seq(a, b, by):
    """Evenly spaced values from ``a`` towards ``b`` in steps of ``by``.

    The stop handed to ``np.arange`` is ``b + by/2`` so that ``b``
    itself is included when it falls on the grid.
    """
    stop = b + by/2
    return np.arange(a, stop, by)
def npmad(x):
    """NaN-ignoring median absolute deviation, scaled by 1.4826."""
    centre = np.nanmedian(x)
    spread = np.nanmedian(np.abs(x - centre))
    return 1.4826*spread
def ddir(x):
    """Names from ``dir(x)`` that do not start with ``'__'``."""
    names = []
    for attr in dir(x):
        if attr.startswith('__'):
            continue
        names.append(attr)
    return names
def minmax(x):
    """Return ``[min, max]`` of ``x``, ignoring NaNs."""
    lo = np.nanmin(x)
    hi = np.nanmax(x)
    return [lo, hi]
def extendrange(x, f=0.05):
    """Return ``(low, high)``: the NaN-ignoring range of ``x``
    widened at each end by the fraction ``f`` of its span."""
    lo, hi = minmax(x)
    span = (hi - lo)*(1 + f)
    return hi - span, lo + span
def findin(x, y):
    """Items of ``x`` whose ``str()`` form contains ``y``."""
    return list(filter(lambda item: y in str(item), list(x)))
def z2dist(z):
    """Luminosity distance at redshift ``z`` for the cosmology
    built as ``FlatLambdaCDM(70, 0.3)``."""
    cosmo = FlatLambdaCDM(70, 0.3)
    return cosmo.luminosity_distance(z)
def mag2Mag(m, z):
    """Return ``m + 5 - 5*log10(d)`` with ``d`` the value of
    ``z2dist(z)`` expressed in parsecs; no other term is applied
    here."""
    dist_pc = z2dist(z).to(u.pc).value
    return m+5-5*np.log10(dist_pc)
def stripnan(x):
    """Return ``x`` as an array with its NaN entries removed."""
    keep = ~np.isnan(x)
    return np.asarray(x)[keep]
def qindex(a, q):
    """Index of the element of ``a`` closest to its ``q`` quantile.

    Parameters
    ----------
    a : array-like
        Numeric values; NaN entries are ignored.
    q : float
        Quantile as a fraction (it is multiplied by 100 before
        being used as a percentile), e.g. 0.5 for the median.

    Returns
    -------
    int
        Position in ``a`` of the value nearest that quantile.

    Raises
    ------
    ValueError
        If ``a`` is empty or holds nothing but NaN.
    """
    vals = np.asarray(a)
    # NaN-aware, in line with npmad/minmax: a plain percentile
    # turns NaN as soon as one entry is NaN, and argmin then just
    # reports the position of the first NaN.
    target = np.nanpercentile(vals, q*100)
    return np.nanargmin(np.abs(vals - target))

Notebook start time:
   2020-12-09 20:42:05 CST


In [2]:
# input data
# NOTE(review): cln_file is defined here but is not read anywhere
# in the visible notebook — confirm whether it is still needed.
cln_file = '../../outputs/gkgz-cleaned.fits.gz'
deb_file = '../../outputs/gkgz-debiased.fits.gz'
ext_file = '../../outputs/gkgz-extra.fits.gz'
# read in datasets (using ext as dat base)
# dat (from ext_file) is the left-hand table of the later join;
# deb (from deb_file) supplies the columns merged onto it.
dat = Table.read(ext_file)
deb = Table.read(deb_file)

In [3]:
# remove unwanted columns
# Every deb column that dat already carries is dropped from deb;
# CATAID is exempt because it is the key used for the merge.
shared_names = set(dat.columns.keys()) - {'CATAID'}
badcols = [name for name in deb.columns.keys()
           if name in shared_names]
deb.remove_columns(badcols)

In [4]:
# reorder columns prior to merge
# Question names are the text before the first '_' of every
# column containing '_total'. dict.fromkeys drops repeats while
# keeping first-seen order, so a question that owns more than one
# such column no longer yields duplicate column names below.
questions = list(dict.fromkeys(
    x.split('_')[0] for x in deb.columns if '_total' in x))
# For each question in turn, collect its columns in their current
# order. startswith() replaces the former 'LSK'-prefix substring
# test, which stood in for a prefix match but could also match a
# name that merely contained 'LSK<question>_'.
columns = [x for qq in questions for x in deb.columns
           if x.startswith(f'{qq}_')]
# CATAID leads so it remains available as the join key; columns
# that belong to no question are dropped by the selection.
columns = ['CATAID'] + columns
deb = deb[columns]

In [5]:
# merge
# Left join on CATAID: dat is the left table and deb the right.
# metadata_conflicts='silent' is passed so that differing table
# metadata does not produce warnings.
# NOTE(review): dat is rebound here, so re-running this cell on its
# own joins deb onto an already merged table — run cells top to
# bottom.
dat = join(dat, deb, keys='CATAID', join_type='left',
           metadata_conflicts='silent')

In [6]:
# add FILENAME, URL and URL424 columns
# gp prefixes the FILENAME_* values, gu the full-size URL_* values,
# and zu the 424-pixel and thumbnail URLs.
gp='/GAMA/data/files/GZClassifications/v01'
gu='http://www.gama-survey.org/data/files/GZClassifications/v01'
zu='http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo'

# Field label from RA: below 150 -> gama09, above 200 -> gama15,
# anything else -> gama12.
region = []
for ra in dat['RA']:
    if ra < 150:
        region.append('gama09')
    elif ra > 200:
        region.append('gama15')
    else:
        region.append('gama12')

# Build all seven per-object strings in a single pass over the
# catalogue instead of one pass per column.
fileNative, fileInvert = [], []
urlNative, urlInvert = [], []
urlNative424, urlInvert424, urlThumb = [], [], []
for g, r in zip(dat['CATAID'], region):
    fileNative.append(f'{gp}/native/G{g}-native.png')
    fileInvert.append(f'{gp}/invert/G{g}-invert.png')
    urlNative.append(f'{gu}/native/G{g}-native.png')
    urlInvert.append(f'{gu}/invert/G{g}-invert.png')
    urlNative424.append(f'{zu}/{r}/native/G{g}-native-424.png')
    urlInvert424.append(f'{zu}/{r}/invert/G{g}-invert-424.png')
    urlThumb.append(f'{zu}/{r}/thumb/G{g}-thumb-150.png')

# Attach the new columns (same names and order as before).
dat['FILENAME_NATIVE'] = fileNative
dat['FILENAME_INVERT'] = fileInvert
dat['URL_NATIVE'] = urlNative
dat['URL_INVERT'] = urlInvert
dat['URL_NATIVE_424'] = urlNative424
dat['URL_INVERT_424'] = urlInvert424
dat['URL_THUMB'] = urlThumb

In [7]:
# preview the first three rows of the merged table
display(dat[:3])

CATAID,RA,DEC,zooniverse_id,subject_id,absmag_r,GALRE_r_kpc,Z_TONRY,Zfof,features_clean_total,features_deb_psamp,features_smooth_deb_frac,features_features_deb_frac,features_star_or_artifact_deb_frac,edgeon_clean_total,edgeon_deb_psamp,edgeon_yes_deb_frac,edgeon_no_deb_frac,bar_clean_total,bar_deb_psamp,bar_bar_deb_frac,bar_no_bar_deb_frac,spiral_clean_total,spiral_deb_psamp,spiral_spiral_deb_frac,spiral_no_spiral_deb_frac,bulge_clean_total,bulge_deb_psamp,bulge_no_bulge_deb_frac,bulge_obvious_deb_frac,bulge_dominant_deb_frac,spiralwinding_clean_total,spiralwinding_deb_psamp,spiralwinding_tight_deb_frac,spiralwinding_medium_deb_frac,spiralwinding_loose_deb_frac,spiralnumber_clean_total,spiralnumber_deb_psamp,spiralnumber_1_deb_frac,spiralnumber_2_deb_frac,spiralnumber_3_deb_frac,spiralnumber_4_deb_frac,spiralnumber_more_than_4_deb_frac,bulgeshape_clean_total,bulgeshape_deb_psamp,bulgeshape_rounded_deb_frac,bulgeshape_boxy_deb_frac,bulgeshape_no_bulge_deb_frac,round_clean_total,round_deb_psamp,round_completely_round_deb_frac,round_in_between_deb_frac,round_cigar_shaped_deb_frac,mergers_clean_total,mergers_deb_psamp,mergers_merging_deb_frac,mergers_tidal_debris_deb_frac,mergers_both_deb_frac,mergers_neither_deb_frac,oddtype_clean_total,oddtype_deb_psamp,oddtype_none_deb_frac,oddtype_ring_deb_frac,oddtype_lens_or_arc_deb_frac,oddtype_irregular_deb_frac,oddtype_other_deb_frac,oddtype_dust_lane_deb_frac,oddtype_overlapping_deb_frac,discuss_clean_total,discuss_deb_psamp,discuss_yes_deb_frac,discuss_no_deb_frac,FILENAME_NATIVE,FILENAME_INVERT,URL_NATIVE,URL_INVERT,URL_NATIVE_424,URL_INVERT_424,URL_THUMB
int32,float64,float64,bytes10,bytes24,float64,float64,float64,float64,float64,bool,float64,float64,float64,float64,bool,float64,float64,float64,bool,float64,float64,float64,bool,float64,float64,float64,bool,float64,float64,float64,float64,bool,float64,float64,float64,float64,bool,float64,float64,float64,float64,float64,float64,bool,float64,float64,float64,float64,bool,float64,float64,float64,float64,bool,float64,float64,float64,float64,float64,bool,float64,float64,float64,float64,float64,float64,float64,float64,bool,float64,float64,str65,str65,str86,str86,str101,str101,str99
6802,174.00598431,0.72093463,AGZ000e60u,5857e78ed369fd0040006001,-18.18183,1.12046880203965,0.051813804,,25.16696013229547,True,0.7129805401359826,0.3138542250369631,0.0146963453240995,2.0,False,0.0,1.0,2.0,False,0.8123914434439976,0.0460875687710786,2.0,False,0.0,1.0,2.0,False,0.0,0.0,1.0,0.0,False,-1.0,-1.0,-1.0,0.0,False,-1.0,-1.0,-1.0,0.0,-1.0,0.0,False,-1.0,-1.0,-1.0,22.72537781452592,True,0.0,0.8002174994255468,0.7303432536164496,24.725377814525917,True,0.0232919672516392,0.1097400532850593,0.0468530519309381,0.7729224514060943,24.725377814525917,True,0.1434757999296616,0.0,0.0,0.0,0.0881691049751382,0.0003288208125459,0.0,25.166960132295472,True,0.0602336137322129,0.9103971733601464,/GAMA/data/files/GZClassifications/v01/native/G6802-native.png,/GAMA/data/files/GZClassifications/v01/invert/G6802-invert.png,http://www.gama-survey.org/data/files/GZClassifications/v01/native/G6802-native.png,http://www.gama-survey.org/data/files/GZClassifications/v01/invert/G6802-invert.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/native/G6802-native-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/invert/G6802-invert-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/thumb/G6802-thumb-150.png
6816,174.01897122,0.66580358,AGZ000e60v,5857e78ed369fd0040006003,-17.854446,1.38936163109771,0.077715404,,30.625664309062003,True,0.2136598441845733,0.5539406941264615,0.3218207228437148,2.7687013957220907,False,0.1718447170659556,0.7825248613320017,2.0,False,0.0,1.0,2.0,False,0.0,1.0,2.0,False,0.27586559161395,0.6550339883144699,0.0,0.0,False,-1.0,-1.0,-1.0,0.0,False,-1.0,-1.0,-1.0,0.0,-1.0,0.7687013957220906,False,1.0,0.0,0.0,23.87677372279706,False,0.1031080509215943,0.6205745601052047,0.1237358447824236,26.64547511851915,True,0.1177998440399257,0.1164985860001981,0.0933512265388638,0.6132496195370996,26.64547511851915,True,0.2683272808141141,0.0,0.0,0.0,0.0,0.0,0.0,30.625664309062003,True,0.0,1.0,/GAMA/data/files/GZClassifications/v01/native/G6816-native.png,/GAMA/data/files/GZClassifications/v01/invert/G6816-invert.png,http://www.gama-survey.org/data/files/GZClassifications/v01/native/G6816-native.png,http://www.gama-survey.org/data/files/GZClassifications/v01/invert/G6816-invert.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/native/G6816-native-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/invert/G6816-invert-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/thumb/G6816-thumb-150.png
6821,174.15315328,0.81543855,AGZ000e60q,5857e78ed369fd0040006005,-15.608814,0.698103753888339,0.0045812135,,25.825580457972908,True,0.1548851924745457,0.6188427685952332,0.226272038930221,15.981973711190909,True,0.0801352636302301,0.9198647363697698,14.701254034513218,True,0.020743273881561,0.979256726118439,14.701254034513218,True,0.0680214080820835,0.9319785919179164,14.701254034513218,True,0.8635688597333222,0.0,0.1364311402666777,1.0,False,0.0,0.0,1.0,1.0,False,0.0,1.0,0.0,0.0,0.0,1.2807196766776912,False,0.8083479524078168,0.1974761008755277,0.0,4.0,False,0.0,1.0,0.0,19.981973711190907,True,0.0142748476802516,0.2654868941134314,0.000285700069535,0.7199525581367818,19.981973711190907,True,0.107258527931928,0.0,0.0,0.5199983342273601,0.3198179087191533,0.0500451063770517,0.1079570582056666,25.82558045797291,True,0.2433726375020414,0.7566273624979587,/GAMA/data/files/GZClassifications/v01/native/G6821-native.png,/GAMA/data/files/GZClassifications/v01/invert/G6821-invert.png,http://www.gama-survey.org/data/files/GZClassifications/v01/native/G6821-native.png,http://www.gama-survey.org/data/files/GZClassifications/v01/invert/G6821-invert.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/native/G6821-native-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/invert/G6821-invert-424.png,http://s3.amazonaws.com/zooniverse-data/project_data/galaxy_zoo/gama12/thumb/G6821-thumb-150.png


In [8]:
# write final output and gzip
output_cat = '../gkgz-final.fits.gz'
if output_cat.endswith('.gz'):
    # Write the uncompressed FITS file first, then compress it with
    # the standard library. The former `!gzip --best` shell-out
    # needed a gzip binary on PATH, interpolated the path unquoted
    # and did not stop the notebook when compression failed.
    output_cat_TEMP = output_cat[:-3]
    # overwrite=True replaces the manual exists()/remove() steps
    dat.write(output_cat_TEMP, overwrite=True)
    # compresslevel=9 is the equivalent of `gzip --best`; opening
    # the target with 'wb' replaces any previous output file
    with open(output_cat_TEMP, 'rb') as raw, \
            gzip.open(output_cat, 'wb', compresslevel=9) as packed:
        shutil.copyfileobj(raw, packed)
    # the gzip command removed its input; keep only the .gz here too
    os.remove(output_cat_TEMP)
else:
    dat.write(output_cat, overwrite=True)

In [9]:
# Notebook finish up
# Passing the start timestamp nbt0 makes timecheck append the
# elapsed run time to the printed label.
nbt1, nbt1str = timecheck(nbt0)
print(f'Notebook end time:\n   {nbt1str}')
#--------------------------------68-------------------------------->

Notebook end time:
   2020-12-09 20:42:13 CST (elapsed: 00:00:08)
