# Metadata testing

In [2]:
import json
import numpy as np
import pandas as pd
from astropy.io import fits
from astropy.table import Table
from astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u
import matplotlib.pyplot as plt
import seaborn as sns

In [65]:
# Load the Galaxy Builder subject export (first CSV column becomes the index)
# and keep only rows whose subject_set_id lies in the inclusive range below.
gzb_subjects = (
    pd.read_csv('lib/galaxy-builder-subjects.csv', index_col=0)
    .query('subject_set_id >= 20561 and subject_set_id <= 21182')
)

# Each `metadata` cell is a JSON string: decode it, then expand the decoded
# objects into one DataFrame column per key.
gzb_metadata_decoded = gzb_subjects['metadata'].apply(json.loads)
gzb_metadata = gzb_metadata_decoded.apply(pd.Series)

In [66]:
# Read the nsa_v1_0_1 FITS catalogue into an astropy Table, asking the reader
# to memory-map the file.
nsa_table = Table.read(
    '../source_files/nsa_v1_0_1.fits',
    memmap=True,
)

In [67]:
# Read the NSA_GalaxyZoo FITS table (presumably an NSA x Galaxy Zoo
# cross-match -- TODO confirm provenance), memory-mapped like the table above.
gz_nsa_table = Table.read(
    '../source_files/NSA_GalaxyZoo.fits',
    memmap=True,
)

In [68]:
# Sky positions from the NSA_GalaxyZoo table as a DataFrame indexed by
# dr7objid, with the `ra_2` / `dec_2` columns renamed to plain `ra` / `dec`.
gz_nsa_positions = (
    gz_nsa_table[['dr7objid', 'ra_2', 'dec_2']]
    .to_pandas()
    .set_index('dr7objid')
    .rename(columns={'ra_2': 'ra', 'dec_2': 'dec'})
)

In [69]:
# Position + DR7 id for every Galaxy Builder subject; rows missing any of the
# three values are discarded.
gzb_metadata_renamed = gzb_metadata.rename(columns={'SDSS dr7 id': 'dr7objid'})
gzb_positions = gzb_metadata_renamed[['ra', 'dec', 'dr7objid']].dropna()

In [70]:
# Build sky coordinates (degrees) for both catalogues from their ra/dec columns.
gzb_ra, gzb_dec = gzb_positions[['ra', 'dec']].values.T
gzb_coords = SkyCoord(gzb_ra, gzb_dec, unit=u.degree)

nsa_ra, nsa_dec = gz_nsa_positions[['ra', 'dec']].values.T
nsa_coords = SkyCoord(nsa_ra, nsa_dec, unit=u.degree)

# For every Galaxy Builder subject, find the nearest catalogue entry on the sky:
# `idx` indexes into `nsa_coords`, `sep` is the on-sky separation. The third
# return value is discarded.
idx, sep, _ = match_coordinates_sky(gzb_coords, nsa_coords)

In [98]:
# Maximum on-sky separation, in arcseconds, for a positional match to count.
MATCHING_THRESHOLD = 4

# Boolean flag per Galaxy Builder subject: nearest neighbour closer than the threshold.
has_match = sep.arcsecond < MATCHING_THRESHOLD

position_match_fraction = has_match.sum() / has_match.size
print(
    '{:.2%} have position matches within {} arcseconds'.format(
        position_match_fraction,
        MATCHING_THRESHOLD,
    )
)

97.30% have position matches within 4 arcseconds


In [73]:
# Match Galaxy Builder subjects to the catalogue by DR7 object id.
#
# The original cell looked ids up in `gz_nsa_df`, a name that is never defined
# in this notebook (it only worked thanks to leftover kernel state). The
# dr7objid-indexed frame with `ra` / `dec` columns built above is
# `gz_nsa_positions`, so use that instead.
#
# `reindex` returns one row per requested id, in the requested order, with NaN
# in `ra` / `dec` for ids that are absent from the catalogue.
gzb_dr7_ids = gzb_positions.dr7objid.astype(np.int64).values

dr7_matched_df = pd.concat((
    gz_nsa_positions.reindex(gzb_dr7_ids).reset_index(),
    # Prefix the Galaxy Builder columns so they do not collide with the
    # catalogue's `ra` / `dec` / `dr7objid`; both frames get a fresh positional
    # index so the column-wise concat lines them up row for row.
    gzb_positions.add_prefix('gzb_').reset_index(),
), axis=1)

In [81]:
# A missing catalogue `ra` means the subject's dr7objid was not found.
did_not_dr7_match = dr7_matched_df['ra'].isna()

dr7_match_fraction = 1 - did_not_dr7_match.sum() / len(did_not_dr7_match)
print('{:.2%} have dr7objid matches'.format(dr7_match_fraction))

97.30% have dr7objid matches


In [92]:
# Show the full metadata of the subjects whose dr7objid had no catalogue match.
#
# `did_not_dr7_match` has one entry per row of `gzb_positions`, which was built
# from `gzb_metadata` with `.dropna()` and can therefore be shorter than
# `gzb_metadata`. Applying the mask positionally to `gzb_metadata` raises a
# length-mismatch error as soon as any row was dropped, so translate the mask to
# index labels first and select by label.
# NOTE(review): assumes the subject index of `gzb_metadata` is unique -- confirm.
unmatched_subject_ids = gzb_positions.index[did_not_dr7_match.values]
gzb_metadata.loc[unmatched_subject_ids]

Unnamed: 0_level_0,ra,Run,dec,Field,Rerun,Camcol,NSA id,#models,redshift,Common name,SDSS dr7 id,Url to view,#isModelling,Sersic axis ratio,Estimated distance,Petrosian radius (degrees)
subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
20901989,118.684814,5045,14.607033,46,301,4,485056,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",1.8e-05,Unknown,587741421632356667,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.436513,0.001208746456541121 * c / H_0,6.728252
20901991,161.50145,4504,34.967144,44,301,3,431909,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.006592,Unknown,587739097518047289,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.821631,0.007313817739486694 * c / H_0,17.295427
20902025,130.367538,3635,27.634048,134,301,3,316963,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.020635,Unknown,588016840174075960,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.695032,0.0206304844468832 * c / H_0,13.524295
20902056,242.090912,3910,4.722453,212,301,5,393042,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.036879,Unknown,587730023338279050,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.373941,0.0368216335773468 * c / H_0,9.522999
21686495,118.684814,5045,14.607033,46,301,4,485056,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",1.8e-05,Unknown,587741421632356667,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.436513,0.001208746456541121 * c / H_0,6.728252
21686497,161.50145,4504,34.967144,44,301,3,431909,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.006592,Unknown,587739097518047289,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.821631,0.007313817739486694 * c / H_0,17.295427
21686554,130.367538,3635,27.634048,134,301,3,316963,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.020635,Unknown,588016840174075960,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.695032,0.0206304844468832 * c / H_0,13.524295
21686585,242.090912,3910,4.722453,212,301,5,393042,"[{'frame': 0, 'model': 'GALAXY_BUILDER_DIFFERE...",0.036879,Unknown,587730023338279050,[skyserver link](+tab+http://skyserver.sdss.or...,True,0.373941,0.0368216335773468 * c / H_0,9.522999
