# Make Hart (2017) table

Data sources:

- Galaxy Zoo data releases: https://data.galaxyzoo.org/
- NASA-Sloan Atlas (NSA): http://www.nsatlas.org/data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import sdssCutoutGrab as scg     
import pandas as pd
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.coordinates import match_coordinates_sky

Load in the required data files:
- Ross' catalog
- The NASA-Sloan Atlas (NSA)
- The Debiased Galaxy Zoo 2 data export

In [3]:
# Locations of the three input files, given as relative paths (so they are
# resolved against the directory the kernel is running in).
# Ross' catalogue (CSV)
ROSS_CATALOGUE_LOC = '../../data_files/Hart2017/psi_table.csv'
# NASA-Sloan Atlas (FITS)
NSA_LOC = '../../data_files/nsa_v1_0_1.fits'
# Debiased Galaxy Zoo 2 data export (gzipped FITS)
GZ2_DATA_LOC = '../../data_files/gz2_hart16.fits.gz'

In [4]:
# Ross' catalogue; later cells read its 'ra', 'dec' and 'dr7objid' columns.
hart_df = pd.read_csv(ROSS_CATALOGUE_LOC)

In [5]:
# Columns to pull out of the NSA table.
nsa_keys = (
    'NSAID', 'ISDSS', 'INED', 'IAUNAME', # identifiers
    'RA', 'DEC', 'Z', 'ZDIST', # position
    'SERSIC_BA', 'SERSIC_PHI', # sersic photometry
    'PETRO_THETA', # azimuthally averaged Petrosian radius
    'PETRO_BA90', 'PETRO_PHI90', # Petrosian photometry at 90% light radius
    'PETRO_BA50', 'PETRO_PHI50', # ... at 50% light radius
    'RUN', 'CAMCOL', 'FIELD', 'RERUN',
    'ELPETRO_MASS', 'SERSIC_MASS',
)


def _swap_byte_order(column):
    """Return a copy of `column` with the opposite byte order and equal values.

    `byteswap()` copies the array with the bytes of every element reversed;
    viewing that copy through the byte-swapped dtype relabels it so the values
    read the same as before. This is the documented replacement for
    `ndarray.newbyteorder()`, which was removed in NumPy 2.0.
    """
    swapped = column.byteswap()
    return swapped.view(swapped.dtype.newbyteorder())


# The context manager closes the FITS file once the DataFrame has been built.
# Every column is copied by `byteswap()` inside the block, so `nsa_df` does not
# depend on the file staying open.
with fits.open(NSA_LOC) as nsa_fits:
    nsa_data = nsa_fits[1].data
    # Presumably the swap is there to hand pandas native-endian arrays (FITS
    # stores data big-endian) - TODO confirm on the target platform.
    nsa_df = pd.DataFrame(
        {k: _swap_byte_order(nsa_data[k]) for k in nsa_keys}
    )

In [6]:
# Columns to keep from the GZ2 export.
gz2_keys = [
    'dr7objid', 'ra', 'dec', # to use for cross-referencing
    't01_smooth_or_features_a02_features_or_disk_debiased',
    't02_edgeon_a05_no_debiased',
    't04_spiral_a08_spiral_debiased',
    't11_arms_number_a31_1_flag',
    't11_arms_number_a32_2_flag',
    't11_arms_number_a33_3_flag',
    't11_arms_number_a34_4_flag',
    't11_arms_number_a36_more_than_4_flag',
    't11_arms_number_a37_cant_tell_flag',
]

# The context manager closes the FITS file once the DataFrame has been built,
# instead of leaving the handle open for the lifetime of the kernel.
with fits.open(GZ2_DATA_LOC) as gz2_fits:
    gz2_data = gz2_fits[1].data
    gz2_df = pd.DataFrame(gz2_data)[gz2_keys]

# p_features * p_not_edge_on * p_spiral >= 0.5
# NOTE(review): spiral_mask is not used by any of the cells visible here.
spiral_mask = gz2_df.eval(
  't01_smooth_or_features_a02_features_or_disk_debiased \
    * t02_edgeon_a05_no_debiased \
    * t04_spiral_a08_spiral_debiased >= 0.5 \
')

Perform an RA-Dec match between Ross' catalogue and the NSA

In [7]:
# Display the column labels of nsa_df.
nsa_df.columns

Index(['NSAID', 'ISDSS', 'INED', 'IAUNAME', 'RA', 'DEC', 'Z', 'ZDIST',
       'SERSIC_BA', 'SERSIC_PHI', 'PETRO_THETA', 'PETRO_BA90', 'PETRO_PHI90',
       'PETRO_BA50', 'PETRO_PHI50', 'RUN', 'CAMCOL', 'FIELD', 'RERUN',
       'ELPETRO_MASS', 'SERSIC_MASS'],
      dtype='object')

In [8]:
# Largest on-sky separation at which a Hart galaxy still counts as matched to
# its nearest NSA entry.
MAX_MATCH_SEPARATION = 0.01*u.degree

# Build both coordinate sets from plain NumPy arrays (`.values`), so that the
# unit multiplication does not depend on how pandas handles `Series * Unit`
# and both catalogues are treated the same way.
hart_coords = SkyCoord(
  ra=hart_df['ra'].values*u.degree,
  dec=hart_df['dec'].values*u.degree
)
nsa_coords = SkyCoord(
  ra=nsa_df['RA'].values*u.degree,
  dec=nsa_df['DEC'].values*u.degree
)
# For every Hart galaxy: position of the nearest NSA row and the on-sky
# separation to it (d3d is returned by the matcher but not used here).
nearest_idx, sep2d, d3d = match_coordinates_sky(hart_coords, nsa_coords)
sep_mask = np.logical_not(sep2d > MAX_MATCH_SEPARATION)

# Keep only the matches within tolerance and label them with the index of the
# Hart row they belong to, so the later column-wise concat aligns on it.
hart_matched_nsa = nsa_df.iloc[nearest_idx[sep_mask]]
hart_matched_nsa.index = hart_df.index[sep_mask]

Perform a DR7OBJID match between Ross' catalogue and GZ2

In [9]:
# Look up every Hart galaxy in the GZ2 table by its dr7objid. reindex() keeps
# the order of hart_df and leaves NaN rows for ids missing from GZ2.
gz2_by_objid = gz2_df.set_index('dr7objid')
hart_matched_gz2 = gz2_by_objid.reindex(hart_df['dr7objid']).reset_index()
# Reuse hart_df's index so the two frames line up row for row.
hart_matched_gz2.index = hart_df.index

In [10]:
# Drop the columns that would duplicate ones already present in hart_df
# (positions, and the dr7objid used for the GZ2 lookup), then join the three
# frames side by side on their shared index.
nsa_extra_columns = hart_matched_nsa.drop(['RA', 'DEC'], axis=1)
gz2_extra_columns = hart_matched_gz2.drop(['ra', 'dec', 'dr7objid'], axis=1)
final_table = pd.concat(
  [hart_df, nsa_extra_columns, gz2_extra_columns],
  axis=1,
)

In [11]:
# Summarise the combined table: per-column non-null counts and dtypes.
final_table.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6222 entries, 0 to 6221
Data columns (total 35 columns):
dr7objid                                                6222 non-null int64
ra                                                      6222 non-null float64
dec                                                     6222 non-null float64
p_bar                                                   6222 non-null float64
psi_sparcfire                                           6222 non-null float64
psi_gz2                                                 6222 non-null float64
logMstar                                                6222 non-null float64
NSAID                                                   6114 non-null float64
ISDSS                                                   6114 non-null float64
INED                                                    6114 non-null float64
IAUNAME                                                 6114 non-null object
Z                                     

To check consistency with the previously calculated table, verify that (once rows with a null `Z` have been dropped) the dr7objids match:

In [12]:
# Previously calculated table, used as the reference to compare against.
original_table = np.load('originalTable.npy')

# Rows of the new table that have a redshift, i.e. a non-null Z.
has_redshift = final_table['Z'].notna()
np.all(original_table['dr7objid'] == final_table.loc[has_redshift, 'dr7objid'])

True

Write the result out to a CSV:

In [13]:
# Written relative to the kernel's working directory. `index` is left at its
# default, so the DataFrame index is written out as the first column.
final_table.to_csv('compiled_hart_catalog.csv')

It's worth noting that here we do not calculate some of the columns in the original table (including `gz2NSpiralFlags`), and instead simply present the raw counts. However, we do include more information, such as `p_bar`, `psi_sparcfire` and `psi_gz2`.

The function originally used for `gz2NSpiralFlags` is below:

## Make subject set indices

In [None]:
# known seed for consistency
np.random.seed(21508479)
# This cell previously read `finalTable['z']`, but the table built in this
# notebook is the DataFrame `final_table` and its redshift column is 'Z'.
redshifts = final_table['Z'].values
# Row positions ordered by increasing redshift; argsort places NaN last.
sortedByRedshift = np.argsort(redshifts)
# NaN > 0 is False, so rows with no redshift are dropped here as well.
zGtZeroMask = redshifts[sortedByRedshift] > 0
# don't know why one thing has z < 0, but I'm gonna ignore it
# sortedByRedshift holds positions, not index labels, hence .iloc.
finalOrderedByRedshift = final_table.iloc[sortedByRedshift[zGtZeroMask]]

In [14]:
def getNSpiral(data):
    """Select the six GZ2 arm-number flag fields from `data`.

    `data` is indexed with a list of field names, so it must support that
    kind of lookup (for example a pandas DataFrame). The fields are returned
    in the order: 1, 2, 3, 4, more than 4, can't tell.
    """
    arm_number_flags = [
        't11_arms_number_a31_1_flag',
        't11_arms_number_a32_2_flag',
        't11_arms_number_a33_3_flag',
        't11_arms_number_a34_4_flag',
        't11_arms_number_a36_more_than_4_flag',
        't11_arms_number_a37_cant_tell_flag',
    ]
    return data[arm_number_flags]