In [1]:
import os
from pathlib import Path
from typing import List, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from spluslib import SplusService, ImageType
from xmatchlib import XTable, CrossMatch
from utils import load_table, save_table

In [2]:
def batch_download_cluster_region(
  cluster: List[str],
  central_ra: List[float],
  central_dec: List[float],
  radius: List[float],
  zml_min: List[float],
  zml_max: List[float],
  username: Optional[str] = None,
  password: Optional[str] = None,
):
  """
  Download one photometric catalogue per cluster region.

  For each cluster a query is built that joins the ``idr4_dual_g``,
  ``idr4_dual_i``, ``idr4_dual_r``, ``idr4_photoz`` and
  ``idr4_star_galaxy_quasar`` tables on ``ID`` and keeps the objects that
  lie inside a circle around the cluster centre and whose ``zml`` falls
  inside ``[zml_min, zml_max]``. All queries are submitted in one call to
  ``SplusService.batch_query`` and the i-th result is saved to
  ``outputs_v3/cluster_<cluster[i]>.csv``.

  The six list arguments are parallel: element ``i`` of each list
  describes the same cluster.

  Parameters
  ----------
  cluster: cluster names; used in the output file name and returned as
    the constant ``cluster`` column of the result.
  central_ra, central_dec: centre of the search circle, interpolated into
    the ADQL ``CIRCLE('ICRS', ra, dec, radius)`` expression.
  radius: radius of the search circle (ADQL ``CIRCLE`` takes degrees).
  zml_min, zml_max: inclusive bounds applied to ``photoz.zml``.
  username, password: service credentials. When omitted they are read
    from the ``SPLUS_USERNAME`` / ``SPLUS_PASSWORD`` environment variables
    so that no secret has to be stored in the notebook.

  Raises
  ------
  ValueError: if the six lists do not all have the same length.
  RuntimeError: if no credentials are given and the environment variables
    are not set.
  """
  # zip() would silently stop at the shortest list while `save_path` is
  # built from the full `cluster` list, so a mismatch must fail loudly.
  lengths = {
    'cluster': len(cluster),
    'central_ra': len(central_ra),
    'central_dec': len(central_dec),
    'radius': len(radius),
    'zml_min': len(zml_min),
    'zml_max': len(zml_max),
  }
  if len(set(lengths.values())) > 1:
    raise ValueError(f'All parameter lists must have the same length, got: {lengths}')

  # Resolve credentials before doing any work or touching the disk.
  if username is None:
    username = os.environ.get('SPLUS_USERNAME')
  if password is None:
    password = os.environ.get('SPLUS_PASSWORD')
  if not username or not password:
    raise RuntimeError(
      'S-PLUS credentials not found: pass `username` and `password` or set '
      'the SPLUS_USERNAME and SPLUS_PASSWORD environment variables'
    )

  query_template = """
  SELECT 
    dual_g.ID, dual_g.RA, dual_g.DEC, dual_g.Field, 
    dual_g.g_auto, dual_r.r_auto, dual_i.i_auto, dual_r.r_aper_6,
    photoz.zml, photoz.odds, morpho.PROB_GAL, '{cluster}' AS cluster
  FROM
    idr4_dual_g AS dual_g
    INNER JOIN
    idr4_dual_i AS dual_i ON dual_i.ID = dual_g.ID
    INNER JOIN
    idr4_dual_r AS dual_r ON dual_r.ID = dual_g.ID
    INNER JOIN
    idr4_photoz AS photoz ON photoz.ID = dual_g.ID
    INNER JOIN
    idr4_star_galaxy_quasar AS morpho ON morpho.ID = dual_g.ID
  WHERE
    photoz.zml BETWEEN {zml_min} AND {zml_max} AND
    1 = CONTAINS(
      POINT('ICRS', dual_g.ra, dual_g.dec), 
      CIRCLE('ICRS', {ra}, {dec}, {radius})
    )
  """

  query = [
    query_template.format(
      # double any single quote so the name stays a valid string literal
      cluster=str(_cluster).replace("'", "''"),
      ra=_central_ra,
      dec=_central_dec,
      radius=_radius,
      zml_min=_zml_min,
      zml_max=_zml_max
    ) for _cluster, _central_ra, _central_dec, _radius, _zml_min, _zml_max
      in zip(cluster, central_ra, central_dec, radius, zml_min, zml_max)
  ]

  output_dir = Path('outputs_v3')
  output_dir.mkdir(parents=True, exist_ok=True)
  save_path = [output_dir / f'cluster_{_cluster}.csv' for _cluster in cluster]

  sp = SplusService(username=username, password=password)
  sp.batch_query(query, save_path=save_path, replace=True, scope='private', workers=7)
  
  
def concat_tables(paths, save_path):
  """
  Stack several tables row-wise and save the combined table.

  Parameters
  ----------
  paths: iterable of table paths; every file is read with ``load_table``.
  save_path: destination handed to ``save_table``.

  Columns whose name starts with ``Unnamed`` are dropped before saving.
  (pandas gives that name to header-less columns, e.g. a saved index.)

  Raises
  ------
  ValueError: if ``paths`` is empty.
  """
  paths = list(paths)
  if not paths:
    raise ValueError('concat_tables() needs at least one input path')

  # Read everything first and concatenate once: concatenating inside the
  # loop re-copies the accumulated frame on every iteration.
  # NOTE(review): the original read every file after the first with
  # pd.read_csv; load_table is now used for all of them so the inputs are
  # parsed uniformly - confirm load_table's CSV defaults are acceptable.
  frames = [load_table(path) for path in tqdm(paths)]
  df = pd.concat(frames, ignore_index=True)

  df = df.drop(columns=df.filter(regex=r'^Unnamed').columns)
  save_table(df, save_path)

In [9]:
# --- configuration ----------------------------------------------------------
# Half-width of the photo-z window around the cluster redshift used both in
# the download query and when re-filtering the downloaded catalogue.
PHOTOZ_RANGE = 0.04
# Padding added to the spec-z catalogue limits, and half-width of the tighter
# photo-z window applied to objects that have no spectroscopic match.
Z_RANGE = 0.02
# Redshift interval of the clusters that are processed.
CLUSTER_Z_MIN, CLUSTER_Z_MAX = 0.02, 0.1
OUTPUT_DIR = Path('outputs_v3')

# --- cluster search table ---------------------------------------------------
# df_z_cluster = load_table('public/catalog_chinese_xray.csv')
df_search = load_table('tables/catalog_chinese_xray_xmatch_splus_z0.1.csv')
print(f'>> Initial search table: {len(df_search)}')
df_search = df_search[df_search['z'].between(CLUSTER_Z_MIN, CLUSTER_Z_MAX)]
print(f'>> Search table after z filter: {len(df_search)}')
# .copy() detaches the filtered rows so the column assignment below acts on
# an independent frame (no SettingWithCopyWarning / ambiguous write).
df_search = df_search[df_search['ra'].notna() & df_search['dec'].notna()].copy()
print(f'>> Search table after available filter: {len(df_search)}')
df_search['name'] = df_search['name'].str.replace('_', '-', regex=False)

# --- spectroscopic catalogue ------------------------------------------------
df_spec = pd.read_csv('tables/SpecZ_Catalogue_20230830.csv', low_memory=False)
df_spec = df_spec[df_spec['z'].between(CLUSTER_Z_MIN - Z_RANGE, CLUSTER_Z_MAX + Z_RANGE)]
df_spec = df_spec.reset_index().copy(deep=True)

# --- search radius per cluster ----------------------------------------------
# For each cluster take the radius of the table row whose redshift is the
# closest to the cluster redshift.
df_radius = pd.read_csv('tables/z_rad15mpc-degb.dat', sep=' ')
radius_table_z = df_radius['z'].to_numpy(dtype=float)
radius_table_value = df_radius['radius'].to_numpy()
cluster_z = df_search['z'].to_numpy(dtype=float)
search_radius = [
  float(radius_table_value[np.nanargmin(np.abs(radius_table_z - z))])
  for z in cluster_z
]

cluster_download_params = {
  'cluster': df_search['name'].tolist(),
  'central_ra': df_search['ra'].tolist(),
  'central_dec': df_search['dec'].tolist(),
  'radius': search_radius,
  'zml_min': (cluster_z - PHOTOZ_RANGE).tolist(),
  'zml_max': (cluster_z + PHOTOZ_RANGE).tolist(),
}

print('>> Downloading photo catalog for all clusters')
# the dictionary keys are exactly the parameter names of the function
batch_download_cluster_region(**cluster_download_params)

print('>> Crossmatch with spec catalog')
for cluster_name, cluster_redshift in tqdm(
  zip(df_search['name'], df_search['z']), total=len(df_search)
):
  path = OUTPUT_DIR / f'cluster_{cluster_name}.csv'
  cluster_photo = load_table(path)
  mask = cluster_photo['zml'].between(cluster_redshift - PHOTOZ_RANGE, cluster_redshift + PHOTOZ_RANGE)
  # NOTE(review): reset_index() without drop=True keeps the old index as an
  # 'index' column, which then travels into the saved table - confirm this
  # is wanted before changing it.
  cluster_photo = cluster_photo[mask].reset_index().copy(deep=True)

  # NOTE(review): XTable/CrossMatch are rebuilt for every cluster as in the
  # original; they can probably be created once outside the loop if they
  # hold no per-join state - confirm against xmatchlib.
  xt_base = XTable(ra='RA', dec='DEC', df=cluster_photo)
  xt_spec = XTable(ra='RA', dec='DEC', df=df_spec, columns=['z', 'e_z'])

  cm = CrossMatch()
  # radius unit is defined by xmatchlib (presumably arcsec - TODO confirm)
  match = cm.left_join(xt_base, xt_spec, radius=1)
  match_df = match.table

  has_spec = match_df['z'].notna()  # objects with spec
  # objects wo spec but within a lower photoz range
  in_narrow_photoz = match_df['zml'].between(cluster_redshift - Z_RANGE, cluster_redshift + Z_RANGE)
  match_df = match_df[has_spec | in_narrow_photoz]
  match_df.to_csv(OUTPUT_DIR / (path.stem + '_photo+spec.csv'), index=False)

print('>> Concatenate tables')
# sorted(): glob order is filesystem dependent; sorting makes the row order
# of the concatenated table reproducible.
concat_tables(sorted(OUTPUT_DIR.glob('*photo+spec.csv')), 'outputs_v3/clusters_v3.csv')

>> Initial search table: 37
>> Search table after z filter: 33
>> Search table after available filter: 33
>> Downloading photo catalog for all clusters


100%|██████████| 33/33 [22:12<00:00, 40.38s/ files] 


>> Crossmatch with spec catalog


100%|██████████| 33/33 [01:24<00:00,  2.56s/it]


>> 


100%|██████████| 32/32 [00:09<00:00,  3.53it/s]


In [18]:
# Export, for every cross-matched cluster table, the objects that have a
# spectroscopic redshift, plus one table with the position and redshift of
# all clusters in `df_search` (defined in the pipeline cell).
print('>> Preparing tables to send')
send_dir = Path('outputs_v3') / 'paulo'
send_dir.mkdir(parents=True, exist_ok=True)

for path in tqdm(sorted(Path('outputs_v3').glob('*photo+spec.csv'))):
  tb = load_table(path)
  if tb.empty:
    # no rows -> no cluster name to read; values[0] would raise IndexError
    tqdm.write(f'>> Skipping empty table: {path}')
    continue
  cluster_name = tb['cluster'].values[0]
  tb = (
    tb.loc[tb['z'].notna(), ['RA', 'DEC', 'z', 'e_z']]  # filter objects without spec
    .fillna({'e_z': 0})  # nan error -> 0.0
    .rename(columns={'z': 'zspec', 'e_z': 'zspec-err'})
  )
  tb.to_csv(send_dir / f'cluster_{cluster_name}_objects.csv', index=False)

# The cluster list does not depend on the loop variable: write it once
# instead of rewriting the same file on every iteration.
df_search_copy = df_search[['ra', 'dec', 'z']]
df_search_copy = df_search_copy.rename(columns={'ra': 'RA', 'dec': 'DEC', 'z': 'zspec'})
df_search_copy.to_csv(send_dir / 'all_clusters.csv', index=False)

>> Preparing tables to send


100%|██████████| 33/33 [00:06<00:00,  5.03it/s]


In [None]:
# Load the concatenated catalogue. The previous path ended in '.parque',
# which is not a file this notebook produces; concat_tables() is called with
# 'outputs_v3/clusters_v3.csv', so that file is loaded here.
# NOTE(review): if a parquet copy is maintained separately, point this at
# 'outputs_v3/clusters_v3.parquet' instead.
df = load_table('outputs_v3/clusters_v3.csv')
