This notebook is a scratchpad for testing the code before it is moved into .py files.

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.io import fits
import astropy.io.ascii as astropy_ascii
from astropy.table import Table

# 1. Fetch coordinates (ra, dec) from raw data

In [8]:
# 6dFGS: load the Campbell table 8 catalogue (whitespace-delimited ASCII).
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
df = pd.read_csv('data/raw/6dfgs/campbell_table8.ascii', sep=r'\s+')

# ra is stored in hours in this table -> convert to degrees (1 h = 15 deg)
# so that the coordinate table uses degrees for both ra and dec.
# `assign` builds a new frame, leaving `df` itself untouched.
df_test = df[['ra', 'dec']].assign(ra=lambda coords: coords['ra'] * 15)

In [10]:
# Export the 6dFGS coordinate table as an IPAC-format ASCII file,
# replacing any file of the same name that already exists.
Table.from_pandas(df_test).write(
    '6dfgs_coord.ascii',
    format='ipac',
    overwrite=True,
)

In [23]:
# LAMOST: read the first table extension of the DR7 velocity-dispersion catalogue.
# FITS stores values big-endian; wrapping the raw record array in pd.DataFrame keeps
# those buffers, which pandas does not reliably support. Table.to_pandas() converts
# the columns to native byte order first.
with fits.open('data/raw/lamost/lamost_DR7_VDcat_20200825.fits') as hdul:
    df = Table(hdul[1].data).to_pandas()

# Summary statistics of the sky coordinates
df[['ra', 'dec']].describe()

Unnamed: 0,ra,dec
count,85861.0,85861.0
mean,172.739314,23.402422
std,90.71845,17.346885
min,0.006891,-8.783024
25%,133.475564,7.475573
50%,171.662455,22.503952
75%,219.158605,36.1379
max,359.994162,77.790749


In [13]:
# # sdss
# df = pd.read_csv('data/raw/sdss/SDSS_spectro_mrafifrbbn.csv')

# df[['ra', 'dec']].describe()

# 2. Merge the spectroscopy and photometry

In [31]:
# Compare the original raw data to 2MASS (check if the order is the same)
# The FITS table goes through Table.to_pandas() so the columns are converted to
# native byte order; pd.DataFrame on the raw record array keeps big-endian buffers,
# which pandas does not reliably support.
with fits.open('data/raw/lamost/lamost_DR7_VDcat_20200825.fits') as hdul:
    df_lamost_spectro = Table(hdul[1].data).to_pandas()

df_lamost_tmass = pd.read_csv('data/raw/2mass/lamost_tmass.csv', low_memory=False)

# Pair the rows by position (index-on-index join): if both tables list the objects
# in the same order, the coordinate differences below are ~0.
df = pd.merge(df_lamost_spectro[['ra', 'dec']], df_lamost_tmass[['ra_01', 'dec_01']], left_index=True, right_index=True)

df['delta_ra'] = df['ra_01']-df['ra']
df['delta_dec'] = df['dec_01']-df['dec']

df[['delta_ra', 'delta_dec']].describe()

Unnamed: 0,delta_ra,delta_dec
count,85861.0,85861.0
mean,1.164661e-14,0.0
std,2.621386e-11,0.0
min,-4.000015e-09,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,5.000004e-09,0.0


In [33]:
# Compare the original raw data to 2MASS (check if the order is the same)
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
df_6df_spectro = pd.read_csv('data/raw/6dfgs/campbell_table8.ascii', sep=r'\s+')
# ra is scaled by 15 (hours -> degrees) before it is compared with ra_01
df_6df_spectro['ra'] = df_6df_spectro['ra']*15

df_6df_tmass = pd.read_csv('data/raw/2mass/6dfgs_tmass.csv', low_memory=False)

# Pair the rows by position (index-on-index join): if both tables list the objects
# in the same order, the coordinate differences below are ~0.
df = pd.merge(df_6df_spectro[['ra', 'dec']], df_6df_tmass[['ra_01', 'dec_01']], left_index=True, right_index=True)

df['delta_ra'] = df['ra_01']-df['ra']
df['delta_dec'] = df['dec_01']-df['dec']

df[['delta_ra', 'delta_dec']].describe()

Unnamed: 0,delta_ra,delta_dec
count,11102.0,11102.0
mean,2.142063e-16,0.0
std,1.650116e-14,0.0
min,-5.684342e-14,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,0.0
max,5.684342e-14,0.0


# Cross-matching experiments

In [70]:
from astropy import units as u
from astropy.coordinates import SkyCoord

In [106]:
# SDSS spectroscopy (CSV export)
df_sdss = pd.read_csv('data/raw/sdss/SDSS_spectro_mrafifrbbn.csv')

# LAMOST spectroscopy: first table extension of the FITS catalogue,
# converted to a DataFrame through an astropy Table
with fits.open('data/raw/lamost/lamost_DR7_VDcat_20200825.fits') as lamost_hdus:
    df_lamost = Table(lamost_hdus[1].data).to_pandas()

In [107]:
# Sky positions of both catalogues; the ra/dec columns are interpreted as degrees.
lamost_coords = SkyCoord(
    ra=df_lamost['ra'].to_numpy(),
    dec=df_lamost['dec'].to_numpy(),
    unit=u.deg,
)
sdss_coords = SkyCoord(
    ra=df_sdss['ra'].to_numpy(),
    dec=df_sdss['dec'].to_numpy(),
    unit=u.deg,
)

In [108]:
# For every LAMOST position, find the closest SDSS position on the sky.
# idx   : row position of that nearest neighbour within sdss_coords
# sep2d : on-sky separation to that neighbour
idx, sep2d, _ = lamost_coords.match_to_catalog_sky(sdss_coords)

# A neighbour counts as the same object only if it lies within 1 arcsec.
is_counterpart = sep2d < (1.0 * u.arcsec)

# Attach the match information, then keep only the LAMOST rows with a counterpart.
df_lamost = df_lamost.assign(sdss_idx=idx, sdss_counterpart=is_counterpart)
df_lamost = df_lamost.loc[df_lamost['sdss_counterpart']]

In [109]:
# Join each matched LAMOST row to its SDSS counterpart: the stored neighbour
# index (sdss_idx) is looked up against the row index of df_sdss.
df_lamost_sdss = pd.merge(
    df_lamost,
    df_sdss,
    left_on='sdss_idx',
    right_index=True,
)

In [111]:
# Side-by-side view of the matched positions and velocity dispersions.
# The `_x` suffix marks columns from the left (LAMOST) frame, `_y` from the right (SDSS) frame.
df_lamost_sdss[[
    'ra_x', 'dec_x',
    'ra_y', 'dec_y',
    'veldisp_x',
    'sigmaStars',  # NOTE(review): presumably the SDSS stellar velocity dispersion - confirm
]]

Unnamed: 0,ra_x,dec_x,ra_y,dec_y,veldisp_x,sigmaStars
8,47.255500,-0.739480,47.255598,-0.739559,252.0,98.81241
21,146.099723,26.585136,146.099740,26.585145,176.0,158.59010
40,121.816470,6.052110,121.816460,6.052112,80.0,108.24700
42,121.637230,6.162760,121.637240,6.162754,94.0,45.21399
52,122.271720,5.976240,122.271740,5.976251,154.0,87.56513
...,...,...,...,...,...,...
85849,242.067374,3.960888,242.067370,3.960833,157.0,156.89590
85850,227.049393,27.240308,227.049390,27.240311,260.0,253.97280
85855,227.223969,26.641746,227.223890,26.641726,204.0,168.14610
85856,226.245001,27.678836,226.245000,27.678835,200.0,187.98010


## Main code for merging

### 6dFGS

In [122]:
# 6dFGS: load Campbell tables 8 and 2 (whitespace-delimited ASCII).
# `sep=r'\s+'` is the supported spelling of the deprecated `delim_whitespace=True`.
df1 = pd.read_csv('data/raw/6dfgs/campbell_table8.ascii', sep=r'\s+')
df2 = pd.read_csv('data/raw/6dfgs/campbell_table2.ascii', sep=r'\s+')

# Inner join on the 2MASS identifier, which is called '2MASSid' in table 8
# and 'tmass' in table 2; rows without a partner in the other table are dropped.
df = df1.merge(df2, left_on='2MASSid', right_on='tmass')

In [124]:
# List the column names of the merged 6dFGS table (table 8 joined with table 2)
df.columns

Index(['6dFGSid', '2MASSid', 'ra', 'dec', 'cz', 'logRe_J', 'elogRe_J',
       'logRe_H', 'elogRe_H', 'logRe_K', 'elogRe_K', 'logIe_J', 'elogIe_J',
       'logIe_H', 'elogIe_H', 'logIe_K', 'elogIe_K', 'logsigma', 'elogsigma',
       'mtype', 'Groupid', 'Nr', 'zgroup', 'dist5', 'sdens5', 'sampcodeJ',
       'pvcodeJ', 'sampcodeH', 'pvcodeH', 'sampcodeK', 'pvcodeK', 'n', 'sdf',
       'tmass', 'mjd', 'z_helio', 's2n', 'r', 'cor_sigma', 'dex_error'],
      dtype='object')

In [132]:
# Number of rows in df2 (campbell_table2), i.e. before the merge
len(df2)

11520