## Global Definitions

In [1]:
import sys
sys.path.append("/home/local-python/lib/python3.5/site-packages")
import numpy as np
import pandas as pd
import collections

pd.options.mode.chained_assignment = None

In [2]:
v=[-3.83082454004815753e+00,1.37459160094637340e+01,-2.20140582645629301e+01]

In [3]:
np.linalg.norm(v)

26.234408414741086

In [4]:
w=[2.37896083289735891e+01,3.56121948132816479e+00,1.04380434535207289e+01]

In [5]:
np.linalg.norm(w)

26.221756229851344

In [6]:
vsun=np.array([11.1,12.24+220,7.25])
vobjin=np.array([-11.32034, -22.38791, -7.62075])
np.linalg.norm(vsun+vobjin)

209.85253318138953

In [7]:
vobjout=np.array([-5.84243, 20.30468, -15.40202])
np.linalg.norm(vsun+vobjout)

252.7309099985352

# Build AstroRV Catalog

This notebook is aimed to build the AstroRV catalog, a complete compilation of astrometric and radial velocities information of nearby stars. 

This catalog was originally aimed at studying the interstellar origin probability for objects approaching the Solar System in unbound orbits

## Sources

In [8]:
#SOURCE DIR
SRC_DIR="src/"

#GAIA SOURCES                                                                                                                                                                        
TGAS_DIR="src/Astro/"

#SIMBAD SOURCES                                                                                                                                                                      
SIMBAD_DIR="src/Astro/"

#TYCHO2/HIPPARCOS SOURCES                                                                                                                                                            
HIPTYC_DIR="src/Astro/"

#RADIAL VELOCITY SOURCES                                                                                                                                                             
RV_DIR="src/RV/"

## Read input catalogs

### GAIA

In [9]:
# =============================================================================
# LECTURA DE ARCHIVOS DE GAIA
# =============================================================================

# Description:

# hip : Hipparcos identifier (int).
# tycho2_id : Tycho 2 identifier (string).
# ref_epoch : Reference epoch (double, Time[Julian Years]), expressed as a Julian Year in TCB.
# ra : Right ascension (double, Angle[deg]). Barycentric right ascension of the source in ICRS.
# ra error : Standard error of right ascension (double, Angle[mas]).
# dec : Declination (double, Angle[deg]). Barycentric declination of the source in ICRS.
# dec error : Standard error of declination (double, Angle[mas]).
# parallax : Parallax (double, Angle[mas]). Absolute barycentric stellar parallax of the source.
# parallax error : Standard error of parallax (double, Angle[mas]).
# pmra : Proper motion in right ascension direction (double, Angular Velocity[mas/year].
# pmra error : Standard error of proper motion in right ascension direction (double, Angular Velocity[mas/year]).
# pmdec : Proper motion in declination direction (double, Angular Velocity[mas/year].
# pmdec error : Standard error of proper motion in declination direction (double, Angular Velocity[mas/year]).
# phot g mean mag : G-band mean magnitude (double, Magnitude[mag]) Mean magnitude in the G band.
# l : Galactic longitude (double, Angle[deg]).
# b : Galactic latitude (double, Angle[deg]).

cols_gaia = ["hip", "tycho2_id", "ref_epoch", "ra", "ra_error", "dec", "dec_error", "parallax", "parallax_error", "pmra", "pmra_error", \
        "pmdec", "pmdec_error", "ra_dec_corr", "ra_parallax_corr", "ra_pmra_corr", "ra_pmdec_corr", "dec_parallax_corr", \
        "dec_pmra_corr", "dec_pmdec_corr", "parallax_pmra_corr", "parallax_pmdec_corr", "pmra_pmdec_corr", \
        "phot_g_mean_flux", "phot_g_mean_flux_error", "phot_g_mean_mag", "l", "b", "ecl_lon", "ecl_lat"]

for i in range(16):
    filename = TGAS_DIR + "TgasSource_000-000-0" + str(i).zfill(2) + ".csv.gz"
    if i == 0:
        print("Reading", filename)
        gaia = pd.read_csv(filename, usecols=cols_gaia)
    else:
        print("Reading", filename)
        DRx = pd.read_csv(filename, usecols=cols_gaia)
        gaia = gaia.append(DRx)

gaia = pd.DataFrame(gaia)

gaia_hip = gaia[gaia.hip.notnull()]
gaia_tyc = gaia[gaia.tycho2_id.notnull()]

print("Gaia: Subset Hipparcos:", len(gaia_hip))
print("Gaia: Subset Tycho-2:", len(gaia_tyc))
print("Total Gaia:", len(gaia))

Reading src/Astro/TgasSource_000-000-000.csv.gz
Reading src/Astro/TgasSource_000-000-001.csv.gz
Reading src/Astro/TgasSource_000-000-002.csv.gz
Reading src/Astro/TgasSource_000-000-003.csv.gz
Reading src/Astro/TgasSource_000-000-004.csv.gz
Reading src/Astro/TgasSource_000-000-005.csv.gz
Reading src/Astro/TgasSource_000-000-006.csv.gz
Reading src/Astro/TgasSource_000-000-007.csv.gz
Reading src/Astro/TgasSource_000-000-008.csv.gz
Reading src/Astro/TgasSource_000-000-009.csv.gz
Reading src/Astro/TgasSource_000-000-010.csv.gz
Reading src/Astro/TgasSource_000-000-011.csv.gz
Reading src/Astro/TgasSource_000-000-012.csv.gz
Reading src/Astro/TgasSource_000-000-013.csv.gz
Reading src/Astro/TgasSource_000-000-014.csv.gz
Reading src/Astro/TgasSource_000-000-015.csv.gz
Gaia: Subset Hipparcos: 93635
Gaia: Subset Tycho-2: 1963415
Total Gaia: 2057050


### HIPPARCOS

In [10]:
# Información disponible en: http://cdsarc.u-strasbg.fr/viz-bin/Cat?cat=I%2F239&target=readme&#sRM2.1
names_hip={}
names_hip[1] = "hip"
names_hip[8] = "ra_Hip"
names_hip[9] = "dec_Hip"
names_hip[11] = "parallax_Hip"
names_hip[12] = "pmra_Hip"
names_hip[13] = "pmdec_Hip"
names_hip[14] = "ra_error_Hip"
names_hip[15] = "dec_error_Hip"
names_hip[16] = "parallax_error_Hip"
names_hip[17] = "pmra_error_Hip"
names_hip[18] = "pmdec_error_Hip"
names_hip[19] = "ra_dec_corr_Hip"
names_hip[20] = "ra_parallax_corr_Hip"
names_hip[21] = "dec_parallax_corr_Hip"
names_hip[22] = "ra_pmra_corr_Hip"
names_hip[23] = "dec_pmra_corr_Hip"
names_hip[24] = "parallax_pmra_corr_Hip"
names_hip[25] = "ra_pmdec_corr_Hip"
names_hip[26] = "dec_pmdec_corr_Hip"
names_hip[27] = "parallax_pmdec_corr_Hip"
names_hip[28] = "pmra_pmdec_corr_Hip"
names_hip[71] = "HenryDraperId_Hip"
names_hip[5] = "Vmag_Hip"
#names_hip[76] = "sptype_Hip"

names_hip = collections.OrderedDict(sorted(names_hip.items()))

hipparcos = pd.read_csv(HIPTYC_DIR+"hip_main.dat.gz", delimiter="|", usecols = names_hip.keys(),  names = names_hip.values())
hipparcos = pd.DataFrame(hipparcos)

# Descartar elementos con paralajes nulos

n1 = len(hipparcos)
#hipparcos.apply(pd.to_numeric, errors="coerce").info()                                # Convierte la información en tipo float

hipparcos["ra_Hip"] = pd.to_numeric(hipparcos["ra_Hip"], errors="coerce")             # Convierte en np.nan los valores no numer.
hipparcos["dec_Hip"] = pd.to_numeric(hipparcos["dec_Hip"], errors="coerce")           # Convierte en np.nan los valores no numer.
hipparcos["parallax_Hip"] = pd.to_numeric(hipparcos["parallax_Hip"], errors="coerce") # Convierte en np.nan los valores no numer.
hipparcos["Vmag_Hip"] = pd.to_numeric(hipparcos["Vmag_Hip"], errors="coerce")         # Convierte en np.nan los valores no numer.
hipparcos.dropna(subset=["ra_Hip", "dec_Hip", "parallax_Hip"], inplace=True)          # Elimina los registros con paralaje nulo
hipparcos["source"]="hipparcos"


n2 = len(hipparcos)
print("Objects discarded:", n1-n2)

len(hipparcos)

  interactivity=interactivity, compiler=compiler, result=result)


Objects discarded: 263


117955

In [11]:
hipparcos[hipparcos.hip==118278]['parallax_Hip']

118173    38.76
Name: parallax_Hip, dtype: float64

## Hipparcos Improved (2007)

In [12]:
#HIPPARCOS VAN LEEUWEN
#I/311/hip2
name="HipVanLeeuwen2007.tsv"
print("Building catalogue %s..."%name)
comments=list(range(95))+[96,97]
hipvan=pd.read_csv(HIPTYC_DIR+name,sep=";",skiprows=comments)
cats=['HIP']
for cname in cats:
    hipvan[cname]=hipvan[cname].fillna('')
    hipvan[cname]=hipvan[cname].map(str)
dfstr=hipvan.select_dtypes(['object'])
hipvan[dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(hipvan))
hipvan.rename(columns=dict(HIP='hip'),inplace=True)
hipvan.fillna('NULL',inplace=True)
hipvan["n_HIP"]=hipvan["n_HIP"].map(lambda x:'NULL' if x=='' else x.replace(' ','_'))
ncols=dict()
for col in hipvan.columns:
    ncols[col]=col+"_Hip2" if col!='hip' else col
hipvan.rename(columns=ncols,inplace=True)
hipvan.hip=hipvan.hip.map(np.int64)
hipvan=hipvan[['_RAJ2000_Hip2', '_DEJ2000_Hip2', 'hip', 'Plx_Hip2', 'e_Plx_Hip2', 'pmRA_Hip2', 'e_pmRA_Hip2',
       'pmDE_Hip2', 'e_pmDE_Hip2','Hpmag_Hip2', 'e_Hpmag_Hip2', 'B-V_Hip2', 'e_B-V_Hip2', 'V-I_Hip2']]
columns={'_RAJ2000_Hip2':'ra_Hip2', '_DEJ2000_Hip2':'dec_Hip2', 'hip':'hip', 
         'Plx_Hip2':'parallax_Hip2', 'e_Plx_Hip2':'parallax_error_Hip2', 
         'pmRA_Hip2':'pmra_Hip2', 'e_pmRA_Hip2':'pmra_error_Hip2',
         'pmDE_Hip2':'pmdec_Hip2', 'e_pmDE_Hip2':'pmdec_error_Hip2'}
hipvan.rename(columns=columns,inplace=True)

#Merge with hipparcos
hipparcos=hipparcos.merge(hipvan, left_on="hip", right_on="hip", how="outer")

Building catalogue HipVanLeeuwen2007.tsv...
Number of objects in HipVanLeeuwen2007.tsv: 117955


### TYCHO

In [13]:
# Información disponible en: http://cdsarc.u-strasbg.fr/viz-bin/Cat?cat=I%2F239&target=readme&#sRM2.13
names_tyc={}
names_tyc[1] = "tycho2_id"
names_tyc[8] = "ra_Tyc"
names_tyc[9] = "dec_Tyc"
names_tyc[11] = "parallax_Tyc"
names_tyc[12] = "pmra_Tyc"
names_tyc[13] = "pmdec_Tyc"
names_tyc[14] = "ra_error_Tyc"
names_tyc[15] = "dec_error_Tyc"
names_tyc[16] = "parallax_error_Tyc"
names_tyc[17] = "pmra_error_Tyc"
names_tyc[18] = "pmdec_error_Tyc"
names_tyc[19] = "ra_dec_corr_Tyc"
names_tyc[20] = "ra_parallax_corr_Tyc"
names_tyc[21] = "dec_parallax_corr_Tyc"
names_tyc[22] = "ra_pmra_corr_Tyc"
names_tyc[23] = "dec_pmra_corr_Tyc"
names_tyc[24] = "parallax_pmra_corr_Tyc"
names_tyc[25] = "ra_pmdec_corr_Tyc"
names_tyc[26] = "dec_pmdec_corr_Tyc"
names_tyc[27] = "parallax_pmdec_corr_Tyc"
names_tyc[28] = "pmra_pmdec_corr_Tyc"
names_tyc[53] = "HenryDraperId_Tyc"
names_tyc[5] = "Vmag_Tyc"

names_tyc = collections.OrderedDict(sorted(names_tyc.items()))

tycho = pd.read_csv(HIPTYC_DIR+"tyc_main.dat", delimiter="|", usecols = names_tyc.keys(), names = names_tyc.values())
tycho = pd.DataFrame(tycho)

# Dividir la cadena de texto en los 3 campos numéricos que componen el ID de Tycho_2 (separados por espacios en blanco)
tycho["a"], tycho["b"], tycho["c"] = tycho["tycho2_id"].str.split().str

# Concatenar los 3 campos numéricos que componen el ID de Tycho_2, separados por guión.
tycho["tycho2_id"] = tycho["a"] + "-" + tycho["b"] + "-" + tycho["c"]

# Borrar campos usados para la conversión
del tycho["a"], tycho["b"], tycho["c"]

# Descartar elementos con paralajes nulos

n1 = len(tycho)
#tycho.apply(pd.to_numeric, errors="coerce").info()                           # Convierte la información en tipo float

tycho["ra_Tyc"] = pd.to_numeric(tycho["ra_Tyc"], errors="coerce")             # Convierte en np.nan los valores no numéricos
tycho["dec_Tyc"] = pd.to_numeric(tycho["dec_Tyc"], errors="coerce")           # Convierte en np.nan los valores no numéricos
tycho["parallax_Tyc"] = pd.to_numeric(tycho["parallax_Tyc"], errors="coerce") # Convierte en np.nan los valores no numéricos
tycho["Vmag_Tyc"] = pd.to_numeric(tycho["Vmag_Tyc"], errors="coerce")         # Convierte en np.nan los valores no numéricos
tycho.dropna(subset=["ra_Tyc", "dec_Tyc", "parallax_Tyc"], inplace=True)      # Elimina los registros con paralaje nulo
tycho["source"]="tycho"

n2 = len(tycho)
print("Objects discarded:", n1-n2)

len(tycho)

Objects discarded: 22887


1035445

### SIMBAD

In [14]:
cols = ["typedident","identifier", "radvel", "coord1(ICRS,J2000/2000)", "plx", "pm", "MagV", "spec.type"]
simbad = pd.read_csv(SIMBAD_DIR+"simbad.csv", usecols=cols, delimiter="|")
simbad = pd.DataFrame(simbad)

# Modificación del ID del catálogo para que quede en formato INTEGER
simbad["hip"] = simbad["typedident"].map(lambda x: str(x)[4:]).astype(float)
del simbad["typedident"]

simbad["plx"] = pd.to_numeric(simbad["plx"], errors="coerce")   

# Cálculo de RA y DEC
# 1. Eliminar espacios en blanco de los lados izquierdo y derecho de cada cadena de texto
simbad["coord1(ICRS,J2000/2000)"] = simbad["coord1(ICRS,J2000/2000)"].str.strip()

# 2. Dividir la cadena de texto en 6 campos (hh:mm:ss para RA y hh:mm:ss para DEC)
simbad["ra_h"], simbad["ra_m"], simbad["ra_s"], simbad["dec_h"], simbad["dec_m"], simbad["dec_s"] = \
    simbad["coord1(ICRS,J2000/2000)"].str.split(" ").str

# 3. Concatenar los 3 primeros campos mediante la fórmula de conversión de hh:mm:ss a grados para la ascensión recta
simbad["ra_simbad"] = simbad["ra_h"].astype(float)*15 + simbad["ra_m"].astype(float)/60 + simbad["ra_s"].astype(float)/3600

# 4. Concatenar los 3 últimos campos mediante la fórmula de conversión de hh:mm:ss a grados para la declinación
simbad["dec_simbad"] = np.sign(simbad["dec_h"].astype(float)) * ( \
    np.abs(simbad["dec_h"].astype(float)) + simbad["dec_m"].astype(float)/60 + simbad["dec_s"].astype(float)/3600 )

# 5. Borrar campos usados para la conversión
del simbad["coord1(ICRS,J2000/2000)"]
del simbad["ra_h"], simbad["ra_m"], simbad["ra_s"], simbad["dec_h"], simbad["dec_m"], simbad["dec_s"]

# Cálculo del movimiento propio
# 1. Eliminar espacios en blanco de los lados izquierdo y derecho de cada cadena de texto
simbad["pm"] = simbad["pm"].str.strip()

#2. Dividir la cadena de texto en 2 campos (PM_RA y PM_DEC)
simbad["pmra_simbad"], simbad["pmdec_simbad"] = simbad["pm"].str.split(" ").str

# 3. Borrar campos usados para la conversión
del simbad["pm"]

# 4. Elimina los registros con paralaje nulo
n1 = len(simbad)
simbad.dropna(subset=["ra_simbad", "dec_simbad", "plx"], inplace=True)      
n2 = len(simbad)
print("Objects discarded:", n1-n2)

# Formato velocidad radial
simbad["radvel"] = simbad["radvel"].str.strip()                   # Elimina espacios en blanco
simbad["radvel"] = simbad["radvel"].replace("~", np.nan)          # Reemplaza el caracter "~" por Null

# Modificar nombre de algunas columnas
simbad = simbad.rename(columns={"identifier": "name_simbad"})      # Modifica el nombre de la columna de nombres propios
simbad = simbad.rename(columns={"plx": "parallax_simbad"})         # Modifca el nombre de la columna de paralajes
simbad = simbad.rename(columns={"spec.type": "sptype_simbad"})     # Modifca el nombre de la columna de clasificación espectral
simbad = simbad.rename(columns={"radvel": "radial_vel_simbad"})    # Modifica el nombre de la columna de velocidades radiales
simbad = simbad.rename(columns={"MagV": "Vmag_simbad"})            # Modifica el nombre de la columna de magnitud V
simbad["source"]="simbad"

len(simbad)

Objects discarded: 175


118004

## Merging

### Merging with common fields

In [15]:
# db = Gaia + Hipparcos
gaia["source"] = "gaia"
exclusive_hip = hipparcos[~hipparcos.hip.isin(gaia_hip.hip)]
exclusive_hip.rename(columns=lambda x: x.replace("_Hip", ""), inplace=True)
exclusive_hip["source"] = "hipparcos"
db = pd.concat([gaia, exclusive_hip], axis=0)
print("Aporte Hipparcos:", len(exclusive_hip))

# db = db + Tycho
exclusive_tyc = tycho[~tycho.tycho2_id.isin(gaia_tyc.tycho2_id)]
exclusive_tyc.rename(columns=lambda x: x.replace("_Tyc", ""), inplace=True)
exclusive_tyc["source"] = "tycho"
db = pd.concat([db, exclusive_tyc], axis=0)
print("Aporte Tycho:", len(exclusive_tyc))

# db = db + Simbad
exclusive_simbad = simbad[~simbad.hip.isin(db.hip)]
exclusive_simbad.rename(columns=lambda x: x.replace("_simbad", ""), inplace=True)
exclusive_simbad = exclusive_simbad.loc[:,["hip", "ra", "dec", "parallax", "pmra", "pmdec"]]
exclusive_simbad["source"] = "simbad"
db = pd.concat([db, exclusive_simbad], axis=0)
print("Aporte Simbad:", len(exclusive_simbad))

# db = db + todos los elementos de Simbad [nombre_star, tipo_espectral, velocidad_radial, Vmag]
db = db.merge(simbad.loc[:,["hip", "name_simbad","sptype_simbad","radial_vel_simbad","Vmag_simbad"]], \
              left_on="hip", right_on="hip", how="left")

# Organizar columnas
cols = ["source"] + cols_gaia + ["Vmag","HenryDraperId"] + ["name_simbad","sptype_simbad","radial_vel_simbad","Vmag_simbad"]
db = db[cols]
db = db.reset_index(drop=True)

# Resultado
print("Tamaño total:", len(db))

Aporte Hipparcos: 24320
Aporte Tycho: 259824
Aporte Simbad: 67
Tamaño total: 2341261


### Merging with all fields

In [16]:
n1 = len(gaia)
print("Tamaño inicial de la base de datos alimentada con Gaia:", n1)

database = gaia.merge(hipparcos, left_on="hip", right_on="hip", how="outer")
n2 = len(database)
print("Tamaño de Hipparcos:", len(hipparcos), "Tamaño final de la base de datos maestra:", n2, ". Aporte:", n2-n1)

database = database.merge(tycho, left_on="tycho2_id", right_on="tycho2_id", how="outer")
n3 = len(database)
print("Tamaño de Tycho:", len(tycho), "Tamaño final de la base de datos maestra:", n3, ". Aporte:", n3-n2)

database = database.merge(simbad, left_on="hip", right_on="hip", how="outer")
n4 = len(database)
print("Tamaño de Simbad:", len(simbad), "Tamaño final de la base de datos maestra:", n4, ". Aporte:", n4-n3)

sources=database[['source_x','source_y']].fillna('')
sources.columns=['s1','s2','s3','s4']
database["source"]=sources["s1"].astype(str)+":"+sources["s2"]+":"+sources["s3"]+":"+sources["s4"]
del(database["source_x"])
del(database["source_y"])

database = database.reset_index(drop=True)

Tamaño inicial de la base de datos alimentada con Gaia: 2057050
Tamaño de Hipparcos: 117955 Tamaño final de la base de datos maestra: 2081370 . Aporte: 24320
Tamaño de Tycho: 1035445 Tamaño final de la base de datos maestra: 2341194 . Aporte: 259824
Tamaño de Simbad: 118004 Tamaño final de la base de datos maestra: 2341261 . Aporte: 67


In [17]:
database[database.hip==118278][['parallax','parallax_Hip','parallax_Hip2']]

Unnamed: 0,parallax,parallax_Hip,parallax_Hip2
117948,,38.76,39.85


## Radial velocities

### RV Catalogues

In [18]:
data=dict()
match=dict()
RVcat=pd.DataFrame()
srcdir=RV_DIR

###################################################
#READ CATALOGUES
###################################################
#MALDONADO2010
#J/A+A/521/A12/table1
name="Maldonado2010.tsv"
print("Building catalogue %s..."%name)
comments=list(range(82))+[83,84]
data[name]=pd.read_csv(srcdir+name,sep=";",skiprows=comments)
cats=['HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"]
df["eRV"]=data[name]["e_RV"]
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#WEB1995
#III/213/catalog
name="Web1995-HIP.csv"
print("Building catalogue %s..."%name)
data[name]=pd.read_csv(srcdir+name)
cats=['HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["e_RV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#WEB1995-TYC2
#III/213/catalog
name="Web1995-TYC2.csv"
print("Building catalogue %s..."%name)
data[name]=pd.read_csv(srcdir+name)
cats=['TYC2','HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["e_RV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#GCS2011
#J/A+A/530/A138/catalog
name="GCS2011.tsv"
print("Building catalogue %s..."%name)
comments=list(range(174))+[175,176]
data[name]=pd.read_csv(srcdir+name,sep="|",skiprows=comments)
cats=['HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["e_RV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#RAVE-DR5
#III/279/rave_dr5
name="RAVE-DR5.tsv"
print("Building catalogue %s..."%name)
comments=list(range(78))+[79,80]
data[name]=pd.read_csv(srcdir+name,sep=";",skiprows=comments)
cats=['TYCHO2']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
data[name]["TYC2"]=data[name]["TYCHO2"]
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="RAJ2000",DEJ2000="DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["HRV"].map(str)
df["eRV"]=data[name]["e_HRV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#PULKOVO
#III/252/table8
name="Pulkovo.tsv"
print("Building catalogue %s..."%name)
comments=list(range(61))+[62,63]
data[name]=pd.read_csv(srcdir+name,sep=";",skiprows=comments)
cats=['HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RA",DEJ2000="_DE")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["eRV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#FAMAEY2005
#J/A+A/430/165/tablea1
name="Famaey2005.tsv"
print("Building catalogue %s..."%name)
comments=list(range(118))+[119,120]
data[name]=pd.read_csv(srcdir+name,sep=";",skiprows=comments)
cats=['HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["e_RV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#BB2000
#III/213/catalogue
name="BB2000.csv"
print("Building catalogue %s..."%name)
data[name]=pd.read_csv(srcdir+name)
cats=['TYC2','HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=data[name]["e_RV"].map(str)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#MALARODA2012
#III/249/catalog
name="Malaroda2012.csv"
print("Building catalogue %s..."%name)
data[name]=pd.read_csv(srcdir+name)
cats=['TYC2','HIP']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="_RAJ2000",DEJ2000="_DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=1.0;df["eRV"]=df["eRV"].map(str) #TYPICAL VALUE FOR OTHER CATALOGUES
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

#GALAH I
#J/MNRAS/465/3203/catal
name="Galah.tsv"
print("Building catalogue %s..."%name)
comments=list(range(54))+[55,56]
data[name]=pd.read_csv(srcdir+name,sep=";",skiprows=comments)
cats=['TYC2']
for cname in cats:
    data[name][cname]=data[name][cname].fillna('')
    data[name][cname]=data[name][cname].map(str)
dfstr=data[name].select_dtypes(['object'])
data[name][dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(data[name]))
for cat in cats:
    cond=data[name][cat]!=''
    print("Number of objects in catalogue %s:"%cat,len(data[name][cat][cond]))
#STORING RESULTS
df=pd.DataFrame()
if not 'TYC2' in data[name].columns:df["TYC2"]=''
else:df["TYC2"]=data[name]["TYC2"]
if not 'HIP' in data[name].columns:df["HIP"]=''
else:df["HIP"]=data[name]["HIP"].apply(lambda x:x.replace('.0',''))
COORDS=dict(RAJ2000="RAJ2000",DEJ2000="DEJ2000")
for C in COORDS.keys():df[C]=data[name][COORDS[C]]
df["RV"]=data[name]["RV"].map(str)
df["eRV"]=0.6;df["eRV"]=df["eRV"].map(str) #Martell et al. (2017)
df["CAT"]=name
#ZERO ERRORS
cond=df.RV==''
df=df.drop(df.index[cond])
df.RV=df.RV.map(float)
df.eRV[df.eRV=='']=0.0
df.eRV=df.eRV.map(float)
med=df.eRV[df.eRV>0].median()
print("Median error:",med)
print("Number of entries with zero error:",len(df.eRV[df.eRV==0]))
df.eRV[df.eRV==0]=med
#RESULTING SIZE
print("Filtered catalogue:",len(df))
#FILLNA
RVcat=RVcat.append(df.fillna(''))

###################################################
#COMPILING FULL TABLE
###################################################
print("Compiling final catalogue...")
RVcat=RVcat.rename(columns={'TYC2':'tycho2_id','HIP':'hip'})
print("Compiling final catalogue...")
RVcatf=RVcat.reset_index(drop=True)
print("Number of unfiltered entries:",len(RVcatf))
print("Catalogues included:",np.unique(RVcat.CAT.values))
RVcatf.is_copy=False
cond=RVcatf.RV==''
RVcatf=RVcatf.drop(RVcatf.index[cond])
RVcatf.RV=RVcatf.RV.map(float)
cond=RVcatf.eRV==''
RVcatf=RVcatf.drop(RVcatf.index[cond])
RVcatf.eRV=RVcatf.eRV.map(float)
print("Number of filtered entries:",len(RVcatf))

RV=RVcatf
cats=['hip','tycho2_id']
for cname in cats:
    RV[cname]=RV[cname].fillna('')
    RV[cname]=RV[cname].map(str)
dfstr=RV.select_dtypes(['object'])
RV[dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
RV['hip']=RV['hip'].apply(lambda x:x.replace('.0',''))
print("Number of RV objects: %d"%len(RV))

RVCat=pd.DataFrame()
for col in "hip","tycho2_id":
    sel=RV[RV[col]!=''][[col,"eRV"]].sort_values([col,"eRV"])
    print("Number of total entries for %s: %d"%(col,len(sel)))
    index=sel[col].drop_duplicates().index
    uniq=RV.ix[index]
    print("Number of uniq entries for %s: %d"%(col,len(uniq)))
    RVCat=RVCat.append(uniq)

#RVCat.to_csv(RV_DIR+"RVCat.csv",index=False)
print("Total number of uniq objects:%d"%len(RVCat))

rv=RVCat

Building catalogue Maldonado2010.tsv...
Number of objects in Maldonado2010.tsv: 495
Number of objects in catalogue HIP: 495
Median error: 0.11
Number of entries with zero error: 35
Filtered catalogue: 473
Building catalogue Web1995-HIP.csv...
Number of objects in Web1995-HIP.csv: 494
Number of objects in catalogue HIP: 494
Median error: 1.7
Number of entries with zero error: 0
Filtered catalogue: 494
Building catalogue Web1995-TYC2.csv...
Number of objects in Web1995-TYC2.csv: 673
Number of objects in catalogue TYC2: 673
Number of objects in catalogue HIP: 495
Median error: 1.9
Number of entries with zero error: 1
Filtered catalogue: 673
Building catalogue GCS2011.tsv...


  interactivity=interactivity, compiler=compiler, result=result)


Number of objects in GCS2011.tsv: 16682
Number of objects in catalogue HIP: 14955
Median error: 0.4
Number of entries with zero error: 1678
Filtered catalogue: 14139
Building catalogue RAVE-DR5.tsv...
Number of objects in RAVE-DR5.tsv: 520701
Number of objects in catalogue TYCHO2: 309596
Median error: 1.189
Number of entries with zero error: 217
Filtered catalogue: 520701
Building catalogue Pulkovo.tsv...
Number of objects in Pulkovo.tsv: 35493
Number of objects in catalogue HIP: 35493
Median error: 1.6
Number of entries with zero error: 24715
Filtered catalogue: 35493
Building catalogue Famaey2005.tsv...
Number of objects in Famaey2005.tsv: 6690
Number of objects in catalogue HIP: 6690
Median error: 0.22
Number of entries with zero error: 0
Filtered catalogue: 6028
Building catalogue BB2000.csv...
Number of objects in BB2000.csv: 673
Number of objects in catalogue TYC2: 673
Number of objects in catalogue HIP: 495
Median error: 1.9
Number of entries with zero error: 1
Filtered catalogu

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


Number of total entries for tycho2_id: 322882
Number of uniq entries for tycho2_id: 270658
Total number of uniq objects:307525


## Merging

In [19]:
#Catalogs
print("GAIA:",len(gaia))
print("HIPPARCOS:",len(hipparcos))
print("TYCHO:",len(tycho))
print("SIMBAD:",len(simbad))
print("RV:",len(rv))

GAIA: 2057050
HIPPARCOS: 117955
TYCHO: 1035445
SIMBAD: 118004
RV: 307525


In [20]:
#Converting hip column into string
mdb=database
cats=['hip','tycho2_id']
for cname in cats:
    mdb[cname]=mdb[cname].fillna('')
    mdb[cname]=mdb[cname].map(str)
dfstr=mdb.select_dtypes(['object'])
mdb[dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
mdb['hip']=mdb['hip'].apply(lambda x:x.replace('.0',''))

### Merging GAIA with RV

In [21]:
#gaiasrc=gaia
gaiasrc=database

cols=["tycho2_id","hip"]
rvgaia=pd.DataFrame()
for col in cols[::1],cols[::-1]:
    print("Merging by %s..."%col[0])
    result=pd.merge(left=gaiasrc[gaiasrc[col[0]]!=''],
                    right=rv[rv[col[0]]!=''],
                    on=col[0])
    result=result.drop("%s_y"%col[1],1)
    result=result.rename(columns={"%s_x"%col[1]:col[1]})
    print("Number of matchings for %s: %d"%(col[0],len(result)))
    rvgaia=rvgaia.append(result)

rvgaia=rvgaia.fillna('NULL')
print("Number of matches: %d"%len(rvgaia))

Merging by tycho2_id...
Number of matchings for tycho2_id: 230610
Merging by hip...
Number of matchings for hip: 37358
Number of matches: 267968


In [22]:
#Make all empty HIPs Nulls
rvgaia['hip']=rvgaia['hip'].map(lambda x:'NULL' if x=='' else x)
#Remove spaces from simbad names
rvgaia['name_simbad']=rvgaia['name_simbad'].map(lambda x:'NULL' if x=='' else x.replace(' ','_'))
#Convert Vmag to float and if ~ assign an arbitrary value of 123
rvgaia['Vmag_simbad']=rvgaia['Vmag_simbad'].map(lambda x:'123' if x=='~' or x=='' or x=='NULL' else x).map(float)
#NULL parallax to 0
pcols=['parallax','parallax_Hip','parallax_Hip2','parallax_Tyc','parallax_simbad']
for pcol in pcols:
    rvgaia[pcol]=rvgaia[pcol].map(lambda x:'0' if x=='NULL' else x).map(float)
#Left only not null Gaia,Hip parallaxe
rvgaia=rvgaia[(rvgaia.parallax>0)|(rvgaia.parallax_Hip>0)|(rvgaia.parallax_Hip2>0)]

In [23]:
print("Total:",len(rvgaia))
print("Hipparcos ID:",(rvgaia.hip!='NULL').sum())
print("Hipparcos ID:",(rvgaia.tycho2_id!='NULL').sum())

Total: 244589
Hipparcos ID: 37213
Hipparcos ID: 244589


**Result of merging database Gaia + Hip + Tyc2 + Simbad:**

Number of matchings for tycho2_id: 230610

Number of matchings for hip: 37358

Number of matches: 267968

**Result of merging database Gaia :**

Number of matchings for tycho2_id: 210263

Number of matchings for hip: 25925

Number of matches: 236188

In [24]:
rvgaia[['tycho2_id','hip','name_simbad','Vmag_simbad','RV','parallax_error_Hip']+pcols]

Unnamed: 0,tycho2_id,hip,name_simbad,Vmag_simbad,RV,parallax_error_Hip,parallax,parallax_Hip,parallax_Hip2,parallax_Tyc,parallax_simbad
0,55-72-1,,,123.000,2.061,,2.090812,0.00,0.00,51.0,0.00
1,48-1138-1,,,123.000,-32.497,,4.695800,0.00,0.00,-49.3,0.00
2,55-1269-1,,,123.000,-5.475,,2.986565,0.00,0.00,-51.0,0.00
3,55-1181-1,,,123.000,-10.742,,2.872148,0.00,0.00,0.0,0.00
4,55-1308-1,,,123.000,-13.173,,2.510212,0.00,0.00,0.0,0.00
5,48-685-1,,,123.000,-3.081,,7.380755,0.00,0.00,119.8,0.00
6,48-120-1,,,123.000,-34.862,,3.150919,0.00,0.00,71.0,0.00
7,48-1076-1,,,123.000,55.321,,5.466958,0.00,0.00,37.1,0.00
8,48-1215-1,,,123.000,-27.494,,3.914086,0.00,0.00,0.0,0.00
9,55-551-1,,,123.000,17.492,,1.018379,0.00,0.00,0.0,0.00


In [124]:
#if 1:
    #db.to_csv(TGAS_DIR+"AstroComp.csv",index=False)
    #database.to_csv(TGAS_DIR+"Astro.csv",index=False)
    #rvgaia.to_csv(SRC_DIR+"AstroRV.csv",index=False)

In [26]:
",".join(rvgaia.columns)

'hip,tycho2_id,ref_epoch,ra,ra_error,dec,dec_error,parallax,parallax_error,pmra,pmra_error,pmdec,pmdec_error,ra_dec_corr,ra_parallax_corr,ra_pmra_corr,ra_pmdec_corr,dec_parallax_corr,dec_pmra_corr,dec_pmdec_corr,parallax_pmra_corr,parallax_pmdec_corr,pmra_pmdec_corr,phot_g_mean_flux,phot_g_mean_flux_error,phot_g_mean_mag,l,b,ecl_lon,ecl_lat,Vmag_Hip,ra_Hip,dec_Hip,parallax_Hip,pmra_Hip,pmdec_Hip,ra_error_Hip,dec_error_Hip,parallax_error_Hip,pmra_error_Hip,pmdec_error_Hip,ra_dec_corr_Hip,ra_parallax_corr_Hip,dec_parallax_corr_Hip,ra_pmra_corr_Hip,dec_pmra_corr_Hip,parallax_pmra_corr_Hip,ra_pmdec_corr_Hip,dec_pmdec_corr_Hip,parallax_pmdec_corr_Hip,pmra_pmdec_corr_Hip,HenryDraperId_Hip,ra_Hip2,dec_Hip2,parallax_Hip2,parallax_error_Hip2,pmra_Hip2,pmra_error_Hip2,pmdec_Hip2,pmdec_error_Hip2,Hpmag_Hip2,e_Hpmag_Hip2,B-V_Hip2,e_B-V_Hip2,V-I_Hip2,Vmag_Tyc,ra_Tyc,dec_Tyc,parallax_Tyc,pmra_Tyc,pmdec_Tyc,ra_error_Tyc,dec_error_Tyc,parallax_error_Tyc,pmra_error_Tyc,pmdec_error_Tyc,ra_dec_corr_Tyc,r

In [27]:
rvgaia[rvgaia.Vmag_simbad<6][['source','CAT','hip','tycho2_id','name_simbad','Vmag_simbad','parallax','parallax_error','parallax_Hip','parallax_error_Hip','radial_vel_simbad','RV','eRV']].sort_values('Vmag_simbad')

Unnamed: 0,source,CAT,hip,tycho2_id,name_simbad,Vmag_simbad,parallax,parallax_error,parallax_Hip,parallax_error_Hip,radial_vel_simbad,RV,eRV
29262,::hipparcos:simbad,Pulkovo.tsv,32349,,*_alf_CMa,-1.460,0.000000,,379.21,1.58,-5.50,-5.50,1.40
29039,::hipparcos:simbad,Pulkovo.tsv,30438,,*_alf_Car,-0.740,0.000000,,10.43,0.53,20.30,20.30,0.20
32718,::hipparcos:simbad,Pulkovo.tsv,69673,,*_alf_Boo,-0.050,0.000000,,88.85,0.74,-5.19,-5.20,2.30
32928,::hipparcos:simbad,Maldonado2010.tsv,71683,,*_alf_Cen_A,0.010,0.000000,,742.12,1.40,-21.40,-22.44,0.11
34769,::hipparcos:simbad,Pulkovo.tsv,91262,,*_alf_Lyr,0.030,0.000000,,128.93,0.55,-20.60,-20.60,7.30
28302,::hipparcos:simbad,Pulkovo.tsv,24608,,*_alf_Aur,0.080,0.000000,,77.29,0.89,29.19,29.50,6.50
28284,::hipparcos:simbad,Pulkovo.tsv,24436,,*_bet_Ori,0.130,0.000000,,4.22,0.81,17.80,17.80,2.60
29809,::hipparcos:simbad,Maldonado2010.tsv,37279,,*_alf_CMi,0.370,0.000000,,285.93,0.88,-3.2,-4.10,0.11
28730,::hipparcos:simbad,Famaey2005.tsv,27989,,*_alf_Ori,0.420,0.000000,,7.63,1.64,21.91,21.91,0.51
26704,::hipparcos:simbad,Pulkovo.tsv,7588,,*_alf_Eri,0.460,0.000000,,22.68,0.57,18.60,18.60,5.90


In [28]:
(rvgaia.Vmag_simbad<6).sum()

4568

In [126]:
#cond=rvgaia.Vmag_simbad!='NULL'
#rvgaia_sel=rvgaia[cond]
#rvgaia_sel["Vmag_simbad"]=rvgaia_sel["Vmag_simbad"].map(lambda x:float(x) if x!='~' else 25).astype(float)
#cond=rvgaia.Vmag_simbad<5
#rvgaia_sel=rvgaia_sel[cond]

In [None]:
#HIPPARCOS VAN LEEUWEN
#I/311/hip2
name="HipVanLeeuwen2007.tsv"
print("Building catalogue %s..."%name)
comments=list(range(95))+[96,97]
hipvan=pd.read_csv(HIPTYC_DIR+name,sep=";",skiprows=comments)
cats=['HIP']
for cname in cats:
    hipvan[cname]=hipvan[cname].fillna('')
    hipvan[cname]=hipvan[cname].map(str)
dfstr=hipvan.select_dtypes(['object'])
hipvan[dfstr.columns]=dfstr.apply(lambda x: x.str.strip())
print("Number of objects in %s:"%name,len(hipvan))
hipvan.rename(columns=dict(HIP='hip'),inplace=True)
hipvan.fillna('NULL',inplace=True)
hipvan["n_HIP"]=hipvan["n_HIP"].map(lambda x:'NULL' if x=='' else x.replace(' ','_'))
ncols=dict()
for col in hipvan.columns:
    ncols[col]=col+"_Hip2" if col!='hip' else col
hipvan.rename(columns=ncols,inplace=True)
hipvan=hipvan[['_RAJ2000_Hip2', '_DEJ2000_Hip2', 'hip', 'Plx_Hip2', 'e_Plx_Hip2', 'pmRA_Hip2', 'e_pmRA_Hip2',
       'pmDE_Hip2', 'e_pmDE_Hip2','Hpmag_Hip2', 'e_Hpmag_Hip2', 'B-V_Hip2', 'e_B-V_Hip2', 'V-I_Hip2']]
columns={'_RAJ2000_Hip2':'ra_Hip2', '_DEJ2000_Hip2':'dec_Hip2', 'hip':'hip', 
         'Plx_Hip2':'parallax_Hip2', 'e_Plx_Hip2':'parallax_error_Hip2', 
         'pmRA_Hip2':'pmra_Hip2', 'e_pmRA_Hip2':'pmra_error_Hip2',
         'pmDE_Hip2':'pmdec_Hip2', 'e_pmDE_Hip2':'pmra_error_Hip2'}
hipvan.rename(columns=columns,inplace=True)

In [30]:
hipvan.columns

Index(['ra_Hip2', 'dec_Hip2', 'hip', 'parallax_Hip2', 'parallax_error_Hip2',
       'pmra_Hip2', 'pmra_error_Hip2', 'pmdec_Hip2', 'pmdec_error_Hip2',
       'Hpmag_Hip2', 'e_Hpmag_Hip2', 'B-V_Hip2', 'e_B-V_Hip2', 'V-I_Hip2'],
      dtype='object')

In [31]:
hipparcos.hip=hipparcos.hip.map(str)

In [41]:
rvgaia[(rvgaia.tycho2_id == '7207-1734-1')]

Unnamed: 0,hip,tycho2_id,ref_epoch,ra,ra_error,dec,dec_error,parallax,parallax_error,pmra,...,ra_simbad,dec_simbad,pmra_simbad,pmdec_simbad,source,RAJ2000,DEJ2000,RV,eRV,CAT
108328,,7207-1734-1,2015,163.359,0.385439,-35.0906,0.249956,1.013968,0.603117,-2.20923,...,,,,,gaia:::,163.35933,-35.09064,4.339,1.189,RAVE-DR5.tsv
