In [1]:
import pandas as pd
from astroquery.simbad import Simbad
import os

In [2]:
# Read the header-less name list and keep the values of its first column.
data1 = pd.read_csv('./stars.txt', header=None)
# With header=None the columns are labelled 0, 1, ...; label 0 is the first column.
stars_names = data1[0].to_list()

In [3]:
# Catalogue prefix that marks the identifier to extract from an identifier list.
catalog = "Gaia DR3"


def find_gaia_dr3(value: str):
    """Extract the Gaia DR3 source identifier from a ``|``-separated identifier list.

    Parameters
    ----------
    value : str
        Identifiers separated by ``|``, e.g. ``"HD 1|Gaia DR3 123|Gaia DR2 123"``.

    Returns
    -------
    str or None
        The text following the ``catalog`` prefix in the first entry that starts
        with it, stripped of surrounding whitespace. ``None`` when ``value`` is
        not a string (e.g. a missing cell holding None/NaN) or when no entry
        carries a non-empty identifier for ``catalog``.
    """
    # Missing cells are not strings; a substring test on them would raise TypeError.
    if not isinstance(value, str):
        return None

    # Inspect each entry on its own instead of slicing a fixed-width window after
    # the first occurrence of the prefix, so the identifier length does not matter.
    for identifier in value.split('|'):
        identifier = identifier.strip()
        if identifier.startswith(catalog):
            source_id = identifier[len(catalog):].strip()
            if source_id:
                return source_id
    return None

In [4]:
# Sanity check on a sample identifier list that contains DR1, DR2 and DR3 entries.
# Adjacent string literals are concatenated into the single original string.
find_gaia_dr3(
    "BD+36  4320|GSC 02700-03701|2MASS J20551542+3644188|"
    "TIC 195262851|TYC 2700-3701-1|"
    "Gaia DR2 1870094426454523904|"
    "Gaia DR3 1870094426454523904|"
    "Gaia DR1 1870094422138445824"
)

'1870094426454523904'

In [5]:
# Dedicated SIMBAD client so the settings below do not touch the shared Simbad class.
simbad_handler = Simbad()
# NOTE(review): presumably seconds (one hour) — confirm against the astroquery docs.
simbad_handler.TIMEOUT = 3600
# NOTE(review): 0 presumably means "no row limit" — confirm against the astroquery docs.
simbad_handler.ROW_LIMIT = 0

# Start from the default field set, then request the extra columns used further down.
simbad_handler.reset_votable_fields()
simbad_handler.add_votable_fields("otype", "typed_id", "id(Gaia)", "ids", "sptype", "otypes")

In [6]:
# Query SIMBAD once per input name and stack the per-star tables into df_result.
result = []
stars_not_found = []  # input names for which the query produced no rows

simbad_columns = ['MAIN_ID', 'OTYPE', 'SP_TYPE', 'ID_Gaia', 'IDS', 'OTYPES']

for stars_name in stars_names:
    query_result = simbad_handler.query_object(stars_name)
    # NOTE(review): depending on the astroquery release an unresolved name yields
    # None or an empty table; either way there is nothing to convert, so record
    # the name and move on instead of failing on `.to_pandas()`.
    if query_result is None or len(query_result) == 0:
        stars_not_found.append(stars_name)
        continue

    # .copy() gives an independent frame, so the column additions below do not
    # write into a slice of the converted table (avoids SettingWithCopyWarning).
    result_table = query_result.to_pandas()[simbad_columns].copy()
    # Element-wise map over the one column that is needed, rather than a row-wise apply.
    result_table['Gaia DR3'] = result_table['IDS'].map(find_gaia_dr3)
    result_table.insert(0, 'FIND_NAME', stars_name)
    result.append(result_table)

if result:
    df_result = pd.concat(result, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the expected columns so later cells still run.
    df_result = pd.DataFrame(columns=['FIND_NAME', *simbad_columns, 'Gaia DR3'])

In [7]:
# Save the SIMBAD query results as CSV (header row, no index column).
out_name = 'suspected.csv'
out_dir = './symbad'
# Creates the directory only when it is missing; no separate existence check is
# needed, and any missing parent directories are created as well.
os.makedirs(out_dir, exist_ok=True)

fullname = os.path.join(out_dir, out_name)
df_result.to_csv(fullname, header=True, index=False)

# Remove stars already in the trained SY set

In [8]:
# Load the SY table and preview its first five rows (head() defaults to n=5).
df2 = pd.read_csv('../symbad/SY.csv')
df2.head()

Unnamed: 0,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,V* SY Mus,Symbiotic*,M4,Gaia DR3 5237239075896985728,HD 100336|AN 118.1914|CPD-65 11298|CSI-65-112...,SB*|LP*|V*|Em*|EmO|NIR|SB*|Sy*|*|IR|LP?|UV,5237239075896985728
1,WRAY 16-304,Symbiotic*,M3,Gaia DR2 5961300498162820352,UCAC2 15360443|ESO 334-8|Hen 2-275|2MASS J174...,LP*|MIR|NIR|Sy*|PN|*|G,5961300498162820352
2,UCAC4 122-038973,Symbiotic*,Be,Gaia DR3 5240016442258663936,UCAC4 122-038973|2MASS J11082717-6547183|SS73 ...,NIR|MIR|Sy*|Em*|*|LP?,5240016442258663936
3,LHA 115-N 60,Symbiotic*,"C3,3",Gaia DR2 4685585706769513344,SV* HV 1707|Cl* NGC 346 KWBBE 85|DENIS...,LP*|MIR|C*|NIR|V*|EmO|Em*|Sy*|*,4685585706769513344
4,V* V840 Cen,Symbiotic*,K5IIIe,Gaia DR3 6063900742825792000,GSC 08666-01230|DENIS J132049.4-555014|2MASS J...,Sy*|NIR|Sy*|V*|*,6063900742825792000


In [9]:
# Keep only the rows of df_result whose MAIN_ID does not appear in df2.
# Only df2's key column takes part in the join, so the left-hand columns keep
# their names and no fixed-width slicing or positional renaming is needed.
known_ids = df2[['MAIN_ID']].drop_duplicates()
merged = df_result.merge(known_ids, on='MAIN_ID', how='left', indicator=True)
# 'left_only' marks rows that found no partner in df2.
df_filtered = merged.loc[merged['_merge'] == 'left_only'].drop(columns='_merge')

# Show the result
df_filtered

Unnamed: 0,FIND_NAME,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,StHa55,EM* StHA 55,Mira,,Gaia DR3 3321366590173335424,IRAS 05440+0642|ASAS J054642+0643.7|ASAS J0546...,Mi*|LP*|V*|LP*|SB*|LP*|MIR|V*|Em*|NIR|*|C*?|IR...,3321366590173335424
3,Hen2-442,Hen 2-442,PlanetaryNeb,,Gaia DR2 2022052808961769088,Hen 2-442|AKARI-IRC-V1 J1939433+262933|GSC2 N0...,PN|NIR|MIR|V*|*|IR,2022052808961769088
5,Hen2-379,PN M 1-44,PlanetaryNeb,Be,Gaia DR2 4052553745525657600,ESO 522-11|GSC2 S3012333276|Hen 2-379|IRAS 181...,PN|NIR|Em*|*|G|IR,4052553745525657600
7,AS288,PN H 2-43,PlanetaryNeb,,Gaia DR2 4050670827750135040,ESO 456-75|EM* AS 288|GSC2 S301232116082|Haro...,PN|Em*|NIR|EmO|*|G|IR|PN?,4050670827750135040
9,ZZ Cmi,V* ZZ CMi,LongPeriodV*,M6I-IIep,Gaia DR3 3155368612444708096,BD+09 1633|AN 306.1934|DO 2156|GCRV 4915|G...,LP*|NIR|V*|*|IR|LP?,3155368612444708096
10,V335Vul,EM* AS 356,C*,Ce,Gaia DR3 2022999316661190272,C* 2728|ASAS J192314+2427.7|Case 452|CGCS 425...,C*|Mi*|LP*|LP*|LP*|LP*|V*|EmO|NIR|MIR|C*|Em*|*...,2022999316661190272


# Remove stars already in the trained NP set

In [10]:
# Load the NP table and preview its first five rows (head() defaults to n=5).
df2 = pd.read_csv('../symbad/NP.csv')
df2.head()

Unnamed: 0,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,PN M 1-1,PlanetaryNeb,,Gaia DR3 405830570741274752,PN G130.3-11.7|GSC2 N3302101150|GSC 03286-0032...,PN|MIR|NIR|PN|*|G|IR,405830570741274752
1,PN Vy 1-1,PlanetaryNeb,[WC],Gaia DR2 419326659360627968,PN VV 2|GSC2 N311023078|IRAS 00160+5335|2MA...,PN|PN|*|G|IR,419326659360627968
2,M 76,PlanetaryNeb,DOZ,Gaia DR2 406328439057955968,M 76|BWE 0139+5119|CSI+51-01391|GB6 B0139+511...,PN|PN|WD*|*|IR|Rad,406328443354164480
3,PN HDW 3,PlanetaryNeb,DAO.6,Gaia DR2 241918950690107264,PN G149.4-09.2|GSC2 N333121333134|PK 149-09 1...,PN|NIR|WD*|PN|*|WD?,241918950690107264
4,SH 2-188,PlanetaryNeb,DAO.6,Gaia DR2 509206447837376128,LBN 128.04-04.12|GSC2 N311302026110|LBN 633|...,WD*|PN|HII|*|ISM|Rad|WD?,509206447837376128


In [11]:
# Keep only the rows of df_filtered whose MAIN_ID does not appear in df2.
# Only df2's key column takes part in the join, so the left-hand columns keep
# their names and no fixed-width slicing or positional renaming is needed.
known_ids = df2[['MAIN_ID']].drop_duplicates()
merged = df_filtered.merge(known_ids, on='MAIN_ID', how='left', indicator=True)
# 'left_only' marks rows that found no partner in df2.
df_filtered = merged.loc[merged['_merge'] == 'left_only'].drop(columns='_merge')

# Show the result
df_filtered

Unnamed: 0,FIND_NAME,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,StHa55,EM* StHA 55,Mira,,Gaia DR3 3321366590173335424,IRAS 05440+0642|ASAS J054642+0643.7|ASAS J0546...,Mi*|LP*|V*|LP*|SB*|LP*|MIR|V*|Em*|NIR|*|C*?|IR...,3321366590173335424
4,ZZ Cmi,V* ZZ CMi,LongPeriodV*,M6I-IIep,Gaia DR3 3155368612444708096,BD+09 1633|AN 306.1934|DO 2156|GCRV 4915|G...,LP*|NIR|V*|*|IR|LP?,3155368612444708096
5,V335Vul,EM* AS 356,C*,Ce,Gaia DR3 2022999316661190272,C* 2728|ASAS J192314+2427.7|Case 452|CGCS 425...,C*|Mi*|LP*|LP*|LP*|LP*|V*|EmO|NIR|MIR|C*|Em*|*...,2022999316661190272


# Remove stars already in the trained RG set

In [12]:
# Load the RG table and preview its first five rows (head() defaults to n=5).
df2 = pd.read_csv('../symbad/RG.csv')
df2.head()

Unnamed: 0,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,2MASS J00373747+4938540,RGB*,,Gaia DR2 391445694478549888,TYC 3257-1046-1|AP J00373747+4938540|GSC 03257...,RG*|NIR|*,391445694478549888
1,BD+50 106,RGB*,K2,Gaia DR2 415592206774201088,BD+50 106|AG+50 53|AP J00355436+5105191|GS...,RG*|NIR|*,415592206774201088
2,HD 232231,RGB*,,Gaia DR2 415566471330378880,HD 232231|AG+50 56|AP J00370160+5041350|BD+4...,RG*|NIR|*,415566471330378880
3,2MASS J00320053+5054594,RGB*,,Gaia DR2 415760191535983744,TYC 3260-1281-1|AP J00320053+5054594|GSC 03260...,RG*|NIR|*,415760191535983744
4,2MASS J00300458+5111384,RGB*,,Gaia DR2 415822210863310336,TYC 3260-1485-1|AP J00300458+5111384|GSC 03260...,RG*|NIR|*,415822210863310336


In [13]:
# Keep only the rows of df_filtered whose MAIN_ID does not appear in df2.
# Only df2's key column takes part in the join, so the left-hand columns keep
# their names and no fixed-width slicing or positional renaming is needed.
known_ids = df2[['MAIN_ID']].drop_duplicates()
merged = df_filtered.merge(known_ids, on='MAIN_ID', how='left', indicator=True)
# 'left_only' marks rows that found no partner in df2.
df_filtered = merged.loc[merged['_merge'] == 'left_only'].drop(columns='_merge')

# Show the result
df_filtered

Unnamed: 0,FIND_NAME,MAIN_ID,OTYPE,SP_TYPE,ID_Gaia,IDS,OTYPES,Gaia DR3
0,StHa55,EM* StHA 55,Mira,,Gaia DR3 3321366590173335424,IRAS 05440+0642|ASAS J054642+0643.7|ASAS J0546...,Mi*|LP*|V*|LP*|SB*|LP*|MIR|V*|Em*|NIR|*|C*?|IR...,3321366590173335424
1,ZZ Cmi,V* ZZ CMi,LongPeriodV*,M6I-IIep,Gaia DR3 3155368612444708096,BD+09 1633|AN 306.1934|DO 2156|GCRV 4915|G...,LP*|NIR|V*|*|IR|LP?,3155368612444708096
2,V335Vul,EM* AS 356,C*,Ce,Gaia DR3 2022999316661190272,C* 2728|ASAS J192314+2427.7|Case 452|CGCS 425...,C*|Mi*|LP*|LP*|LP*|LP*|V*|EmO|NIR|MIR|C*|Em*|*...,2022999316661190272


In [14]:
# Save the rows that survived the three filters as CSV (header row, no index column).
out_name = 'suspected_news.csv'
out_dir = './symbad'
# Creates the directory only when it is missing; no separate existence check is
# needed, and any missing parent directories are created as well.
os.makedirs(out_dir, exist_ok=True)

fullname = os.path.join(out_dir, out_name)
df_filtered.to_csv(fullname, header=True, index=False)

# Remove stars that were not found

In [15]:
# Keep only rows that have a non-empty MAIN_ID and a Gaia DR3 value.
has_main_id = df_filtered['MAIN_ID'].str.len() > 0
df_filtered = df_filtered[has_main_id]
df_filtered = df_filtered.dropna(subset=['Gaia DR3'])

# Save the cleaned table as CSV (header row, no index column).
out_name = 'suspected_dataset.csv'
out_dir = './built_dataset'
# Creates the directory only when it is missing; no separate existence check is
# needed, and any missing parent directories are created as well.
os.makedirs(out_dir, exist_ok=True)

fullname = os.path.join(out_dir, out_name)
df_filtered.to_csv(fullname, header=True, index=False)