In [3]:
# imports
from tqdm import tqdm #progress bar
import math
import statistics
import pandas as pd
import numpy as np
from uncertainties import ufloat
from uncertainties.umath import * 

# SWEET-Cat
import urllib
from astropy import coordinates as coord
from astropy import units as u

from astropy.table import Table
from astropy.io.ascii import convert_numpy

# astroquery
from astroquery.simbad import Simbad
from astroquery.utils.tap.core import TapPlus #tap service

from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive as nas
from astroquery.utils.tap.core import TapPlus #tap service

from astroquery.vizier import Vizier
from astroquery.utils.tap.core import TapPlus #tap service

In [4]:
#Download SWEET-Cat
sweetCat_table_url = "https://sweetcat.iastro.pt/catalog/SWEETCAT_Dataframe.csv"
converters={'gaia_dr2': [convert_numpy(np.int64)],'gaia_dr3': [convert_numpy(np.int64)] }
sweet_table = Table.read(sweetCat_table_url, encoding='UTF-8',format='csv', converters=converters)

sweet_stars = sweet_table['Name']
gaia = sweet_table['gaia_dr2']
identifiers = []
for i in gaia:
    identifiers.append("Gaia DR2 " + str(i))
    
#simbad
simbadtap = TapPlus(url="http://simbad.u-strasbg.fr/simbad/sim-tap")
#nasa 
#arch = nas.query_criteria(table="ps")
#vizier
v = Vizier(columns=['vsini', 'e_vsini'])

In [13]:
def simbad(star_name, sweet_table, simbadtap): #query simbad catalog and make csv file for every star
    
    sweet_stars = sweet_table['Name']
    
    # check if star is in SWEET-Cat
    if star_name not in sweet_stars:
        return "Not a SWEET-Cat star name."
    
    # Get star id
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    gaia3_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr3']
    
    star_id2 = "gaia_dr2 " + str(gaia2_id[0])
    star_id3 = "gaia_dr3 " + str(gaia3_id[0])
    
    # Query Simbad and make dataframe
    #simbadtap = TapPlus(url="http://simbad.u-strasbg.fr/simbad/sim-tap")
    
    job = simbadtap.launch_job("select main_id,mrot.vsini,mrot.vsini_err, mrot.bibcode from basic join ident as i on i.oidref = oid join mesRot as mrot on mrot.oidref = oid where i.id in ('"+ star_id2 + "')")
    
    result = job.get_results()

    cat = ['simbad']*len(result)
    
    df = result.to_pandas()
    df = df.drop(['main_id'], axis=1)
    err_low = -df['vsini_err']  
    #df['vsini_err2'] = err_low
    df.insert(2, 'vsini_err2', err_low)
    df['Catalogue'] = cat
    
    # Make csv file with the values
    if "/" in star_name or "*" in star_name:
        file_name = star_id2
    else:
        file_name = star_name
        
    df.to_csv('{}.txt'.format(file_name), index=False, header=False, sep = '\t')
    
    return 

In [5]:
sweet_stars = sweet_table['Name']
    
gaia = sweet_table['gaia_dr2']
    
identifiers = []
for i in gaia:
    identifiers.append("Gaia DR2 " + str(i))
    
#arch = nas.query_criteria(table="ps")
    
result_nasa = arch[np.in1d(arch['gaia_id'], identifiers)]
    
result_nasa = result_nasa["gaia_id","st_vsin","st_vsinerr1","st_vsinerr2","st_refname"]
    
df_nasa = result_nasa.to_pandas()
df_nasa = df_nasa.dropna(subset=['st_vsin'])
df_nasa = df_nasa.drop_duplicates()

def nasa_exo(star_name, sweet_table, df_nasa):
    
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    star_id2 = "Gaia DR2 " + str(gaia2_id[0])
    
    #dataframe for each star
    df_star = df_nasa[df_nasa["gaia_id"] == star_id2]
    df_star = df_star.drop(['gaia_id'], axis=1)
    cat = ['nasa_exoplanet_archive']*len(df_star)
    df_star['Catalogue'] = cat
    
    # Make csv file with the values
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name

    df_star.to_csv('{}.txt'.format(file_name), mode='a', index=False, header=False, sep = '\t')
    
    return 

In [6]:
def vizier(star_name, sweet_table, catalogs_names):
    
    sweet_stars = sweet_table['Name']
    
    # check if star is in SWEET-Cat
    if star_name not in sweet_stars:
        return "Not a SWEET-Cat star name."
    
    # Get star id
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    gaia3_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr3']
    
    star_id2 = "Gaia DR2 " + str(gaia2_id[0])
    star_id3 = "Gaia DR3 " + str(gaia3_id[0])
    
    # query vizier
    for catalog_name in catalogs_names:
        result = Vizier(catalog=catalog_name, columns=['vsini', 'e_vsini']).query_object(star_name)
    
        frames = []
        for i in range(len(result)):
            df = result[i].to_pandas()
            frames.append(df)

        if len(frames) > 0:
            df_vizier = pd.concat(frames)
            #print(df_vizier)
            if len(df_vizier.columns) == 1:
                evsini = [float("nan")]*len(df_vizier)
                df_vizier['e_vsini'] = evsini
                #print(df_vizier)
            # edit dataframe
            cat = ['vizier_{}'.format(catalog_name)]*len(df_vizier)
            ref = [float("nan")]*len(df_vizier)
            err_low = - df_vizier['e_vsini']
            #print(err_low)
            df_vizier['err_low'] = err_low
            df_vizier['ref'] = ref
            df_vizier['Catalogue'] = cat

            # Make csv file with the values
            star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
            if "/" in star_name or "*" in star_name:
                file_name = star_id2_simbad
            else:
                file_name = star_name

            df_vizier.to_csv('{}.txt'.format(file_name), mode='a', index=False, header=False, sep = '\t')

    return 

In [7]:
catalogs_names = ['J/A+A/594/A39/', 'J/AJ/163/179/', 'J/A+A/664/A78/', 'III/244/', 'J/ApJS/237/38/', 'J/ApJ/919/138/', 'V/156/', 'J/A+A/654/A137/', 'J/A+A/662/A66/']

## testing for two stars: 11 Com & HD 96167

In [9]:
simbad('11 Com', sweet_table, simbadtap)
nasa_exo('11 Com', sweet_table, df_nasa)
vizier('11 Com', sweet_table, catalogs_names)

In [10]:
simbad('HD 96167', sweet_table, simbadtap)
nasa_exo('HD 96167', sweet_table, df_nasa)
vizier('HD 96167', sweet_table, catalogs_names)

## Creating the complete files

Warnings:
 - Always run in the order simbad --> nasa_exo --> vizier
 - Never run nasa_exo or vizier twice without a simbad in between (it will just add the same values again, but having simbad there restarts the file)

In [14]:
# SIMBAD
for star_name in tqdm(sweet_stars):
    #print(star_name)
    simbad(star_name, sweet_table, simbadtap)

100%|██████████████████████████████████████████████████████████████████████████████| 3803/3803 [12:21<00:00,  5.13it/s]


In [16]:
# NASA
for star_name in tqdm(sweet_stars):
    #print(star_name)
    nasa_exo(star_name, sweet_table, df_nasa)

100%|██████████████████████████████████████████████████████████████████████████████| 3803/3803 [01:02<00:00, 60.41it/s]


In [23]:
# VIZIER
for star_name in tqdm(sweet_stars):
    #print(star_name)
    vizier(star_name, sweet_table, catalogs_names)

100%|████████████████████████████████████████████████████████████████████████████| 3803/3803 [2:27:25<00:00,  2.33s/it]


#### Deleting duplicates and prepping the dataframe
Because I had to run vizier several times until it had gone through all of the stars, it kept adding the same values for the stars it had already ran, so I will just drop the duplicates

In [27]:
for star_name in tqdm(sweet_stars):    
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    data = pd.read_csv('{}.txt'.format(file_name), sep='\t', header=None, names=['vsini', 'err_1', 'err_2', 'Ref', 'Catalog'])
    #print(len(data))
    data = data.drop_duplicates()
    data.to_csv('{}.txt'.format(file_name), index=False, header=True, sep = '\t')
    #print(len(data))

100%|██████████████████████████████████████████████████████████████████████████████| 3803/3803 [01:22<00:00, 45.95it/s]


#   --------------------- No need to run any of the above cells again! ---------------------

## Creating a function that will query the catalogs and select a final value of vsini for each star based on a criteria
This final table will list the SWEET-Cat stars names and a single value of vsini.

To select this vsini value, we need to choose a selection creterion among the following:

 1. Average of all values (non reliable), disregarding uncertainties since some values have no uncertainty (disregards errors).
 2. Normal average of the values with associated error and calculate propagation of uncertainty (disregards values with no associated error).
 3. Weighted average, which would only consider measurements that have an associated error (disregards values with no associated error).
 4. Value with the smallest error, or in case of equal errors, the average of the values with the smallest error (disregards values with no associated error).
 5. Value with the smallest percent uncertainty (disregards values with no associated error).
 6. Calculate the standard deviation and disregard values beyond average +/- 1SD --> Take the average of the values (disregards errors).
 7. Returns the first value stored from nasa exoplanet archive.
 8. Taking the median value.

Do all of the above but only for 'recent' measurements (choose a cutoff year - 2000s)

In [326]:
def final(sweet_star, criteria, cutoff_year, cut_off): #not to be used for criteria 7 
    '''
    sweet_star: name of the star
    criteria: 
    cutoff_year: True or False
    '''
    result = criteria(sweet_star, cutoff_year, cut_off)
    value, err1, err2 = result
    return  sweet_star + ': vsini = {} +/- ({},{})'.format(value, err1, err2)

In [430]:
def cut_off_year(star_name, cut_off):
    '''
    Do all of the above but only for 'recent' measurements
    (choose a cutoff year - 2000s)
    sweet_star: name of SWEET-Cat star
    cut_off: year we consider adequate
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    output: dataframe but only with values published after cut_off year
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
    
    dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    for i, cat in enumerate(dataframe['Catalog']):
        if cat == 'simbad':
            ref = dataframe['Ref'][i]
            if type(ref) == str:
                year = int(ref[:4])
            else:
                year = 0
        if cat == 'nasa_exoplanet_archive':
            ref = dataframe['Ref'][i]
            if type(ref) == str:
                ref = ref.split(' ')
                year = int(ref[1][-4:])
            else:
                year = 0
        if cat == 'vizier_J/A+A/594/A39/':
            year = 2016
        if cat == 'vizier_J/AJ/163/179/':
            year = 2022
        if cat == 'vizier_J/A+A/664/A78/':
            year = 2022
        if cat == 'vizier_III/244/':
            year = 2005
        if cat == 'vizier_J/ApJS/237/38/':
            year = 2018
        if cat == 'vizier_J/ApJ/919/138/':
            year = 2021
        if cat == 'vizier_V/156/':
            year = 2019
        if cat == 'vizier_J/A+A/654/A137/':
            year = 2021
        if cat == 'vizier_J/A+A/662/A66/':
            year = 2022
        if year < cut_off:
            dataframe = dataframe.drop(labels=i,axis=0)
    
    return dataframe

In [433]:
cut_off_year('WASP-164', 2000)

Unnamed: 0,vsini,err_1,err_2,Ref,Catalog


In [389]:
def crit_1(star_name, cutoff_year, cut_off):
    '''
    Average of all values disregarding uncertainties 
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: Average of all values +/- nan 
    '''
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    if len(dataframe) > 0:
        return np.average(dataframe['vsini']), float('nan'), float('nan')
    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [390]:
print(final('11 Com', crit_1, True, 2000))

11 Com: vsini = 5.292 +/- (nan,nan)


In [391]:
def crit_2(star_name, cutoff_year, cut_off):
    '''
    Normal average of the values with associated error and calculate propagation of uncertainty 
    (disregards values with no associated error)
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: Average of all values +/- uncertainty
    '''
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    if len(dataframe) > 0:
        xs = []
        for i, err in enumerate(dataframe['err_1']):
            if not math.isnan(err):
                value = dataframe['vsini'][i]
                u = err
                x = ufloat(value, u)
                xs.append(x)
        res = sum(np.array(xs))/len(np.array(xs))
        val = res.n
        err1 = res.s
        err2 = - res.s
        return val, err1, err2
    
    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [392]:
print(final('11 Com', crit_2, True, 2000))

11 Com: vsini = 1.7866666666666668 +/- (0.6075908711186061,-0.6075908711186061)


In [393]:
def crit_3(star_name, cutoff_year, cut_off):
    '''
    Weighted average, which would only consider measurements that have an associated error 
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: weighted average +/- error
    '''    
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    label = []
    for i, err in enumerate(dataframe['err_1']):
        if pd.isna(err):
            label.append(i)
    dataframe = dataframe.drop(labels=label, axis=0)
    dataframe = dataframe.reset_index()
    dataframe = dataframe.drop(labels='index', axis=1)
    
    if len(dataframe) > 0:
        weighted_average = np.average(a=dataframe['vsini'], weights=dataframe['err_1'])
        variance = np.average((dataframe['vsini']-weighted_average)**2, weights=dataframe['err_1'])
        return weighted_average, variance, -variance
    
    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [394]:
print(final('11 Com', crit_3, True, 2000))

11 Com: vsini = 1.8425396825396827 +/- (0.7180125976316453,-0.7180125976316453)


In [395]:
def crit_4(star_name, cutoff_year, cut_off):
    '''
    Value with the smallest error, or in case of equal errors, the average of the values with the smallest error 
    (disregards values with no associated error) 
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: Average +/- error
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    if len(dataframe) > 0:
        smallest_err = dataframe['err_1'].min(0, skipna = True)
        values = dataframe['vsini'][dataframe['err_1']==smallest_err]
        xs = []
        for i, value in enumerate(values):
            x = ufloat(value, smallest_err)
            xs.append(x)
        res = sum(np.array(xs))/len(np.array(xs))
        return res.n, res.s, float('nan')
    
    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [396]:
print(final('11 Com', crit_4, True, 2000))

11 Com: vsini = 1.2 +/- (0.7071067811865476,nan)


In [397]:
def crit_5(star_name, cutoff_year, cut_off):
    '''
    Value with the smallest percent uncertainty 
    (disregards values with no associated error)
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: value +/- error
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
        
    #Value with the smallest percent uncertainty and then weighted average if they are more than 1
    per = 100
    values = []
    errors = []
    for i, err in enumerate(dataframe['err_1']):
        if not pd.isna(err):
            percent = err/dataframe['vsini'][i] *100
            if percent <= per: 
                values.append(dataframe['vsini'][i])
                errors.append(err) 
                per = percent
    
    if len(values) == 1:
        res = ufloat(np.average(dataframe['vsini']), float('nan'))
        return res.n, res.s, float('nan')
    if len(values) > 1:
        return crit_3     
    if len(values) == 0:
        return float('nan'), float('nan'), float('nan')

In [398]:
print(final('11 Com', crit_5, True, 2000))

11 Com: vsini = 5.292 +/- (nan,nan)


In [399]:
def crit_6(star_name, cutoff_year, cut_off):
    '''
    Calculate the standard deviation and disregard values beyond average +/- 1SD
    Take the average of the values (disregards errors)
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: Average +/- nan
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    if len(dataframe)>=2:
        av = np.average(dataframe['vsini'])
        var = statistics.variance(dataframe['vsini'])
        sd = np.sqrt(var)

        label = []
        for i,x in enumerate(dataframe['vsini']):
            if x > av + sd or x < av - sd:
                label.append(i)
        dataframe = dataframe.drop(labels=label)

        return np.average(dataframe['vsini']), float('nan'), float('nan')
    if len(dataframe) < 2:
        return float('nan'), float('nan'), float('nan')

In [400]:
print(final('11 Com', crit_6, True, 2000))
print(final('1SWASP J1407', crit_6, True, 2000))

11 Com: vsini = 1.6150000000000002 +/- (nan,nan)
1SWASP J1407: vsini = nan +/- (nan,nan)


In [401]:
def crit_7(star_name, cutoff_year, cut_off):
    '''
    Returns the first value stored from nasa exoplanet archive
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: value +/- error
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
        
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    if len(dataframe) > 0:
        for i, cat in enumerate(dataframe['Catalog']):
            if cat == 'nasa_exoplanet_archive':
                value = dataframe['vsini'][i]
                err_1 = dataframe['err_1'][i]
                err_2 = dataframe['err_2'][i]
                return '{} +/- ({} / {})'.format(value, err_1, err_2)

    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [402]:
print(crit_7('11 Com', True, 2000))

1.2 +/- (1.0 / -1.0)


In [403]:
def crit_8(star_name, cutoff_year, cut_off):
    '''
    Taking the median value
    sweet_star: name of SWEET-Cat star
    dataframe: pandas dataframe for sweet_star with the columns: ['vsini', 'err_1', 'err_2', 'Ref', 'Catalog']
    cutoff_year: function that reduces the dataframe to only values after the year cut_off
    output: median +/- nan
    '''        
    gaia2_id = sweet_table[sweet_table['Name'] == star_name]['gaia_dr2']
    
    star_id2_simbad = "gaia_dr2 " + str(gaia2_id[0])
    
    if "/" in star_name or "*" in star_name:
        file_name = star_id2_simbad
    else:
        file_name = star_name
    
    if cutoff_year == True:
        dataframe = cut_off_year(star_name, cut_off)
        dataframe = dataframe.reset_index()
        dataframe = dataframe.drop(labels='index', axis=1)
    if cutoff_year == False:
        dataframe = pd.read_csv('{}.txt'.format(file_name), sep='\t')
    
    if len(dataframe) > 0:
        return statistics.median(dataframe['vsini']), float('nan'), float('nan')
    
    if len(dataframe) == 0:
        return float('nan'), float('nan'), float('nan')

In [404]:
print(final('11 Com', crit_8, True, 2000))

11 Com: vsini = 1.2 +/- (nan,nan)


In [405]:
def nasa_vsini(star, file):
    table = pd.read_csv(file, sep = '\t')
    gaia2_id = sweet_table[sweet_table['Name'] == star]['gaia_dr2']
    star_id2 = "Gaia DR2 " + str(gaia2_id[0])
    value = table[table['gaia_id']==star_id2]['st_vsin']
    if len(value) > 0:
        value = value.values[0]
        err1 = table[table['gaia_id']==star_id2]['st_vsinerr1']
        err1 = err1.values[0]
        err2 = table[table['gaia_id']==star_id2]['st_vsinerr2']
        err2 = err2.values[0]
        ref = table[table['gaia_id']==star_id2]['st_refname']
        ref = ref.values[0]
        
        return value, err1, err2, ref
    else:
        return float('nan'), float('nan'), float('nan'), float('nan')

In [406]:
print(final('11 Com', crit_1, True, 2000)) #normal average (disregards errors)
print(final('11 Com', crit_2, True, 2000)) #normal average (just erros)
print(final('11 Com', crit_3, True, 2000)) #weighted average
print(final('11 Com', crit_4, True, 2000)) #smallest error
print(final('11 Com', crit_5, True, 2000)) #smallest percent uncertainty
print(final('11 Com', crit_6, True, 2000)) #standard deviation
print(final('11 Com', crit_8, True, 2000)) #median
print('11 Com: vsini = ', crit_7('11 Com', True, 2000)) #nasa
v, e1, e2, ref = nasa_vsini('11 Com', '_nasa_table.txt') #nasa
print('{}: vsini = {} +/- ({}, {})'.format('11 Com', v, e1, e2)) #nasa
print(ref)

11 Com: vsini = 5.292 +/- (nan,nan)
11 Com: vsini = 1.7866666666666668 +/- (0.6075908711186061,-0.6075908711186061)
11 Com: vsini = 1.8425396825396827 +/- (0.7180125976316453,-0.7180125976316453)
11 Com: vsini = 1.2 +/- (0.7071067811865476,nan)
11 Com: vsini = 5.292 +/- (nan,nan)
11 Com: vsini = 1.6150000000000002 +/- (nan,nan)
11 Com: vsini = 1.2 +/- (nan,nan)
11 Com: vsini =  1.2 +/- (1.0 / -1.0)
11 Com: vsini = 1.2 +/- (1.0, -1.0)
<a refstr=LIU_ET_AL__2008 href=https://ui.adsabs.harvard.edu/abs/2008ApJ...672..553L/abstract target=ref> Liu et al. 2008 </a>


## Creating the final table
For the final table we'll only use criteria 1, 6 and 8 and the table from nasa exoplanet archive (default = 1)

In [440]:
cutoff_year = True
cut_off = 2000

criterion_1_values = []
criterion_1_err1 = []
criterion_1_err2 = []
criterion_6_values = []
criterion_6_err1 = []
criterion_6_err2 = []
criterion_8_values = []
criterion_8_err1 = []
criterion_8_err2 = []
nasa_values = []
nasa_err1 = []
nasa_err2 = []
nasa_refs = []

for star in tqdm(sweet_stars):
# for star in sweet_stars:
#     print(star)
    v_1, err1_1, err2_1 = crit_1(star, cutoff_year, cut_off)
    criterion_1_values.append(v_1)
    criterion_1_err1.append(err1_1)
    criterion_1_err2.append(err2_1)
    
    v_6, err1_6, err2_6 = crit_6(star, cutoff_year, cut_off)
    criterion_6_values.append(v_6)
    criterion_6_err1.append(err1_6)
    criterion_6_err2.append(err2_6)
    
    v_8, err1_8, err2_8 = crit_8(star, cutoff_year, cut_off)
    criterion_8_values.append(v_8)
    criterion_8_err1.append(err1_8)
    criterion_8_err2.append(err2_8)
    
    v_nasa, err1_nasa, err2_nasa, ref = nasa_vsini(star, '_nasa_table.txt')
    nasa_values.append(v_nasa)
    nasa_err1.append(err1_nasa)
    nasa_err2.append(err2_nasa)
    nasa_refs.append(ref)
    
di = {'Star' : sweet_stars, 'average: vsini (km/s)': criterion_1_values, 'average: err_1': criterion_1_err1, 'average: err_2': criterion_1_err2,
      'standard deviation: vsini (km/s)': criterion_6_values, 'standard deviation: err_1': criterion_6_err1, 'standard deviation: err_2': criterion_6_err2,
     'median: vsini (km/s)': criterion_8_values, 'median: err_1': criterion_8_err1, 'median: err_2': criterion_8_err2,
     'nasa: vsini (km/s)': nasa_values, 'nasa: err_1': nasa_err1, 'nasa: err_2': nasa_err2, 'nasa: reference': nasa_refs}

df = pd.DataFrame(di)

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
100%|██████████████████████████████████████████████████████████████████████████████| 3803/3803 [08:08<00:00,  7.79it/s]


In [445]:
sweet_stars[228]

'HATS-4'

In [446]:
sweet_stars[2379]

'WASP-10'

In [447]:
df

Unnamed: 0,Star,average: vsini (km/s),average: err_1,average: err_2,standard deviation: vsini (km/s),standard deviation: err_1,standard deviation: err_2,median: vsini (km/s),median: err_1,median: err_2,nasa: vsini (km/s),nasa: err_1,nasa: err_2,nasa: reference
0,11 Com,5.292000,,,1.615000,,,1.200,,,1.2,1.0,-1.0,<a refstr=LIU_ET_AL__2008 href=https://ui.adsa...
1,11 UMi,4.825000,,,1.433333,,,1.500,,,,,,<a refstr=STASSUN_ET_AL__2017 href=https://ui....
2,14 And,4.488333,,,1.386000,,,1.515,,,2.6,,,<a refstr=SATO_ET_AL__2008 href=https://ui.ads...
3,14 Her,1.966667,,,1.940000,,,2.000,,,,,,<a refstr=FENG_ET_AL__2022 href=https://ui.ads...
4,16 Cyg B,9.765556,,,3.281176,,,2.200,,,,,,<a refstr=STASSUN_ET_AL__2017 href=https://ui....
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3798,K2-405,,,,,,,,,,,,,<a refstr=CHRISTIANSEN_ET_AL__2022 href=https:...
3799,K2-406,,,,,,,,,,,,,<a refstr=CHRISTIANSEN_ET_AL__2022 href=https:...
3800,K2-407,,,,,,,,,,,,,<a refstr=CHRISTIANSEN_ET_AL__2022 href=https:...
3801,K2-408,,,,,,,,,,,,,<a refstr=CHRISTIANSEN_ET_AL__2022 href=https:...


In [448]:
df.to_csv('_final_.txt', index = False, sep = '\t')