# Pelican Data Extract and 3D Velocity Calculations
*Eric G. Suchanek, Ph.D. 3/15/19*

This code performs a Gaia rectangular search from 20h50m18s, 44d13m30s to 20h51m22s 44d25m0s and extracts:
* ra, dec, pmra, pmdec, pmra_error, pmdec_error, parallax, parallax_error, g_mean_mag

I've defined some queries and functions to pull stars by ID.


In [6]:
#
# Setup the libraries. 

import astropy.units as u
import astropy.coordinates as coord

from astropy.coordinates import Latitude, Longitude, Angle, SkyCoord
#from astropy.units import Quantity

from astroquery.gaia import Gaia
from astropy.io import ascii
import numpy as np
import time

import progressbar

import pandas as pd
from pandas import read_csv
%matplotlib inline

# Suppress warnings. Comment this out if you wish to see the warning messages
import warnings
warnings.filterwarnings('ignore')

# raw string from Gaia
q_str = "SELECT TOP 999999 gaia_source.source_id,gaia_source.ra,gaia_source.ra_error,\
gaia_source.dec,gaia_source.dec_error,gaia_source.parallax,gaia_source.parallax_error,\
gaia_source.pmra,gaia_source.pmra_error,gaia_source.pmdec,gaia_source.pmdec_error,\
gaia_source.phot_g_mean_mag,gaia_source.radial_velocity,gaia_source.radial_velocity_error,\
gaia_source.phot_variable_flag FROM gaiadr2.gaia_source  \
WHERE CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),\
BOX('ICRS',312.7083333333333,44.32083333333333,0.2666666666666515,0.19166666666666288))=1  "


# only pull non-null parallax, pmra, and pmdec
sel_str_box2 = "SELECT TOP 2000 gaia_source.source_id,gaia_source.ra,gaia_source.ra_error,\
gaia_source.dec,gaia_source.dec_error,gaia_source.parallax,gaia_source.parallax_error,\
gaia_source.pmra,gaia_source.pmra_error,gaia_source.pmdec,gaia_source.pmdec_error,\
gaia_source.phot_g_mean_mag FROM gaiadr2.gaia_source  \
WHERE CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),BOX('ICRS',{},{},{},{}))=1 \
AND gaia_source.parallax IS NOT NULL \
AND gaia_source.pmra IS NOT NULL \
AND gaia_source.pmdec IS NOT NULL ;"

sel_str_box = "SELECT TOP 5000 * FROM gaiadr2.gaia_source  \
WHERE CONTAINS(POINT('ICRS',gaiadr2.gaia_source.ra,gaiadr2.gaia_source.dec),BOX('ICRS',{},{},{},{}))=1 \
AND gaia_source.parallax IS NOT NULL \
AND gaia_source.pmra IS NOT NULL \
AND gaia_source.pmdec IS NOT NULL ;"


sel_str_box_id_restricted = "SELECT TOP 2000 * FROM gaiadr2.gaia_source  \
WHERE  gaia_source.source_id = {} \
AND gaia_source.parallax IS NOT NULL \
AND gaia_source.pmra IS NOT NULL \
AND gaia_source.pmdec IS NOT NULL \
AND ;"

sel_str_box_id = "SELECT * FROM gaiadr2.gaia_source  WHERE gaia_source.source_id ={};"

# Bruce's search parameters - area 1

_ra1 = "20h50m18s" # 300.8383
_dec1 = "44d13m30s" # 44.225

_ra2 = "20h51m22s" # 300.8561
_dec2 = "44d25m0s" # 44.41667

project_name_mira = "./area1/mira_"
project_name_gaia = "./area1/gaia_"

# files containing the source_ids

mira_filename = "./area1/mira_stars.csv"
gaia_filneme = "./area1/gaia_stars.csv"

# read the mira selection list
mira_stars = read_csv(mira_filename)
mira_stars= mira_stars.dropna()

#print(mira_stars_selections)



In [7]:
"""
# ipython_exit.py
Allows exit() to work if script is invoked with IPython without
raising NameError Exception. Keeps kernel alive.

Use: import variable 'exit' in target script with
     'from ipython_exit import exit'    
"""

import sys
from io import StringIO
from IPython import get_ipython


class IpyExit(SystemExit):
    """Exit Exception for IPython.

    Exception temporarily redirects stderr to buffer.
    """
    def __init__(self):
        # print("exiting")  # optionally print some message to stdout, too
        # ... or do other stuff before exit
        sys.stderr = StringIO()

    def __del__(self):
        sys.stderr.close()
        sys.stderr = sys.__stderr__  # restore from backup

def ipy_exit():
    raise IpyExit

if get_ipython():    # ...run with IPython
    exit = ipy_exit  # rebind to custom exit
else:
    exit = exit      # just make exit importable

In [13]:
########### do the query ########

project_name_mira = './area1/A1_'
data_filename = project_name_mira + _ra1 + "_" + _dec1 + "_" + _ra2 + "_" + _dec2 + "_" + "_deg.csv"


# Area 1 query
q = str.format(sel_str_box,312.708333,44.3208333,0.2666667,0.1966667)


start = time.time()
job = Gaia.launch_job_async(query=q, dump_to_file=True, 
                                output_file=data_filename, output_format='csv')
end = time.time()
secs = end - start
print("Query completed in %.2f" % secs)


stars_pm = job.get_results()

star_count = len(stars_pm)  # global variable

stars_pandas = read_csv(data_filename)
stars_pandas.fillna(np.nan)

star_count2 = len(stars_pandas)

if star_count != star_count2:
    print("Unequal star counts: %d %d" % (star_count, star_count2))

_ra = stars_pm['ra']
_dec = stars_pm['dec']

# convert to long and lat to get the units right
ra = Longitude(stars_pm['ra'],unit=u.deg)
dec = Latitude(stars_pm['dec'],unit=u.deg)

print(str.format("Returned: {} stars", star_count))


Query completed in 6.32
Returned: 912 stars


In [5]:
# pandas data frame iterators

def find_id(stars,id_list):
    tot = len(id_list)
    star_df = pd.DataFrame()
    
    for index,row in id_list.iterrows():
        source_id = row['source_id']
        mira_id = row['mira_id']
        try:
            star = stars.loc[source_id]
            star['mira_id'] = mira_id
            star_df.append(star)
        except:
            print("can't find %s " % source)
    return

def find_single_id(stars,id):
    star = pd.DataFrame()
    try:
        star = stars.loc[id]
        return star
    except:
        print("can't find %s " % id)

    return

# files containing the source_ids

mira_filename = "./area1/mira_stars_short.csv"
gaia_filneme = "./area1/gaia_stars.csv"

# read the mira selection list, set the index. change all to objects (string like)
# mira_stars has the mira_id and source_id

df = read_csv(mira_filename,dtype='object')
mira_selections = df.dropna()

# mira_selections = df.set_index('mira_id')

# read the query result file from above
stars_pandas = read_csv(data_filename,dtype='object')

#print(stars_pandas['source_id'])

stars_pandas = stars_pandas.set_index('source_id')
print(stars_pandas.loc['2163136746512695168'])

star = pd.DataFrame()
star = find_single_id(stars_pandas,'2163136334195866624')
#print(star)

#m1 = str(mira_selections['source_id'][0])
#print(m1)
#print (m1)
#star = find_single_id(stars,m1)
#print(star)

0      2163136334195866624
1      2163136746512695808
2      2163139804529411584
3      2163136746512695424
4      2163139873248885632
5      2163136746512695168
6      2163136123737391104
7      2163136918311386240
8      2163136128037403648
9      2163136746512694528
10     2163139804529411328
11     2163136162397141504
12     2163139941968363136
13     2163136128037403264
14     2163139873248886272
15     2163136505993357568
16     2163136059317926016
17     2163136712152954496
18     2163136093677664000
19     2163136849591908224
20     2163136059317925248
21     2163136505994523648
22     2163136471634785536
23     2163136677793214976
24     2163136712152953728
25     2163139628431535104
26     2163136609073737856
27     2163136609073737728
28     2163136471634784640
29     2163140388643785344
              ...         
882    2067059565049974400
883    2067062588707059584
884    2067059702485631872
885    2067058701758542080
886    2067059388953309184
887    2067063963096594176
8

In [None]:
#print(stars['source_id',:10])
#stars = all_st.set_index('source_id')


found = 0
for index, row in mira_selections.iterrows():
    id = row['source_id']
    for index2, row2 in stars.iterrows():       
        # print(id)
        id2 = row2['source_id']
        print(id, ' ', id2)
        break
        if id == id2:
            print('Found! ',id1,id1)
            found += 1

print('found')

mira_selections[:2]

In [None]:
#z = mira_selections['2163136334195870000']
#print(z)

#
# read the query result file from above
all_st = read_csv(vel_filename,dtype='object')
stars = all_st.set_index('source_id')

#x = stars.loc['2163136334195870000']

#print(stars[:1])

matched_stars = pd.DataFrame()
matched_stars = find_single_id(stars,'2163136334195866624')
if (matched_stars):
    print(matched_stars)
else:
    print('nope')
    
y = stars.loc['2163136334195866624']
print(y)


#matched_stars = find_id(all_stars,mira)

#id_to_check = np.asint('2163136334195870000') # once example from 

#mira

In [None]:
x = stars.loc['809148283']
print(x)

In [None]:
y = stars.loc['80826743']

In [None]:
print(y)

In [None]:
stars2 = stars_in
stars2.set_index('source_id')
stars2
y = stars2.loc['2163136334195870000']