## Query.py Tutorial Doc

Load in the query file.

In [1]:
import sys
import os
# Add the sso_query directory to the import path so `query` can be imported.
# The relative path is resolved against the current working directory —
# presumably the folder this notebook is launched from (TODO confirm).
sys.path.append(os.path.abspath('../../sso_query/'))
import query as q

For this tutorial, we'll be looking at near-Earth objects (NEOs). query.py contains a global dictionary that lists all searchable classes and their orbital-parameter cutoffs:

    "LPC": {"a_min": 50.0},
    "TNO": {"a_min": 30.1, "a_max": 50.0},
    "Ntrojan": {"a_min": 29.8, "a_max": 30.4},
    "NEO": {"q_max": 1.3, "a_max": 4.0, "e_max": 1.0},
    "MBA": {"q_min": 1.66, "a_min": 2.0, "a_max": 3.2},
    "Centaur": {"a_min": 5.5, "a_max": 30.1},
    "Jtrojan": {"a_min": 4.8, "a_max": 5.4, "e_max": 0.3},
    "JFC": {"tj_min": 2.0, "tj_max": 3.0}

The 'catalog' variable allows you to query different catalogs; the options are 'dp03_catalogs_10yr' and 'dp1'.

The 'join' variable allows you to perform an inner join with either 'DiaSource' or 'SSObject'.

##### Ex 1: DP03, Class name provided, no join. 

In [4]:
# Build the NEO query from its class name alone; no second table is joined.
string, class_name = q.make_query(
    "dp03_catalogs_10yr",
    class_name="NEO",
    cutoffs=None,
    join=None,
)
print(string)

SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation FROM dp03_catalogs_10yr.MPCORB AS mpc
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;


##### Ex 2: DP03, Class name provided, join table with DiaSource

In [5]:
# Same NEO selection as before, now inner-joined with the DiaSource table.
string, class_name = q.make_query(
    "dp03_catalogs_10yr",
    class_name="NEO",
    cutoffs=None,
    join="DiaSource",
)
print(string)

Querying dp03_catalogs_10yr.DiaSource for: ['dias.magTrueVband', 'dias.band']
SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation, dias.magTrueVband, dias.band FROM dp03_catalogs_10yr.MPCORB AS mpc
    INNER JOIN dp03_catalogs_10yr.DiaSource AS dias ON mpc.ssObjectId = dias.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;


##### Ex 3: DP03, Cutoffs provided, no join

In [2]:
# Explicit orbital cutoffs; these values match the "NEO" entry of the
# class dictionary listed at the top of this tutorial.
NEO_cutoffs = {
    "q_max": 1.3,
    "a_max": 4.0,
    "e_max": 1.0,
}

# Pass cutoffs instead of a class name; the returned class_name is printed
# as well so the label make_query hands back can be inspected.
string, class_name = q.make_query(
    "dp03_catalogs_10yr",
    class_name=None,
    cutoffs=NEO_cutoffs,
    join=None,
)
print(string)
print(class_name)

SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation FROM dp03_catalogs_10yr.MPCORB AS mpc
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;
NEO


##### Ex 4: DP03, Cutoffs provided, join with SSObject

In [3]:
# Reuse the NEO_cutoffs dictionary from Ex 3, this time joining with SSObject.
string, class_name = q.make_query(
    "dp03_catalogs_10yr",
    class_name=None,
    cutoffs=NEO_cutoffs,
    join="SSObject",
)
print(string)
print(class_name)

Querying dp03_catalogs_10yr.SSObject for: ['sso.g_H', 'sso.r_H', 'sso.i_H', 'sso.discoverySubmissionDate', 'sso.numObs']
SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation, sso.g_H, sso.r_H, sso.i_H, sso.discoverySubmissionDate, sso.numObs, (sso.g_H - sso.r_H) AS g_r_color, (sso.r_H - sso.i_H) AS r_i_color FROM dp03_catalogs_10yr.MPCORB AS mpc
    INNER JOIN dp03_catalogs_10yr.SSObject AS sso ON mpc.ssObjectId = sso.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;
NEO


### Running your query

After generating your desired query, it's time to run it. 

The run_query function can return either an AstroPy table (to_pandas = False) or a Pandas dataframe (to_pandas = True). 

The function automatically prints the first few rows of your data. 

In [4]:
# Execute the most recently built query (`string` / `class_name` from the
# cell above) and request a pandas DataFrame rather than an Astropy table.
NEO_data_table = q.run_query(
    string,
    "dp03_catalogs_10yr",
    class_name,
    to_pandas=True,
)

Job phase is COMPLETED
   discoverySubmissionDate         e        g_H  g_r_color        i_H  \
0                  60747.0  0.396081  13.476295   0.669659  12.608139   
1                  60219.0  0.435987  18.141359   0.641882  17.297379   
2                  60422.0  0.763999  19.550732   0.634323  18.710812   
3                  60560.0  0.623179  18.261578   0.624949  17.455847   
4                  60224.0  0.712520  16.118109   0.659043  15.249735   

       incl mpcDesignation  numObs         q        r_H  r_i_color  \
0   8.45061        1929 SH      59  1.123543  12.806637   0.198498   
1  11.88325       1932 EA1     282  1.080947  17.499477   0.202099   
2   1.32170        1936 CA     254  0.441069  18.916409   0.205597   
3   6.06758        1937 UB     156  0.622294  17.636629   0.180782   
4   2.52162        1947 XC     321  0.625595  15.459065   0.209331   

            ssObjectID         a class_name  
0  3351269693330531197  1.860420        NEO  
1 -5234750409166262016  1

In [7]:
# List the column names of the DataFrame returned by run_query above.
print(NEO_data_table.columns)

Index(['incl', 'q', 'e', 'ssObjectID', 'mpcDesignation', 'g_H', 'r_H', 'i_H',
       'discoverySubmissionDate', 'numObs', 'g_r_color', 'r_i_color', 'a',
       'class_name'],
      dtype='object')


### DP1 Example

##### Ex 5: DP1, Class name provided, no join

In [23]:
# DP1 lives under the "dp1" schema; build and run the NEO query in one cell.
catalog = "dp1"
string, class_name = q.make_query(
    catalog,
    class_name="NEO",
    cutoffs=None,
)
print(string)
DP1_NEO_no_join = q.run_query(
    string,
    catalog,
    class_name,
    to_pandas=True,
)

SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation FROM dp1.MPCORB AS mpc
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;
Job phase is COMPLETED
          e       incl mpcDesignation         q         ssObjectID         a  \
0  0.600738  13.227656      2017 SZ13  1.064235  21164736941994842  2.665507   
1  0.348615  33.468957     2000 EV106  1.074028  21163607131764310  1.648836   
2  0.531458   8.168040        2020 XK  1.066431  21165806472671307  2.276062   
3  0.524094  27.740362        2011 OB  0.430199  21164711105015874  0.903957   
4  0.169844  14.212929       2022 WH4  0.803089  21165815045829704  0.967396   

  class_name  
0        NEO  
1        NEO  
2        NEO  
3        NEO  
4        NEO  


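# Works in Progress:

The DP1 join examples below currently fail: the TAP service reports that the requested columns (`g_H` for the SSObject join, `magTrueVband` for the DiaSource join) are not found in the DP1 schema.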

##### Ex 6: DP1, Class name provided, join with SSObject

In [21]:
# Hand-written query for a DP1 MPCORB + SSObject join (presumably what
# make_query should eventually generate for join="SSObject" — TODO confirm).
# Plain string literal: there are no placeholders, so no f-prefix is needed.
expected_query = """SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation, sso.g_H, sso.r_H, sso.i_H, sso.discoverySubmissionDate, sso.numObs, (sso.g_H - sso.r_H) AS g_r_color, (sso.r_H - sso.i_H) AS r_i_color FROM dp1.MPCORB AS mpc
    INNER JOIN dp1.SSObject AS sso ON mpc.ssObjectId = sso.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;"""

# Store the result under its own name so it cannot overwrite the no-join
# DataFrame from Ex 5. NOTE: this query currently errors because the TAP
# service does not find column g_H in the DP1 schema.
DP1_NEO_sso_join = q.run_query(expected_query, "dp1", "NEO", to_pandas=False)

Job phase is ERROR


DALQueryError: Query Error: IllegalArgumentException:Column: [g_H] not found in TapSchema

##### Ex 7: DP1, Class name provided, join with DiaSource

In [22]:
# Hand-written query for a DP1 MPCORB + DiaSource join (presumably what
# make_query should eventually generate for join="DiaSource" — TODO confirm).
# Plain string literal: there are no placeholders, so no f-prefix is needed.
expected_query = """SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, mpc.mpcDesignation, dias.magTrueVband, dias.band FROM dp1.MPCORB AS mpc
    INNER JOIN dp1.DiaSource AS dias ON mpc.ssObjectId = dias.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;"""

# Store the result under its own name so it cannot overwrite the no-join
# DataFrame from Ex 5. NOTE: this query currently errors because the TAP
# service does not find column magTrueVband in the DP1 schema.
DP1_NEO_dias_join = q.run_query(expected_query, "dp1", "NEO", to_pandas=False)

Job phase is ERROR


DALQueryError: Query Error: IllegalArgumentException:Column: [magTrueVband] not found in TapSchema