### Plotting Test
Alex is using this Jupyter notebook to test the plotting + print statements from query.py.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from lsst.rsp import get_tap_service
import query as q

##### Testing object classifcation
There are two ways this classification function works:
1. Providing orbit parameters, returning a corresponding object type
2. Providing an object type, reutrning corresponding orbit parameters

In [2]:
# 1
input_params = {
        "q_cutoff_min": None, 
        "q_cutoff": 1.3, 
        "a_cutoff_min": None, 
        "a_cutoff": 4.0, 
        "e_cutoff_min": None, 
        "e_cutoff": 1.0}
object_type = q.type_classification(input_params = input_params)
print(object_type)

#2
params = q.type_classification(object_type = "NEO")
print(params)

NEO
{'q_cutoff': 1.3, 'a_cutoff': 4.0, 'e_cutoff': 1.0}


Now that know our classification works, let's try making a full query. 

In [3]:
# testing with NEOs
query, object_type = q.make_query_general(object_type = "NEO", join = 'Diasource')
print(query)
print(object_type)

query, object_type = q.make_query_general(q_cutoff=1.3, a_cutoff=4.0, e_cutoff = 1.0, join = 'Diasource')
print(query)
print(object_type) 

SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, dias.magTrueVband, dias.band FROM dp03_catalogs_10yr.MPCORB as mpc
    INNER JOIN dp03_catalogs_10yr.DiaSource AS dias ON mpc.ssObjectId = dias.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;
NEO
SELECT mpc.incl, mpc.q, mpc.e, mpc.ssObjectID, dias.magTrueVband, dias.band FROM dp03_catalogs_10yr.MPCORB as mpc
    INNER JOIN dp03_catalogs_10yr.DiaSource AS dias ON mpc.ssObjectId = dias.ssObjectId
    WHERE mpc.q < 1.3 AND mpc.e < 1.0 AND mpc.q/(1-mpc.e) < 4.0;
NEO


In [4]:
# running the query
NEO_objects_table = q.run_query(query)

Job phase is COMPLETED
     incl              q                  e          ... magTrueVband band
     deg               AU                            ...                  
-------------- ------------------ ------------------ ... ------------ ----
9.299357471305 1.0442646753695681 0.5038040861600521 ...     22.00147    z
9.299357471305 1.0442646753695681 0.5038040861600521 ...    22.000353    i
9.299357471305 1.0442646753695681 0.5038040861600521 ...    21.968763    r
9.299357471305 1.0442646753695681 0.5038040861600521 ...    22.549915    z
9.299357471305 1.0442646753695681 0.5038040861600521 ...    21.820671    r


In [7]:
# Adding calculated + object_type columns to data table
a = q.calc_semimajor_axis(NEO_objects_table['q'], NEO_objects_table['e'])
NEO_objects_table['a'] = a
NEO_objects_table['object_type'] = object_type

print(NEO_objects_table[0:5]) # print first few rows 
print(NEO_objects_table.columns)

     incl              q          ...         a          object_type
     deg               AU         ...                               
-------------- ------------------ ... ------------------ -----------
9.299357471305 1.0442646753695681 ... 2.1045410617919846         NEO
9.299357471305 1.0442646753695681 ... 2.1045410617919846         NEO
9.299357471305 1.0442646753695681 ... 2.1045410617919846         NEO
9.299357471305 1.0442646753695681 ... 2.1045410617919846         NEO
9.299357471305 1.0442646753695681 ... 2.1045410617919846         NEO
<TableColumns names=('incl','q','e','ssObjectID','magTrueVband','band','a','object_type')>


In [8]:
q.plot_data(NEO_objects_table)
# # Manual Plotting Code 
# fig, ax = plt.subplots()
# # plt.xlim([0., 4.])
# # plt.ylim([0., 1.])
# ax.scatter(NEO_objects_table["a"], NEO_objects_table["e"], s=0.1) # a vs. i
# ax.set_xscale('log')
# ax.set_xlabel('semimajor axis (au)')
# ax.set_ylabel('eccentricity')
# ax.set_title("a vs. e (NEO)")
# ax.minorticks_on()
# ax.grid()
# plt.show()

UnboundLocalError: cannot access local variable 'object_type' where it is not associated with a value

In [9]:
# q.type_counts(NEO_objects_table)
print(NEO_objects_table.columns)
df = NEO_objects_table.to_pandas()
print(df['object_type'].value_counts())
type(df['object_type'].value_counts())

<TableColumns names=('incl','q','e','ssObjectID','magTrueVband','band','a','object_type')>
object_type
NEO    2135926
Name: count, dtype: int64


pandas.core.series.Series

#### How many observations for each object? In what filters?

In [10]:
# need to count observations for each unique object in SSO_id
print(df['ssObjectID'].value_counts())

ssObjectID
-6531816816762911299    1708
-7495537061370576217    1316
-8960445666237157474    1084
 5885925787919145121     993
-3541095373223286452     991
                        ... 
 8210757616290374451       6
-5675138216577684964       6
-317670342693814751        6
-7554881316084645816       6
 4738261344712572538       6
Name: count, Length: 39470, dtype: int64


In [11]:
# unique observations within each filter
print(df['band'].value_counts())

band
r    754794
i    686043
z    419419
g    275670
Name: count, dtype: int64


In [17]:
# count of unique observations for each unique object in SSO_id within each filter
print(df.groupby(['ssObjectID', 'band']).size().reset_index(name='obs_filter_count'))


                 ssObjectID band  obs_filter_count
0      -9222952578217572358    g                 4
1      -9222952578217572358    i                11
2      -9222952578217572358    r                12
3      -9222952578217572358    z                 6
4      -9222820784385736130    g                 7
...                     ...  ...               ...
153755  9222765823057774610    z                 4
153756  9222996671474961129    g                 1
153757  9222996671474961129    i                 5
153758  9222996671474961129    r                 7
153759  9222996671474961129    z                 4

[153760 rows x 3 columns]


#### What is the average magnitude range? Does any object have an unusually large range?

In [6]:
service = get_tap_service("ssotap")
assert service is not None
results = service.search("SELECT column_name, datatype, description, "
                         "unit from TAP_SCHEMA.columns "
                         "WHERE table_name = 'dp03_catalogs_10yr.DiaSource'")
results.to_table().to_pandas()

Unnamed: 0,column_name,datatype,description,unit
0,band,char,Name of the band used to take the exposure whe...,
1,ccdVisitId,int,Id of the ccdVisit where this diaSource was me...,
2,dec,double,Dec-coordinate of the center of this diaSource.,deg
3,decErr,float,Uncertainty of dec.,deg
4,decTrue,double,True (noiseless) declination of the simulated ...,
5,diaObjectId,long,Id of the diaObject this source was associated...,
6,diaSourceId,long,Unique id.,
7,mag,float,Magnitude. This is a placeholder and should be...,
8,magErr,float,Magnitude error. This is a placeholder and sho...,
9,magTrueVband,float,True (noiseless) V-band magnitude of the simul...,


In [7]:
results['description'][9]

np.str_('True (noiseless) V-band magnitude of the simulated diaSource')

In [7]:
df.columns

Index(['incl', 'q', 'e', 'ssObjectID', 'magTrueVband', 'a'], dtype='object')

In [9]:
# need to group by unique ssObjectID values
# create new dataframe to hold average of each group 

print(df.columns)

mag_data = df.groupby(['ssObjectID']).agg(
    mag_mean = ('magTrueVband', 'mean'))

print(mag_data)

Index(['incl', 'q', 'e', 'ssObjectID', 'magTrueVband', 'a', 'Class'], dtype='object')
                       mag_mean
ssObjectID                     
-9148239178685334337  21.998495
-9107056889034294513  32.579948
-9095432462150332340  18.361084
-8998070713432935796  19.151974
-8962368697841326271  21.892792
...                         ...
 8906832681098804506  21.802177
 9010956620758146911  22.967888
 9022961864831958355  21.921072
 9034671437756444180  32.840210
 9117326962743268631  23.037796

[492 rows x 1 columns]


#### Now need to look at the average range
For each object, need to find the average range? So, min and max for each observation?
-> I think this means the average range by object type, so I'm going to group by object type, then find min and max V values from each object type

In [10]:
df.groupby('Class')['magTrueVband'].min()

Class
NEO    12.841971
Name: magTrueVband, dtype: float32

In [11]:
df.groupby('Class')['magTrueVband'].max()

Class
NEO    35.641945
Name: magTrueVband, dtype: float32

In [13]:
q.type_counts(DiaSource_joined_table)

MBA Count: 0
NEO Count: 28945
TNO Count: 11521
Centaur Count: 770
LPC Count: 11594
