# Package Imports

In [1]:
import glob
import os
import numpy as np
import pandas as pd
import sqlite3 as sql
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
sns.set(font_scale=1.2, context="paper", style="ticks")
sns.set_palette("viridis")

import mysql.connector as mariadb
from astropy.time import Time

%matplotlib inline

import plotly
plotly.offline.init_notebook_mode(connected=True)

import sys
sys.path.append("..")

In [2]:
os.environ.get("CONDA_PREFIX")

'/epyc/opt/anaconda'

In [3]:
os.nice(1)

1

In [4]:
import thor

## Data Processing

In [5]:
# Connect to database
con = mariadb.connect(user='ztf', database='ztf')

In [6]:
# https://zwickytransientfacility.github.io/ztf-avro-alert/schema.html

In [7]:
# Read alerts for solar system objects from after the photometry fix 
sso_alert_fix_date1 = Time('2018-05-16T23:30:00', format='isot', scale='utc') # first attribution fix
sso_alert_fix_date2 = Time('2018-06-08T23:30:00', format='isot', scale='utc') # second attribution fix
sso_alert_phot_fix_date = Time('2018-06-18T23:30:00', format='isot', scale='utc') # photometry fix date

jd_good = sso_alert_phot_fix_date.jd
#ssdistnr >= 0 
df = pd.read_sql_query('select * from alerts where jd > {} and ssdistnr >= 0'.format(jd_good), con)
print(len(df))

1027255


In [8]:
df.sort_values(by=["jd"], inplace=True)
df.reset_index(inplace=True)

In [9]:
df = df[(df["rb"] >= 0.5) & (df["ndethist"] <= 4)]

In [10]:
df.head(10)

Unnamed: 0,index,objectId,jd,fid,pid,diffmaglim,programid,candid,isdiffpos,tblid,...,clrcounc,zpclrcov,zpmed,clrmed,clrrms,neargaia,neargaiabright,maggaia,maggaiabright,exptime
0,0,ZTF18abcdwzp,2458289.0,3,534178763215,19.8391,2,534178763215015012,t,12,...,,,,,,,,,,
1,2,ZTF18abcdxgz,2458289.0,3,534186883815,19.6875,2,534186883815015006,t,6,...,,,,,,,,,,
2,1,ZTF18abcdxlq,2458289.0,1,534194142015,20.492,2,534194142015015009,t,9,...,,,,,,,,,,
3,344,ZTF18abcdxlx,2458289.0,1,534194655515,20.551,1,534194655515015006,t,6,...,,,,,,,,,,
5,354,ZTF18abcdxkm,2458289.0,1,534195112615,20.3525,1,534195112615015010,t,10,...,,,,,,,,,,
6,350,ZTF18abcdxkw,2458289.0,1,534195112515,20.4844,1,534195112515015019,t,19,...,,,,,,,,,,
7,346,ZTF18abcdxln,2458289.0,1,534195113415,20.5017,1,534195113415015014,t,14,...,,,,,,,,,,
9,341,ZTF18abcdxmd,2458289.0,1,534195112315,20.4401,1,534195112315015000,t,0,...,,,,,,,,,,
11,339,ZTF18abcdxlb,2458289.0,1,534195116015,20.3943,1,534195116015015011,t,11,...,,,,,,,,,,
15,378,ZTF18abcdxsa,2458289.0,1,534195113915,20.478,1,534195113915015015,t,15,...,,,,,,,,,,


In [11]:
columnMapping = {        
        
        # Observation ID
        "obs_id" : "obs_id",
        
        # Exposure time
        "exp_mjd" : "exp_mjd",
        
        # Visit ID
        "visit_id" : "visit_id",
        
        # Field ID
        "field_id" : "field",
        
        # Field RA in degrees
        "field_RA_deg" : "fieldRA_deg",
        
        # Field Dec in degrees
        "field_Dec_deg" : "fieldDec_deg",
        
        # Night number
        "night": "nid",
        
        # RA in degrees
        "RA_deg" : "ra",
        
        # Dec in degrees
        "Dec_deg" : "decl",
        
        # Observer's x coordinate in AU
        "obs_x_au" : "HEclObsy_X_au",
        
        # Observer's y coordinate in AU
        "obs_y_au" : "HEclObsy_Y_au",
        
        # Observer's z coordinate in AU
        "obs_z_au" : "HEclObsy_Z_au",
        
        # Magnitude (UNUSED)
        "mag" : "magpsf",
        
        ### Truth Parameters
        
        # Object name
        "name" : "designation",
        
        # Observer-object distance in AU
        "Delta_au" : "Delta_au",
        
        # Sun-object distance in AU (heliocentric distance)
        "r_au" : "r_au",
        
        # Object's x coordinate in AU
        "obj_x_au" : "HEclObj_X_au",
        
        # Object's y coordinate in AU
        "obj_y_au" : "HEclObj_Y_au",
        
        # Object's z coordinate in AU
        "obj_z_au" : "HEclObj_Z_au",
        
        # Object's x velocity in AU per day
        "obj_dx/dt_au_p_day" : "HEclObj_dX/dt_au_p_day",
        
        # Object's y velocity in AU per day
        "obj_dy/dt_au_p_day" : "HEclObj_dY/dt_au_p_day",
        
        # Object's z velocity in AU per day
        "obj_dz/dt_au_p_day" : "HEclObj_dZ/dt_au_p_day",
        
        # Semi-major axis
        "a_au" : "a_au",
        
        # Inclination
        "i_deg" : "i_deg",
        
        # Eccentricity
        "e" : "e",
    }

def prepareZTFAlertsForTHOR(alerts):
    # Sort by field and exposure time
    alerts.sort_values(by=["field", "jd"], inplace=True)
    
    # Add exposure time in MJD column
    times = Time(alerts["jd"], format="jd", scale="utc")
    times = times.mjd
    alerts["exp_mjd"] = times
    
    # Only keep columns we want
    alerts = alerts[["ra", "decl", "field", "nid", "jd", "exp_mjd", "magpsf", "sigmapsf", "fid", "ssnamenr"]]
    
    # Read footprint and get field center locations
    footprint = pd.read_csv("/epyc/projects/thor/ztf_information/field_grid/ZTF_Fields.txt",
                        delim_whitespace=True, 
                        skiprows=1,
                        names=["field", "fieldRA_deg", "fieldDec_deg", "Ebv", "Gal Long", "Gal Lat", "Ecl Long", "Ecl Lat" ,"Entry"])
    footprint = footprint[["field", "fieldRA_deg", "fieldDec_deg"]]
    

    survey = pd.DataFrame({"exp_mjd": alerts["exp_mjd"].unique(), "visit_id" : np.arange(1, len(alerts["exp_mjd"].unique()) + 1)})
    eph = thor.propagateTestParticle([1, 1, 0, 0.0002, 0.0002, 0.000], 59580, survey["exp_mjd"].values, observatoryCode="I41")
    survey = survey.merge(eph[["mjd", 'HEclObsy_X_au', 'HEclObsy_Y_au', 'HEclObsy_Z_au']], left_on="exp_mjd", right_on="mjd")
    
    observations = alerts.merge(footprint, left_on="field", right_on="field")
    observations = observations.merge(survey, on="exp_mjd")

    observations["obs_id"] = np.arange(1, len(observations) + 1)
    
    return observations

In [12]:
observations = prepareZTFAlertsForTHOR(df)
del df

In [13]:
observations.to_csv("../analysis/ztf/known_object_observations.txt", sep=" ", index=False)

In [14]:
observations = pd.read_csv("../analysis/ztf/known_object_observations.txt", sep=" ", index_col=False)

In [15]:
observations.head(10)

Unnamed: 0,ra,decl,field,nid,jd,exp_mjd,magpsf,sigmapsf,fid,ssnamenr,fieldRA_deg,fieldDec_deg,visit_id,mjd,HEclObsy_X_au,HEclObsy_Y_au,HEclObsy_Z_au,obs_id
0,3.424106,-24.019268,245,606,2458361.0,58360.399954,18.471,0.132737,1,186910,1.16039,-24.25,1,58360.399954,0.927519,-0.399013,3.6e-05,1
1,1.823979,-26.530899,245,606,2458361.0,58360.399954,17.1856,0.050869,1,277529,1.16039,-24.25,1,58360.399954,0.927519,-0.399013,3.6e-05,2
2,2.427473,-24.440361,245,609,2458364.0,58363.334387,18.8113,0.129127,1,113739,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,3
3,0.060111,-26.851968,245,609,2458364.0,58363.334387,19.2119,0.144335,1,84685,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,4
4,2.551338,-24.99579,245,609,2458364.0,58363.334387,19.3703,0.172599,1,215529,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,5
5,1.749114,-27.423433,245,609,2458364.0,58363.334387,18.3123,0.097898,1,363132,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,6
6,359.865286,-26.27835,245,609,2458364.0,58363.334387,19.4659,0.201638,1,272471,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,7
7,357.542321,-24.481701,245,609,2458364.0,58363.334387,19.5034,0.155812,1,360331,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,8
8,1.420715,-26.591735,245,609,2458364.0,58363.334387,18.1973,0.076538,1,233487,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,9
9,2.085536,-27.214909,245,609,2458364.0,58363.334387,17.1913,0.055068,1,277529,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,10


In [16]:
# Unique solar system objects in ZTF alerts
observations["ssnamenr"].nunique()

119688

In [17]:
# Multiple formats for ssnamenr field in alerts db
np.random.choice(observations[observations["ssnamenr"].str.isnumeric()]["ssnamenr"].values, 20)

array(['67779', '39155', '3376', '30656', '176737', '67973', '8355',
       '209930', '31278', '7384', '30892', '45991', '16131', '686',
       '13157', '81930', '28904', '120295', '122302', '5632'],
      dtype=object)

In [18]:
np.random.choice(observations[~observations["ssnamenr"].str.isnumeric()]["ssnamenr"].values, 20)

array(['2017SN14', '2018RG31', '2001QU158', '2015AV00', '2014UT163',
       '2005WB159', '2007RK54', '2018NP04', '2014UM217', '2006KU15',
       '2018QE01', '2010MY99', '2012KC52', '2005UN353', '2013SK101',
       '2013VW09', '2005ON04', '2015AA253', '2014SH210', '2017DP37'],
      dtype=object)

In [19]:
mpcorb = thor.readMPCORBFile("../analysis/ztf/MPCORB_20181106.DAT")
# Drop two NaN rows... (population split)
mpcorb.drop(index=mpcorb[mpcorb["designation"].isna()].index, inplace=True)

In [20]:
np.random.choice(mpcorb[mpcorb["designation"].str.isnumeric()]["designation"].values, 20)

array(['37371', '03607', '49602', '49277', '66854', '14568', '61268',
       '36617', '84323', '38730', '28436', '62971', '09948', '44699',
       '28251', '97663', '74894', '99171', '34166', '88404'], dtype=object)

In [21]:
np.random.choice(mpcorb[~mpcorb["designation"].str.isnumeric()]["designation"].values, 20)

array(['K12XF6E', 'K16W34S', 'e6701', 'l3836', 'N1475', 'K07VC2O',
       'c8970', 'h9970', 'i8292', 'V0449', 'I3702', 'S7105', 'K18R37K',
       'K5017', 'K05U03T', 'T4418', 'V2813', 'K14KA6Q', 'c6722', 'l5753'],
      dtype=object)

In [22]:
mpcorb[mpcorb["readableDesignation"].str.isnumeric()]["readableDesignation"].values

array([], dtype=object)

In [23]:
np.random.choice(mpcorb[~mpcorb["readableDesignation"].str.isnumeric()]["readableDesignation"].values, 20)

array(['(433231) 2012 VT14', '(153846) 2001 XL42', '2008 CL14',
       '(492320) 2014 BF47', '2013 KM18', '(300607) 2007 TA441',
       '2014 RL42', '(307154) 2002 CF286', '(419129) 2009 SU240',
       '(206933) 2004 RS49', '(326329) 2000 EY15', '(91967) 1999 VZ91',
       '(80795) 2000 CE87', '(413356) 2003 YY67', '2016 TN78',
       '(124747) 2001 SP211', '2008 SH297', '(437218) 2012 WJ23',
       '2014 UV57', '2002 VX116'], dtype=object)

In [24]:
# Do pandas magic... 
split_readable_designation = mpcorb["readableDesignation"].str.split(" ", expand=True)
mpcorb.loc[split_readable_designation[0].str.contains("\("), "splitname"] = split_readable_designation[0]
mpcorb.loc[~mpcorb["splitname"].isna(), "splitname"] = mpcorb[~mpcorb["splitname"].isna()]["splitname"].map(lambda x: x.lstrip('\(').rstrip('\)'))
mpcorb.loc[(~mpcorb["readableDesignation"].str.contains("\(")) 
           & (mpcorb["readableDesignation"].str.contains(" ")), "splitname"] = mpcorb[(~mpcorb["readableDesignation"].str.contains("\(")) 
                                                                                      & (mpcorb["readableDesignation"].str.contains(" "))]["readableDesignation"].str.replace(" ", "")

In [25]:
observations = observations.merge(mpcorb[["designation", "splitname"]], left_on="ssnamenr", right_on="splitname", how='left')

In [26]:
observations["ssnamenr"].nunique()

119688

In [27]:
observations["designation"].nunique()

118325

In [28]:
observations.head()

Unnamed: 0,ra,decl,field,nid,jd,exp_mjd,magpsf,sigmapsf,fid,ssnamenr,fieldRA_deg,fieldDec_deg,visit_id,mjd,HEclObsy_X_au,HEclObsy_Y_au,HEclObsy_Z_au,obs_id,designation,splitname
0,3.424106,-24.019268,245,606,2458361.0,58360.399954,18.471,0.132737,1,186910,1.16039,-24.25,1,58360.399954,0.927519,-0.399013,3.6e-05,1,I6910,186910
1,1.823979,-26.530899,245,606,2458361.0,58360.399954,17.1856,0.050869,1,277529,1.16039,-24.25,1,58360.399954,0.927519,-0.399013,3.6e-05,2,R7529,277529
2,2.427473,-24.440361,245,609,2458364.0,58363.334387,18.8113,0.129127,1,113739,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,3,B3739,113739
3,0.060111,-26.851968,245,609,2458364.0,58363.334387,19.2119,0.144335,1,84685,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,4,84685,84685
4,2.551338,-24.99579,245,609,2458364.0,58363.334387,19.3703,0.172599,1,215529,1.16039,-24.25,2,58363.334387,0.945505,-0.352378,3.9e-05,5,L5529,215529


In [29]:
observations.to_csv("../analysis/ztf/known_object_observations.txt", sep=" ", index=False)

In [30]:
observations = pd.read_csv("../analysis/ztf/known_object_observations.txt", sep=" ", index_col=False)

In [None]:
! oorb --task=mpcorb --mpcorb=../analysis/ztf/MPCORB_20181106.DAT --orb-out=../analysis/ztf/MPCORB_20181106.orb --conf=../analysis/ztf/oorb.conf

In [None]:
fig, ax = plt.subplots(1, 1) 
hist = ax.hist(observations["nid"].values, bins=100)
ax.set_xlabel("Night ID")
ax.set_ylabel("Number of Observations")
ax.vlines([610, 624], 0, 60000)

In [31]:
observations = observations[(observations["nid"] >= 610) & (observations["nid"] <= 624)]

In [32]:
observations.to_csv("../analysis/ztf/known_object_observations_nid_610_624.txt", sep=" ", index=False)

In [33]:
observations = pd.read_csv("../analysis/ztf/known_object_observations_nid_610_624.txt", sep=" ", index_col=False)

In [None]:
observations.sort_values(by="mjd", inplace=True)

In [None]:
observations["mjd"].min()

In [None]:
fig, ax = thor.plotObservations(observations, columnMapping=columnMapping, usePlotly=False)

In [None]:
! oorb --task=propagation --orb-in=../analysis/ztf/MPCORB_20181106.orb --epoch-mjd-utc=58364.13048609997 --orb-out=../analysis/ztf/MPCORB_20181106_ZTF.orb --conf=/epyc/opt/oorb/main/oorb.conf 

In [None]:
# We also want keplerian elements
! oorb --task=propagation --orb-in=../analysis/ztf/MPCORB_20181106.orb --epoch-mjd-utc=58364.13048609997 --orb-out=../analysis/ztf/MPCORB_20181106_ZTF_keplerian.orb --conf=../analysis/ztf/oorb.conf 

In [None]:
! oorb --task=ephemeris --code=I41 --orb-in=../analysis/ztf/MPCORB_20181106_ZTF.orb --conf=../analysis/ztf/oorb.conf > ../analysis/ztf/MPCORB_20181106_ZTF.eph

In [None]:
simulated = thor.readEPHFile("../analysis/ztf/MPCORB_20181106_ZTF.eph")

In [None]:
simulated[simulated["designation"].isin(observations["designation"].unique())].to_csv("../analysis/ztf/known_object_observations_nid_610_624.eph", sep=" ", index=False)

In [None]:
orbits = thor.readORBFile("../analysis/ztf/MPCORB_20181106_ZTF_keplerian.orb")

In [None]:
orbits[orbits["designation"].isin(observations["designation"].unique())].to_csv("../analysis/ztf/known_object_observations_nid_610_624.orb", sep=" ", index=False)

In [34]:
# Read alerts for solar system objects from after the photometry fix 
sso_alert_fix_date1 = Time('2018-05-16T23:30:00', format='isot', scale='utc') # first attribution fix
sso_alert_fix_date2 = Time('2018-06-08T23:30:00', format='isot', scale='utc') # second attribution fix
sso_alert_phot_fix_date = Time('2018-06-18T23:30:00', format='isot', scale='utc') # photometry fix date

jd_good = sso_alert_phot_fix_date.jd
#ssdistnr >= 0 
df = pd.read_sql_query('select * from alerts where jd > {} and nid >= 610 and nid <= 624'.format(jd_good), con)
print(len(df))

4966353


In [35]:
df.sort_values(by=["jd"], inplace=True)
df.reset_index(inplace=True)

In [36]:
df = df[(df["rb"] >= 0.5) & (df["ndethist"] <= 4)]

In [37]:
observations = prepareZTFAlertsForTHOR(df)
del df

In [38]:
observations.to_csv("../analysis/ztf/observations_nid_610_624.txt", sep=" ", index=False)

In [39]:
# Unique solar system objects in ZTF alerts
observations["ssnamenr"].nunique()

62871

In [40]:
# Multiple formats for ssnamenr field in alerts db
np.random.choice(observations[observations["ssnamenr"].str.isnumeric()]["ssnamenr"].values, 20)

array(['427000', '302524', '323441', '38023', '334112', '40378', '281737',
       '205275', '102276', '114162', '128283', '466213', '200133',
       '53808', '368839', '87995', '66299', '318586', '67563', '146473'],
      dtype=object)

In [41]:
np.random.choice(observations[~observations["ssnamenr"].str.isnumeric()]["ssnamenr"].values, 20)

array(['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '',
       '', '', ''], dtype=object)

In [42]:
mpcorb = thor.readMPCORBFile("../analysis/ztf/MPCORB_20181106.DAT")
# Drop two NaN rows... (population split)
mpcorb.drop(index=mpcorb[mpcorb["designation"].isna()].index, inplace=True)

In [43]:
np.random.choice(mpcorb[mpcorb["designation"].str.isnumeric()]["designation"].values, 20)

array(['42597', '34644', '61906', '01234', '76301', '42388', '51905',
       '84168', '20510', '33653', '03594', '24624', '13445', '15519',
       '60836', '18693', '79488', '17862', '50324', '07005'], dtype=object)

In [44]:
np.random.choice(mpcorb[~mpcorb["designation"].str.isnumeric()]["designation"].values, 20)

array(['l4479', 'j7915', 'K15TZ3N', 'K1855', 'm8011', 'K03WI2H',
       'K10F08P', 'E1811', 'R0511', 'K16G75Y', 'Z8057', 'X0960', 'g1756',
       'H3325', 'M6609', 'i0965', 'S4696', 'K07M08Q', 'd5228', 'j6240'],
      dtype=object)

In [45]:
mpcorb[mpcorb["readableDesignation"].str.isnumeric()]["readableDesignation"].values

array([], dtype=object)

In [46]:
np.random.choice(mpcorb[~mpcorb["readableDesignation"].str.isnumeric()]["readableDesignation"].values, 20)

array(['2008 TO169', '2016 PC102', '(377796) 2006 AW32',
       '(472647) 2015 DA213', '2015 BC481', '(474146) 1998 RF13',
       '(302690) 2002 TE75', '2011 CL46', '(397381) 2006 UA361',
       '(376137) 2011 AW68', '(18756) 1999 GY34', '(279201) 2009 TF42',
       '2008 TA107', '(134317) 4117 T-1', '2016 YG8', '2016 EB229',
       '(367492) 2009 HB30', '2014 QS144', '(335073) 2004 RT293',
       '(179496) 2002 CT69'], dtype=object)

In [47]:
# Do pandas magic... 
split_readable_designation = mpcorb["readableDesignation"].str.split(" ", expand=True)
mpcorb.loc[split_readable_designation[0].str.contains("\("), "splitname"] = split_readable_designation[0]
mpcorb.loc[~mpcorb["splitname"].isna(), "splitname"] = mpcorb[~mpcorb["splitname"].isna()]["splitname"].map(lambda x: x.lstrip('\(').rstrip('\)'))
mpcorb.loc[(~mpcorb["readableDesignation"].str.contains("\(")) 
           & (mpcorb["readableDesignation"].str.contains(" ")), "splitname"] = mpcorb[(~mpcorb["readableDesignation"].str.contains("\(")) 
                                                                                      & (mpcorb["readableDesignation"].str.contains(" "))]["readableDesignation"].str.replace(" ", "")

In [48]:
observations = observations.merge(mpcorb[["designation", "splitname"]], left_on="ssnamenr", right_on="splitname", how='left')

In [49]:
observations["ssnamenr"].nunique()

62871

In [50]:
observations["designation"].nunique()

62307

In [51]:
observations.to_csv("../analysis/ztf/observations_nid_610_624.txt", sep=" ", index=False)