## ZTF - Attribution
In this notebook, we run attribution on the two weeks of ZTF data from 2018 using a more recent orbit catalog available in 2021. We also update the ssnamenr provided by ZTF to be compatible with the designations used in that catalog.

In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
from astropy.time import Time

import thor

from thor import __version__
# Record the installed THOR version next to the results for provenance.
print("THOR version: {}".format(__version__))

THOR version: 1.1.dev199+g1c54766.d20210401


In [2]:
# Both ZTF tables are parsed with the same options: no column is promoted to
# the index, and obs_id is read as text instead of being parsed as a number.
ztf_csv_options = {
    "index_col": False,
    "dtype": {"obs_id": str},
}

preprocessed_observations = pd.read_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_observations.csv",
    **ztf_csv_options
)
preprocessed_associations = pd.read_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_associations.csv",
    **ztf_csv_options
)

In [3]:
from thor.utils import getMPCOrbitCatalog
from thor.utils import readMPCOrbitCatalog

# getMPCOrbitCatalog() is deliberately left commented out; per the note below
# it was last run on April 20th 2021 (presumably it refreshes the local MPCORB
# copy that readMPCOrbitCatalog() loads -- TODO confirm against thor.utils).
#getMPCOrbitCatalog() Last run on April 20th 2021
mpcorb = readMPCOrbitCatalog()

In [4]:
# Split the "last_obs" strings on "-" into integer year / month / day columns.
cols = ["year", "month", "day"]
last_observed = (
    mpcorb["last_obs"]
    .str.split("-", expand=True)
    .rename(columns=dict(enumerate(cols)))
)
last_observed = last_observed.astype({name: int for name in cols})

# Remove orbits that have an arc length less than 10 days
# (rows with no arc_days value are kept)
has_long_enough_arc = mpcorb["arc_days"].isna() | (mpcorb["arc_days"] >= 10)

# Remove orbits that have had their eccentricity values set rather than fit
has_fitted_eccentricity = ~mpcorb["uncertainty_param"].isin(["E"])

# Remove orbits that have not been observed in the current century
seen_this_century = last_observed["year"] >= 2000

# An orbit is kept only when it passes all three cuts.
mask = has_long_enough_arc & has_fitted_eccentricity & seen_this_century

# Catalog size before and after applying the cuts.
print(len(mpcorb))
print(len(mpcorb[mask]))

1075888
1044313


In [5]:
from thor.orbits import Orbits

# Build THOR orbits from the filtered catalog rows and save them to disk so
# the exact orbit set used for this attribution run is kept with the results.
mpc_orbits_file = "/mnt/data/projects/thor/thor_data/ztf/MPCORB_20210420.csv"
mpc_orbits = Orbits.fromMPCOrbitCatalog(mpcorb[mask])
mpc_orbits.to_csv(mpc_orbits_file)

In [6]:
from thor.orbits import attributeObservations

attribution_file = "/mnt/data/projects/thor/thor_data/ztf/attributions_20210420.csv"

if os.path.exists(attribution_file):
    # A previous run already saved its result: reload it instead of recomputing.
    # obs_id is read as text, matching how the preprocessed tables are loaded.
    attributions = pd.read_csv(
        attribution_file,
        index_col=False,
        low_memory=False,
        dtype={"obs_id": str},
    )
else:
    # No cached result yet: attribute the observations to the catalog orbits
    # and cache the table for later runs.
    # NOTE(review): eps=1/3600 is presumably a 1 arcsecond matching radius
    # expressed in degrees -- confirm against attributeObservations.
    attributions = attributeObservations(
        mpc_orbits,
        preprocessed_observations,
        eps=1/3600,
        include_probabilistic=True,
        orbits_chunk_size=100,
        observations_chunk_size=200000,
        threads=60,
        backend='PYOORB',
        backend_kwargs={},
    )
    attributions.to_csv(attribution_file, index=False)

In [7]:
# Display the attribution table (freshly computed or reloaded from the cached CSV).
attributions

Unnamed: 0,orbit_id,obs_id,mjd_utc,distance,residual_ra_arcsec,residual_dec_arcsec,chi2,probability,mahalanobis_distance
0,100006,619483563715015157,58373.483565,0.000137,0.477276,-0.122193,24.272318,0.063444,5.515190
1,100006,621505463615015055,58375.505463,0.000081,0.241698,-0.162031,8.467188,0.205028,3.169222
2,100006,622508663315015053,58376.508669,0.000112,0.375438,-0.150549,16.361904,0.105889,4.490729
3,100006,623475553315015033,58377.475556,0.000095,0.309388,0.143574,11.633459,0.151788,3.770541
4,100006,623516093315015018,58377.516099,0.000080,0.216394,-0.192677,8.395061,0.211414,3.107878
...,...,...,...,...,...,...,...,...,...
266159,99970,620317971815015002,58374.317974,0.000053,-0.138432,-0.133353,3.694645,0.382141,1.923932
266160,99986,620266601615015005,58374.266609,0.000069,-0.202350,-0.144452,6.181192,0.288169,2.488420
266161,99987,619331292415015033,58373.331296,0.000032,-0.098039,0.062424,1.350831,0.558740,1.164143
266162,99987,622329122415015037,58376.329120,0.000061,-0.217708,0.026370,4.809220,0.333231,2.197837


In [8]:
# An observation may be attributed to more than one orbit. Order the rows by
# obs_id and then by increasing distance, and keep only the first row of each
# obs_id, i.e. the attribution with the smallest distance.
attributions = (
    attributions
    .sort_values(
        by=["obs_id", "distance"],
        ascending=[True, True],
        ignore_index=True,
    )
    .drop_duplicates(
        subset=["obs_id"],
        keep="first",
        ignore_index=True,
    )
)

In [9]:
# Create new file with 2021 attributions for analysis
attributed_orbit_ids = attributions[["obs_id", "orbit_id"]]
preprocessed_associations_2021 = preprocessed_associations.merge(
    attributed_orbit_ids,
    how="left",
    on="obs_id",
)

# Observations that received no attribution have a missing orbit_id after the
# left merge; for those rows fall back to the existing obj_id.
not_attributed = preprocessed_associations_2021["orbit_id"].isna()
preprocessed_associations_2021.loc[not_attributed, "orbit_id"] = (
    preprocessed_associations_2021.loc[not_attributed, "obj_id"].values
)

preprocessed_associations_2021.to_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_associations_20210420.csv",
    index=False,
)

In [10]:
# Pair every attribution with the association row sharing its obs_id, and
# rename the association's obj_id to prev_obj_id so it can be compared with
# the newly attributed orbit_id.
analysis_attributions = (
    attributions
    .merge(preprocessed_associations, on="obs_id")
    .rename(columns={"obj_id": "prev_obj_id"})
)


In [11]:
# Display the merged table: each attribution next to its previous obj_id.
analysis_attributions

Unnamed: 0,orbit_id,obs_id,mjd_utc,distance,residual_ra_arcsec,residual_dec_arcsec,chi2,probability,mahalanobis_distance,prev_obj_id
0,27310,610130480115015030,58364.130486,0.000055,0.191133,-0.052943,3.933481,0.332598,2.201643,27310
1,14935,610130480115015043,58364.130486,0.000049,0.099153,-0.146610,3.132584,0.398454,1.840328,14935
2,2159,610130480515015028,58364.130486,0.000038,0.083303,0.109320,1.889009,0.487691,1.436148,2159
3,2421,610130480515015038,58364.130486,0.000020,0.061665,-0.033719,0.493953,0.680377,0.770216,2421
4,1768,610130480915015022,58364.130486,0.000045,0.161712,-0.008904,2.622993,0.404256,1.811414,1768
...,...,...,...,...,...,...,...,...,...,...
266148,253109,624525845215015002,58378.525845,0.000085,0.163875,-0.257433,9.312675,0.178898,3.441876,253109
266149,43423,624525845615015001,58378.525845,0.000079,0.199944,0.201718,8.066767,0.179317,3.437206,43423
266150,175469,624525845815015033,58378.525845,0.000031,0.065187,0.088565,1.209314,0.534090,1.254383,175469
266151,9816,624525845915015050,58378.525845,0.000024,0.063718,0.057155,0.732664,0.594314,1.040694,9816


In [12]:
# Attributions whose orbit_id differs from the previously associated obj_id.
analysis_attributions.loc[
    lambda table: table["orbit_id"] != table["prev_obj_id"]
]

Unnamed: 0,orbit_id,obs_id,mjd_utc,distance,residual_ra_arcsec,residual_dec_arcsec,chi2,probability,mahalanobis_distance,prev_obj_id
201,523664,610145370015015040,58364.145370,0.000059,-0.208536,0.032952,4.457299,0.262997,2.671226,2012 OD1
202,528991,610145841615015109,58364.145845,0.000024,0.041499,-0.076768,0.761555,0.612902,0.979100,2009 HE82
206,523595,610146454715015011,58364.146458,0.000091,-0.297599,-0.136820,10.728472,0.148010,3.820955,2002 OS4
207,2014 JO93,610147040415015024,58364.147049,0.000187,0.674053,-0.003495,45.435963,0.029758,7.029330,u00002443
211,523595,610147534715015030,58364.147535,0.000096,-0.312742,-0.148789,11.994565,0.133081,4.033600,2002 OS4
...,...,...,...,...,...,...,...,...,...,...
262371,2018 SR14,624476745615015018,58378.476748,0.000082,-0.169211,0.244020,8.817818,0.210608,3.115515,u00569532
262373,529080,624476746015015007,58378.476748,0.000063,-0.024507,0.224275,5.089987,0.322978,2.260344,2009 QD43
263425,531321,624488844515015018,58378.488843,0.000134,0.403052,0.265212,23.278824,0.049232,6.022420,2012 QV14
263467,535203,624489304115015000,58378.489306,0.000021,0.020995,-0.071797,0.559554,0.679754,0.772048,2014 YJ15


In [13]:
# Rows whose previous label is "u" followed by exactly eight digits
# (presumably the placeholder given to unassociated detections -- TODO confirm).
unknown_mask = analysis_attributions["prev_obj_id"].str.contains("^u[0-9]{8}$", regex=True)

# NOTE(review): this rebinds `mask`, the name an earlier cell uses to filter
# mpcorb; re-running that earlier cell after this one would pick up this mask.
differs_from_previous = (
    analysis_attributions["prev_obj_id"] != analysis_attributions["orbit_id"]
)
mask = differs_from_previous & unknown_mask

# Count attributions per orbit among those rows and keep the orbits that
# gained at least five observations.
occurences = analysis_attributions.loc[mask, "orbit_id"].value_counts()
interest = occurences.index.values[occurences.values >= 5]
print(len(interest))

534


In [14]:
# Count the orbits of interest whose identifier begins with "2018".
interest_ids = pd.Series(interest)
starts_with_2018 = interest_ids.str.contains("^2018", regex=True)
starts_with_2018.sum()

220

In [15]:
# Inspect every attribution made to one example orbit.
orbit_id = "2018 VO83"

analysis_attributions.loc[analysis_attributions["orbit_id"].eq(orbit_id)]

Unnamed: 0,orbit_id,obs_id,mjd_utc,distance,residual_ra_arcsec,residual_dec_arcsec,chi2,probability,mahalanobis_distance,prev_obj_id
17175,2018 VO83,611439544415015003,58365.439549,3.5e-05,0.125207,-0.015407,1.591421,0.528679,1.274749,u00189069
57512,2018 VO83,614404364415015006,58368.404363,8.1e-05,-0.20072,-0.210601,8.464138,0.23164,2.925142,u00276304
61206,2018 VO83,614436864415015009,58368.436863,5.8e-05,-0.013127,-0.210065,4.429946,0.349092,2.104837,u00278171
169706,2018 VO83,620380634715015005,58374.380637,4.7e-05,-0.16949,0.022022,2.921169,0.42083,1.731052,u00470023
232669,2018 VO83,623430304615015011,58377.430301,6e-05,-0.216556,0.008806,4.697403,0.33329,2.197487,u00546280
