## ZTF - Attribution
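In this notebook, we run attribution on the two weeks of ZTF data from 2018 using a more recent orbit catalog available in 2021 (the MPC orbit catalog as downloaded on April 20th, 2021). We also update the ssnamenr provided by ZTF to be compatible with the object identifiers in that newer catalog.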

In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
from astropy.time import Time

import thor

from thor import __version__
# Record the THOR version this notebook was executed with.
print(f"THOR version: {__version__}")

THOR version: 1.1.dev199+g1c54766.d20210401


In [2]:
# Load the preprocessed ZTF observations and their associations.
# Both files are read with the same options: no column is used as the index
# and obs_id is kept as a string rather than being parsed as a number.
csv_options = {
    "index_col": False,
    "dtype": {"obs_id": str},
}

preprocessed_observations = pd.read_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_observations.csv",
    **csv_options
)
preprocessed_associations = pd.read_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_associations.csv",
    **csv_options
)

In [3]:
from thor.utils import getMPCOrbitCatalog
from thor.utils import readMPCOrbitCatalog

# The catalog download is kept commented out; per the original note it was
# last run on April 20th 2021, which matches the "20210420" suffix used for
# the files written later in this notebook.
#getMPCOrbitCatalog() Last run on April 20th 2021
# NOTE(review): readMPCOrbitCatalog() is called without arguments, so it
# presumably reads the file stored by getMPCOrbitCatalog() -- confirm.
mpcorb = readMPCOrbitCatalog()

In [4]:
# Split the "last_obs" strings on "-" and label the first three pieces as
# year / month / day, stored as integers.
cols = ["year", "month", "day"]
last_observed = (
    mpcorb["last_obs"]
    .str.split("-", expand=True)
    .rename(columns=dict(enumerate(cols)))
)
last_observed = last_observed.astype({name: int for name in cols})

# Keep orbits whose arc is at least 10 days long; orbits with no arc_days
# value are kept as well.
has_long_enough_arc = mpcorb["arc_days"].isna() | (mpcorb["arc_days"] >= 10)

# Drop orbits that have had their eccentricity values set rather than fit
# (uncertainty parameter "E").
has_fitted_eccentricity = ~mpcorb["uncertainty_param"].isin(["E"])

# Drop orbits whose last observation is from before the year 2000.
observed_since_2000 = last_observed["year"] >= 2000

mask = has_long_enough_arc & has_fitted_eccentricity & observed_since_2000

# Catalog size before and after filtering, one value per line.
print(len(mpcorb), len(mpcorb[mask]), sep="\n")

1075888
1044313


In [5]:
from thor.orbits import Orbits

# Build the Orbits object from the catalog rows selected by `mask`.
mpc_orbits = Orbits.fromMPCOrbitCatalog(mpcorb.loc[mask])

# Save the same filtered selection of the catalog to disk (without the index).
mpcorb.loc[mask].to_csv(
    "/mnt/data/projects/thor/thor_data/ztf/MPCORB_20210420.csv",
    index=False,
)

In [6]:
from thor.orbits import attributeObservations

# Attribution is expensive, so its result is cached on disk: it is computed
# and written only when the cache file does not exist yet, and read back from
# the file otherwise.
attribution_file = "/mnt/data/projects/thor/thor_data/ztf/attributions_20210420.csv"
if not os.path.exists(attribution_file):
    # NOTE(review): eps=1/3600 looks like 1 arcsecond expressed in degrees --
    # confirm the units expected by attributeObservations.
    attributions = attributeObservations(
        mpc_orbits,
        preprocessed_observations,
        eps=1/3600,
        include_probabilistic=True,
        orbits_chunk_size=100,
        observations_chunk_size=200000,
        threads=60,
        backend='PYOORB',
        backend_kwargs={},
    )
    attributions.to_csv(attribution_file, index=False)
else:
    # Read both identifier columns as strings. Leaving orbit_id to dtype
    # inference can yield a numeric column, which the later left merge onto
    # the associations would turn into floats (because of the NaNs for
    # unattributed observations) and then mix with the string obj_id values
    # used as a fallback.
    attributions = pd.read_csv(
        attribution_file,
        index_col=False,
        low_memory=False,
        dtype={
            "obs_id" : str,
            "orbit_id" : str,
        }
    )

In [7]:
# Display the attribution table (pandas abbreviates the middle rows of a long frame).
attributions

Unnamed: 0,orbit_id,obs_id,mjd_utc,distance,residual_ra_arcsec,residual_dec_arcsec,chi2,probability,mahalanobis_distance
0,100006,619483563715015157,58373.483565,0.000137,0.477276,-0.122193,24.272318,0.063444,5.515190
1,100006,621505463615015055,58375.505463,0.000081,0.241698,-0.162031,8.467188,0.205028,3.169222
2,100006,622508663315015053,58376.508669,0.000112,0.375438,-0.150549,16.361904,0.105889,4.490729
3,100006,623475553315015033,58377.475556,0.000095,0.309388,0.143574,11.633459,0.151788,3.770541
4,100006,623516093315015018,58377.516099,0.000080,0.216394,-0.192677,8.395061,0.211414,3.107878
...,...,...,...,...,...,...,...,...,...
266159,99970,620317971815015002,58374.317974,0.000053,-0.138432,-0.133353,3.694645,0.382141,1.923932
266160,99986,620266601615015005,58374.266609,0.000069,-0.202350,-0.144452,6.181192,0.288169,2.488420
266161,99987,619331292415015033,58373.331296,0.000032,-0.098039,0.062424,1.350831,0.558740,1.164143
266162,99987,622329122415015037,58376.329120,0.000061,-0.217708,0.026370,4.809220,0.333231,2.197837


In [8]:
# An observation can be attributed to more than one orbit. Order the rows by
# obs_id and then by increasing distance, and keep only the first row per
# obs_id, i.e. the attribution with the smallest distance. The index is
# renumbered after each step.
attributions = (
    attributions
    .sort_values(
        by=["obs_id", "distance"],
        ascending=[True, True],
        ignore_index=True,
    )
    .drop_duplicates(
        subset=["obs_id"],
        keep="first",
        ignore_index=True,
    )
)

In [9]:
# Create new file with 2021 attributions for analysis
preprocessed_associations_2021 = preprocessed_associations.merge(attributions[["obs_id", "orbit_id"]], on="obs_id", how="left")
preprocessed_associations_2021.loc[preprocessed_associations_2021["orbit_id"].isna(), "orbit_id"] = preprocessed_associations_2021[preprocessed_associations_2021["orbit_id"].isna()]["obj_id"].values
preprocessed_associations_2021.rename(
    columns={
        "obj_id" : "prev_obj_id",
        "orbit_id" : "obj_id"
    },
    inplace=True
)
preprocessed_associations_2021.to_csv(
    "/mnt/data/projects/thor/thor_data/ztf/preprocessed_associations_20210420.csv",
    index=False
)

In [10]:
# Display the updated associations: prev_obj_id is the original identifier and
# obj_id the updated one (identical where no attribution was found).
preprocessed_associations_2021

Unnamed: 0,obs_id,prev_obj_id,obj_id
0,610130484415010015,u00000000,u00000000
1,610130481215010007,u00000001,u00000001
2,610130481215015021,u00000002,u00000002
3,610130483515015056,u00000003,u00000003
4,610130483515015069,u00000004,u00000004
...,...,...,...
827541,624525841115010001,u00572186,u00572186
827542,624525845615015001,43423,43423
827543,624525842315010002,u00572187,u00572187
827544,624525840015015062,61549,61549
