In [4]:
#!/usr/bin/env python
# coding: utf-8

# Import required libraries and modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from astropy.coordinates import SkyCoord
import astropy.units as u
from tqdm import tqdm

In [5]:

# Define a threshold for matching errors
space_match_threshold = 1 * u.arcsec
MJD_tolerance = 0.00034 #30 sec in units of day 

# Define file paths.
sum_path = {}
sum_path["star"] = '../truth_star/truth_star_summary_v1-0-0.parquet'
#'/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_star/truth_star_summary_v1-0-0.parquet'
sum_path["sn"] = "../truth_sn/truth_sn_summary_v1-0-0.parquet" 
#'/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_sn/truth_sn_summary_v1-0-0.parquet'

var_path = {}
var_path["star"] = '../truth_star/truth_star_variability_v1-0-0.parquet'
#'/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_star/truth_star_variability_v1-0-0.parquet'
var_path["sn"] = '../truth_sn/truth_sn_variability_v1-0-0.parquet'
#'/sdf/data/rubin/shared/dc2_run2.2i_truth/truth_sn/truth_sn_variability_v1-0-0.parquet'

detection_csv_pth = "../sources_with_labels.csv" #'exported_sources.csv'

#get DIA detections
dia_detections = pd.read_csv(detection_csv_pth, index_col="diaSourceId") #formerly known as exported_csv
#only for now based on the csv we are reading
dia_detections.drop(["real"], axis=1, inplace=True)    

dia_detections

Unnamed: 0_level_0,ra,dec,midpointMjdTai,type
diaSourceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1257927201521665,55.760339,-32.260622,59583.125051,
1257927201521666,55.674078,-32.283857,59583.125051,
1257927201521667,55.552914,-32.306395,59583.125051,
1257927201521668,55.547689,-32.309278,59583.125051,
1257927201521669,55.570127,-32.306400,59583.125051,
...,...,...,...,...
660667525163384915,55.889519,-32.485637,61392.194195,star
661047079476396040,55.863218,-32.167598,61393.204087,star
662500331589992590,55.971559,-32.358853,61404.195949,star
662500331589992596,55.881022,-32.482719,61404.195949,star


## load truth catalogs

In [6]:
# Get mind and max ra and dec values to filter out unnecessary records.
max_exp_ra, min_exp_ra = dia_detections.ra.max(), dia_detections.ra.min()
max_exp_dec, min_exp_dec = dia_detections.dec.max(), dia_detections.dec.min()


catalog = {}
result_sum = {}

# Stage 1: Match sources in Space.
for s in ["star", "sn"]:
    # Read Parquet and CSV files to begin ground truth derivation.
    result_sum[s] = pd.read_parquet(sum_path[s])

    # Keep only those records from summary tables which are within the max ra and dec values in the exported sources.
    result_sum[s] = result_sum[s][(result_sum[s]['ra'] >= min_exp_ra) & (result_sum[s]['ra'] <= max_exp_ra) &\
                                    (result_sum[s]['dec'] >= min_exp_dec) & (result_sum[s]['dec'] <= max_exp_dec)]


    # Initialize astropy.coordinates.SkyCoord class for matching in space.
    catalog[s] = SkyCoord(ra=result_sum[s].ra, dec=result_sum[s].dec, unit=u.deg)

# Match exported sources with stars and supernovae.
detections_cat = SkyCoord(ra=dia_detections.ra, dec=dia_detections.dec, unit=u.deg)

In [23]:
# By default, set on_source = 0 and real=0 (bogus) for all values in the exported sources.
dia_detections['on_source'] = 0
dia_detections['real'] = 0
dia_detections['type'] = None

## spatial crossmatch

In [10]:
star_idx, star_d2d, star_d3d = detections_cat.match_to_catalog_sky(catalog['star'])
sn_idx, sn_d2d, sn_d3d = detections_cat.match_to_catalog_sky(catalog['sn'])

star_mask = star_d2d < space_match_threshold #remove matches that are too far
sn_mask = sn_d2d < space_match_threshold #remove matches that are too far

print(f"{np.sum(star_mask)} of {len(detections_cat)} stars matched before applying threshold")
print(f"{np.sum(sn_mask)} of {len(detections_cat)} sne matched before applying threshold")

7155 of 25446 stars matched before applying threshold
146 of 25446 sne matched before applying threshold


In [27]:


# Get all matched stars
matched_star_idx = star_idx[star_mask] #index in stars of matched dia_detections
print(f"Number of matched stars in Stage #1: {len(matched_star_idx)}")

# Get all matched supernovae
matched_sn_idx = sn_idx[sn_mask] #index in sn_cat of matched dia_detections
print(f"Number of matched sne in Stage #1: {len(matched_sn_idx)}")

dia_idx_stars = dia_detections.index[star_mask]
dia_idx_sn = dia_detections.index[sn_mask]

# Assign the variability sources catalog id to the detections
dia_detections["id"] = None
dia_detections.loc[dia_idx_stars,"id"] = result_sum["star"].iloc[matched_star_idx]["id"].to_numpy()
dia_detections.loc[dia_idx_sn,"id"] = result_sum['sn'].iloc[matched_sn_idx]["id"].to_numpy()



Number of matched stars in Stage #1: 7155
Number of matched sne in Stage #1: 146


In [28]:
dia_detections.head()

Unnamed: 0_level_0,ra,dec,midpointMjdTai,type,id,on_source,real
diaSourceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1257927201521665,55.760339,-32.260622,59583.125051,,30321355720.0,0,0
1257927201521666,55.674078,-32.283857,59583.125051,,,0,0
1257927201521667,55.552914,-32.306395,59583.125051,,,0,0
1257927201521668,55.547689,-32.309278,59583.125051,,,0,0
1257927201521669,55.570127,-32.3064,59583.125051,,,0,0


In [29]:
# The spatially matched detections get on_source = 1

dia_detections.loc[dia_idx_sn, "on_source"] = 1
dia_detections.loc[dia_idx_stars , "on_source"] = 1
dia_detections.loc[dia_idx_sn, "type"] = "sn"
dia_detections.loc[dia_idx_stars, "type"] = "star"
dia_detections

Unnamed: 0_level_0,ra,dec,midpointMjdTai,type,id,on_source,real
diaSourceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1257927201521665,55.760339,-32.260622,59583.125051,star,30321355720,1,0
1257927201521666,55.674078,-32.283857,59583.125051,,,0,0
1257927201521667,55.552914,-32.306395,59583.125051,,,0,0
1257927201521668,55.547689,-32.309278,59583.125051,,,0,0
1257927201521669,55.570127,-32.306400,59583.125051,,,0,0
...,...,...,...,...,...,...,...
660667525163384915,55.889519,-32.485637,61392.194195,star,31411443281,1,0
661047079476396040,55.863218,-32.167598,61393.204087,star,31102009372,1,0
662500331589992590,55.971559,-32.358853,61404.195949,star,31405685742,1,0
662500331589992596,55.881022,-32.482719,61404.195949,star,31411442918,1,0


In [30]:
# Print a summary at the end of first round of matching.
print("Summary at the end of First Stage:")
print(f"detections on a source", dia_detections["on_source"].sum(), "\n")
print(f"class detection: {dia_detections.groupby('type').count().iloc[:,0]}")

Summary at the end of First Stage:
detections on a source 7301 

class detection: type
sn       146
star    7155
Name: ra, dtype: int64


In [32]:
# Stage 2: Match sources in time.

matched_index = dia_detections["on_source"] > 0
matched = {}
matched["sn"] = dia_detections.loc[dia_idx_sn]
matched["star"] = dia_detections.loc[dia_idx_stars]


for s in ["sn", "star"]:
    print(f"working on class: {s}")
    
    # Get a list of all the unique MJDs of sources that matched in the previous stage for the object type.
    mjd_matched_in_space = matched[s].midpointMjdTai.unique()

    # Get min and max MJD values required for matching.
    max_mjd, min_mjd = mjd_matched_in_space.max(), mjd_matched_in_space.min()

    # Read star/sn lightcurve variability parquet for the object type
    df_var = pd.read_parquet(var_path[s])
    
    # Filter out records with unwanted MJDs.
    df_var = df_var[(df_var.MJD >= min_mjd) & (df_var.MJD <= max_mjd)]
    print(f"need to examine {len(df_var)} variability entries")
    
    for detected in tqdm(matched[s].index): #loop over indices of on_source detection 
        mask_matching_ids = df_var.id == matched[s].loc[detected].id #mask for sources with matching id in variability file 
        if np.any(np.abs(df_var[mask_matching_ids].MJD - matched[s].loc[detected].midpointMjdTai) <= MJD_tolerance):
            dia_detections.loc[detected, "real"] = 1
            print(detected)
    del df_var

dia_detections[dia_detections.real == 1]

working on class: sn
need to examine 12757691 variability entries


 12%|█████                                    | 18/146 [00:00<00:00, 177.33it/s]

342457652904722669


 40%|████████████████▎                        | 58/146 [00:00<00:00, 193.00it/s]

351362700657295472
351363319669456940


100%|████████████████████████████████████████| 146/146 [00:00<00:00, 195.94it/s]

645816020564443201
working on class: star





need to examine 380870139 variability entries


  4%|█▋                                      | 313/7155 [00:36<12:45,  8.93it/s]

104057445445271766


  4%|█▊                                      | 315/7155 [00:37<13:07,  8.68it/s]

104057445445271773


  4%|█▊                                      | 320/7155 [00:37<12:54,  8.83it/s]

104057445445271824


  5%|█▊                                      | 323/7155 [00:37<13:01,  8.74it/s]

104057445445271838


  5%|█▊                                      | 331/7155 [00:38<12:45,  8.92it/s]

104057445445271862


  5%|█▉                                      | 344/7155 [00:40<12:45,  8.90it/s]

104057445445271924


  5%|█▉                                      | 347/7155 [00:40<13:09,  8.62it/s]

104057445445271949


  8%|███▏                                    | 564/7155 [01:04<12:07,  9.06it/s]

121861151112822790


  8%|███▎                                    | 603/7155 [01:08<12:02,  9.07it/s]

121861649865900088


  9%|███▍                                    | 609/7155 [01:08<12:02,  9.06it/s]

121861649865900098


 15%|█████▉                                 | 1100/7155 [02:01<11:12,  9.00it/s]

225905106065818069


 15%|██████                                 | 1108/7155 [02:02<11:24,  8.83it/s]

225905106065818109


 18%|███████                                | 1303/7155 [02:23<11:07,  8.77it/s]

226394734485045274


 18%|███████▏                               | 1308/7155 [02:24<10:59,  8.87it/s]

226394734485045399


 18%|███████▏                               | 1317/7155 [02:25<11:44,  8.28it/s]

226394734485045432
226394734485045434


 19%|███████▏                               | 1325/7155 [02:26<10:51,  8.95it/s]

226394734485045443


 22%|████████▋                              | 1595/7155 [02:55<10:11,  9.09it/s]

242372028932292673


 24%|█████████▏                             | 1682/7155 [03:04<10:07,  9.00it/s]

242418712005574670


 24%|█████████▏                             | 1684/7155 [03:05<11:15,  8.09it/s]

242418712005574672
242418712005574674


 24%|█████████▏                             | 1696/7155 [03:06<10:06,  9.00it/s]

242418712005574692


 24%|█████████▎                             | 1699/7155 [03:06<10:20,  8.80it/s]

242418712005574697


 24%|█████████▎                             | 1704/7155 [03:07<10:12,  8.90it/s]

242418712005574706


 24%|█████████▎                             | 1715/7155 [03:08<10:09,  8.93it/s]

242418712005574754


 27%|██████████▍                            | 1912/7155 [03:29<09:36,  9.10it/s]

255384731867152471


 27%|██████████▍                            | 1915/7155 [03:29<10:32,  8.29it/s]

255384731867152474
255384731867152480


 30%|███████████▊                           | 2168/7155 [03:56<09:17,  8.94it/s]

342457652904722485


 30%|███████████▊                           | 2172/7155 [03:57<09:20,  8.89it/s]

342457652904722546


 30%|███████████▊                           | 2177/7155 [03:57<09:13,  8.99it/s]

342457652904722578


 30%|███████████▉                           | 2180/7155 [03:58<09:23,  8.84it/s]

342457652904722583


 31%|███████████▉                           | 2183/7155 [03:58<09:28,  8.75it/s]

342457652904722591


 31%|███████████▉                           | 2189/7155 [03:59<09:11,  9.00it/s]

342457652904722620


 31%|███████████▉                           | 2197/7155 [04:00<09:08,  9.05it/s]

342457652904722635


 33%|████████████▉                          | 2375/7155 [04:18<09:23,  8.48it/s]

347146711367417867
347146711367417869


 33%|████████████▉                          | 2382/7155 [04:19<08:44,  9.09it/s]

347146711367417879


 35%|█████████████▌                         | 2493/7155 [04:31<08:33,  9.08it/s]

351362700657295407


 35%|█████████████▋                         | 2509/7155 [04:33<08:27,  9.15it/s]

351362700657295474


 35%|█████████████▋                         | 2511/7155 [04:33<08:49,  8.77it/s]

351363319669456907


 35%|█████████████▋                         | 2518/7155 [04:34<08:35,  9.00it/s]

351363319669456937


 36%|██████████████                         | 2578/7155 [04:40<08:23,  9.09it/s]

354457242847674376


 36%|██████████████                         | 2583/7155 [04:41<08:29,  8.97it/s]

354457242847674415


 36%|██████████████▏                        | 2596/7155 [04:42<08:20,  9.11it/s]

354458871714021390


 36%|██████████████▏                        | 2599/7155 [04:42<08:31,  8.90it/s]

354458871714021395


 37%|██████████████▍                        | 2659/7155 [04:49<08:11,  9.14it/s]

357391803948728378


 39%|███████████████▏                       | 2784/7155 [05:02<08:38,  8.43it/s]

363352170583556164
363352170583556167


 42%|████████████████▍                      | 3012/7155 [05:26<07:40,  9.00it/s]

372268538259308571


 44%|█████████████████                      | 3136/7155 [05:39<07:26,  9.01it/s]

372716278936240195


 47%|██████████████████▌                    | 3397/7155 [06:07<06:51,  9.12it/s]

379922626857926693


 48%|██████████████████▋                    | 3437/7155 [06:11<07:23,  8.38it/s]

381161183756943368


 48%|██████████████████▊                    | 3440/7155 [06:12<07:09,  8.65it/s]

381161183756943376


 48%|██████████████████▊                    | 3445/7155 [06:12<06:52,  8.99it/s]

381161183756943395


 56%|█████████████████████▋                 | 3989/7155 [07:10<05:50,  9.03it/s]

488448449977516097


 56%|█████████████████████▊                 | 4013/7155 [07:13<05:43,  9.14it/s]

488448449977516574


 56%|█████████████████████▉                 | 4019/7155 [07:13<05:45,  9.09it/s]

488448449977516740


 60%|███████████████████████▍               | 4306/7155 [07:44<05:11,  9.15it/s]

498495382431465484


 61%|███████████████████████▉               | 4384/7155 [07:52<05:34,  8.29it/s]

502749563644280834
502749563644280839


 61%|███████████████████████▉               | 4387/7155 [07:52<05:18,  8.68it/s]

502749563644280841


 63%|████████████████████████▍              | 4492/7155 [08:04<04:54,  9.04it/s]

506428274000265218


 63%|████████████████████████▍              | 4494/7155 [08:04<05:04,  8.74it/s]

506428274000265221


 63%|████████████████████████▌              | 4506/7155 [08:05<04:49,  9.15it/s]

506428274000265256


 64%|████████████████████████▊              | 4562/7155 [08:11<04:44,  9.11it/s]

510734470488260617


 64%|████████████████████████▉              | 4576/7155 [08:12<04:44,  9.05it/s]

510734470488260672


 64%|████████████████████████▉              | 4579/7155 [08:13<05:13,  8.21it/s]

510734470488260676
510734470488260677


 66%|█████████████████████████▋             | 4722/7155 [08:28<04:33,  8.90it/s]

514672918991470600


 66%|█████████████████████████▊             | 4740/7155 [08:30<04:25,  9.10it/s]

514672918991470663


 67%|██████████████████████████▏            | 4803/7155 [08:37<04:19,  9.08it/s]

515513652934082695


 67%|██████████████████████████▏            | 4815/7155 [08:38<04:18,  9.06it/s]

515513652934082934


 75%|█████████████████████████████▎         | 5373/7155 [09:37<03:18,  8.99it/s]

554641942630105128


 77%|██████████████████████████████         | 5524/7155 [09:54<03:01,  9.00it/s]

620367796559151143


 79%|██████████████████████████████▊        | 5651/7155 [10:07<02:47,  8.99it/s]

625896016469557251


 79%|██████████████████████████████▊        | 5653/7155 [10:07<02:51,  8.75it/s]

625896016469557268


 80%|███████████████████████████████▏       | 5714/7155 [10:14<02:40,  8.97it/s]

627179613616865303


 83%|████████████████████████████████▍      | 5947/7155 [10:39<02:14,  8.96it/s]

636277479094353951


 88%|██████████████████████████████████▏    | 6267/7155 [11:13<01:43,  8.55it/s]

645816020564443233


 88%|██████████████████████████████████▏    | 6274/7155 [11:14<01:41,  8.69it/s]

645816020564443378


 88%|██████████████████████████████████▏    | 6280/7155 [11:15<01:40,  8.73it/s]

645816020564443511


 88%|██████████████████████████████████▏    | 6283/7155 [11:15<01:41,  8.61it/s]

645816020564443728


 90%|███████████████████████████████████▏   | 6455/7155 [11:33<01:18,  8.92it/s]

647699403989057971


 90%|███████████████████████████████████▏   | 6464/7155 [11:34<01:15,  9.17it/s]

647699403989058057


 95%|█████████████████████████████████████  | 6789/7155 [12:09<00:39,  9.24it/s]

121861151112822817


 96%|█████████████████████████████████████▎ | 6839/7155 [12:14<00:35,  8.83it/s]

225905106065817658


 96%|█████████████████████████████████████▌ | 6888/7155 [12:19<00:28,  9.22it/s]

242418712005574717


 97%|█████████████████████████████████████▋ | 6912/7155 [12:22<00:27,  8.82it/s]

351362700657295476


 97%|█████████████████████████████████████▊ | 6945/7155 [12:25<00:25,  8.31it/s]

372716278936240222


 97%|█████████████████████████████████████▉ | 6970/7155 [12:28<00:19,  9.26it/s]

379922626857926673


 98%|██████████████████████████████████████▏| 7012/7155 [12:32<00:15,  9.03it/s]

488448449977516453


 99%|██████████████████████████████████████▍| 7051/7155 [12:36<00:12,  8.57it/s]

510734470488260694


 99%|██████████████████████████████████████▌| 7069/7155 [12:38<00:10,  8.21it/s]

554641942630105130


 99%|██████████████████████████████████████▌| 7080/7155 [12:40<00:08,  9.06it/s]

627179613616865357


100%|██████████████████████████████████████▊| 7122/7155 [12:44<00:03,  8.59it/s]

645816020564443153


100%|██████████████████████████████████████▉| 7140/7155 [12:46<00:01,  9.04it/s]

647699403989058266


100%|███████████████████████████████████████| 7155/7155 [12:48<00:00,  9.31it/s]


Unnamed: 0_level_0,ra,dec,midpointMjdTai,type,id,on_source,real
diaSourceId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
104057445445271766,55.810521,-32.439969,59840.233977,star,31411437889,1,1
104057445445271773,55.685438,-32.420221,59840.233977,star,31107745884,1,1
104057445445271824,55.838160,-32.416255,59840.233977,star,31405691981,1,1
104057445445271838,55.879441,-32.413054,59840.233977,star,31411435379,1,1
104057445445271862,55.816092,-32.372750,59840.233977,star,31107745011,1,1
...,...,...,...,...,...,...,...
510734470488260694,55.966513,-32.222557,60973.167688,star,30321355633,1,1
554641942630105130,55.792227,-32.292353,61101.060926,star,30830343259,1,1
627179613616865357,55.831304,-32.192921,61297.243962,star,31405665055,1,1
645816020564443153,55.861250,-32.394682,61343.105723,star,31405689453,1,1
