In [1]:
from snowshovel.io import read_single_alert, parse_alerts
import pandas as pd
from tqdm import tqdm
from pathlib import Path

%matplotlib inline

Set up the config here for running the notebook

In [2]:
# Set this to the directory containing the unzipped avro alerts
data_dir = Path("/Users/robertstein/Data/ztf_public_20230609/")

# Set the number of alerts to read
n_to_read = 100

## What is an avro alert?

We can firstly look at a single alert, to see what it looks like:

In [3]:
alert_path = data_dir.joinpath("2350249170015010010.avro")
print(f"We will look at the avro alert with path '{alert_path}'")

We will look at the avro alert with path '/Users/robertstein/Data/ztf_public_20230609/2350249170015010010.avro'


In [4]:
alert = read_single_alert(alert_path)
print("An avro alert is just a python dictionary, as you can see below:")
alert

An avro alert is just a python dictionary, as you can see below:


{'jd': 2460104.7491782,
 'fid': 1,
 'pid': 2350249170015,
 'diffmaglim': 20.196775436401367,
 'pdiffimfilename': 'ztf_20230609249155_000326_zg_c01_o_q1_scimrefdiffimg.fits',
 'programpi': 'Kulkarni',
 'programid': 1,
 'candid': 2350249170015010010,
 'isdiffpos': 'f',
 'tblid': 10,
 'nid': 2350,
 'rcid': 0,
 'field': 326,
 'xpos': 1293.8194580078125,
 'ypos': 841.447509765625,
 'ra': 230.9148832,
 'dec': -19.2385929,
 'magpsf': 17.376073837280273,
 'sigmapsf': 0.047354377806186676,
 'chipsf': 14.490418434143066,
 'magap': 17.40329933166504,
 'sigmagap': 0.03269999846816063,
 'distnr': 0.16106265783309937,
 'magnr': 15.559000015258789,
 'sigmagnr': 0.013000000268220901,
 'chinr': 0.6050000190734863,
 'sharpnr': -0.017999999225139618,
 'sky': 0.5778443217277527,
 'magdiff': 0.027225999161601067,
 'fwhm': 2.9700000286102295,
 'classtar': 0.9950000047683716,
 'mindtoedge': 841.447509765625,
 'magfromlim': 2.793475389480591,
 'seeratio': 0.9369040727615356,
 'aimage': 0.8059999942779541,
 'b

## Handling many avro alerts:

That was just one avro alert, but our data will contain many sources, each with their own alert. python dictionaries are not very convenient for a large number of entries. Instead, we can use a function to read in a number of the alerts and convert them to a pandas dataframe.

In [5]:
alerts_df = parse_alerts(data_dir=data_dir, n_to_read=n_to_read)
print("Here you can see a pandas dataframe. The avro alert above is the first row in the table.")
alerts_df

100%|████████████████████████████████████████| 100/100 [00:00<00:00, 210.50it/s]

Here you can see a pandas dataframe. The avro alert above is the first row in the table.





Unnamed: 0,jd,fid,pid,diffmaglim,pdiffimfilename,programpi,programid,candid,isdiffpos,tblid,...,zpmed,clrmed,clrrms,neargaia,neargaiabright,maggaia,maggaiabright,exptime,drb,drbversion
0,2.460105e+06,1,2350249170015,20.196775,ztf_20230609249155_000326_zg_c01_o_q1_scimrefd...,Kulkarni,1,2350249170015010010,f,10,...,26.086000,0.554,0.230397,0.228274,-999.000000,15.315500,-999.000000,30.0,0.999971,d6_m7
1,2.460105e+06,1,2350249170015,20.196775,ztf_20230609249155_000326_zg_c01_o_q1_scimrefd...,Kulkarni,1,2350249170015010011,f,11,...,26.086000,0.554,0.230397,0.066207,-999.000000,14.556261,-999.000000,30.0,0.999255,d6_m7
2,2.460105e+06,1,2350249170015,20.196775,ztf_20230609249155_000326_zg_c01_o_q1_scimrefd...,Kulkarni,1,2350249170015010013,f,13,...,26.086000,0.554,0.230397,0.175491,-999.000000,16.767410,-999.000000,30.0,1.000000,d6_m7
3,2.460105e+06,1,2350249170015,20.196775,ztf_20230609249155_000326_zg_c01_o_q1_scimrefd...,Kulkarni,1,2350249170015010023,f,23,...,26.086000,0.554,0.230397,0.398773,-999.000000,16.487196,-999.000000,30.0,1.000000,d6_m7
4,2.460105e+06,1,2350249170015,20.196775,ztf_20230609249155_000326_zg_c01_o_q1_scimrefd...,Kulkarni,1,2350249170015010024,f,24,...,26.086000,0.554,0.230397,0.105536,52.636383,18.771164,13.606915,30.0,0.999883,d6_m7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2.460105e+06,1,2350249170515,20.181540,ztf_20230609249155_000326_zg_c02_o_q2_scimrefd...,Kulkarni,1,2350249170515015004,t,4,...,26.224501,0.538,0.233582,15.838425,76.763954,18.365631,13.423489,30.0,0.999845,d6_m7
96,2.460105e+06,1,2350249170515,20.181540,ztf_20230609249155_000326_zg_c02_o_q2_scimrefd...,Kulkarni,1,2350249170515015005,t,5,...,26.224501,0.538,0.233582,16.460882,-999.000000,18.606089,-999.000000,30.0,1.000000,d6_m7
97,2.460105e+06,1,2350249170515,20.181540,ztf_20230609249155_000326_zg_c02_o_q2_scimrefd...,Kulkarni,1,2350249170515015010,t,10,...,26.224501,0.538,0.233582,14.950565,-999.000000,19.616867,-999.000000,30.0,0.999972,d6_m7
98,2.460105e+06,1,2350249170515,20.181540,ztf_20230609249155_000326_zg_c02_o_q2_scimrefd...,Kulkarni,1,2350249170515015013,t,13,...,26.224501,0.538,0.233582,5.632905,80.984413,20.652666,13.806736,30.0,0.999986,d6_m7


The table above contains many rows and columns. It can be hard to see what the important information. Let's just display a few relevant columns, rather than all 103.

In [6]:
useful_columns = [
    "isdiffpos", # Is the detection postive (more flux in science image than reference), or negative (less flux)?
    "ra", # What is the sky position in Right Ascension?
    "dec", # What is the sky position in Declination?
    "magpsf", # What is the magnitude (brightness) of the detection?
    "sigmapsf", # What is the uncertainty on the magnitude?
    "distnr", # How close is the detection to a known source in the reference image?
    "ndethist", # How many times has this object been detected?
]
slim_alerts_df = alerts_df[useful_columns]
slim_alerts_df

Unnamed: 0,isdiffpos,ra,dec,magpsf,sigmapsf,distnr,ndethist
0,f,230.914883,-19.238593,17.376074,0.047354,0.161063,620
1,f,230.630090,-19.273569,17.239075,0.078380,0.173305,639
2,f,231.014147,-19.282204,18.312075,0.061234,0.134472,611
3,f,230.909926,-19.485533,18.983074,0.085728,0.424015,721
4,f,230.435069,-19.499763,20.108074,0.164924,0.145266,65
...,...,...,...,...,...,...,...
95,t,228.200989,-19.285597,19.772402,0.170230,15.738925,1
96,t,227.781890,-19.395761,19.201401,0.113029,12.420472,1
97,t,228.044432,-19.503497,19.268400,0.112572,4.766288,1
98,t,228.354661,-19.831578,18.597401,0.085115,4.054725,1


In [9]:
# Bad subtractions are often not repeated
# Let's restrict ourselves to sources which are detected at least twice
mask = slim_alerts_df["ndethist"] > 1
slim_alerts_df = slim_alerts_df[mask].reset_index()
print(f"There are {len(slim_alerts_df)} dources ")

slim_alerts_df

Unnamed: 0,index,isdiffpos,ra,dec,magpsf,sigmapsf,distnr,ndethist
0,0,f,230.914883,-19.238593,17.376074,0.047354,0.161063,620
1,1,f,230.63009,-19.273569,17.239075,0.07838,0.173305,639
2,2,f,231.014147,-19.282204,18.312075,0.061234,0.134472,611
3,3,f,230.909926,-19.485533,18.983074,0.085728,0.424015,721
4,4,f,230.435069,-19.499763,20.108074,0.164924,0.145266,65
5,7,t,230.453158,-19.12365,18.570074,0.079196,0.143341,542
6,11,t,231.24265,-19.245702,16.856073,0.091851,0.708412,255
7,17,f,230.045208,-19.158488,17.748367,0.086055,0.774607,215
8,18,f,229.473594,-19.301989,18.538366,0.18685,0.283202,16
9,19,f,230.072448,-19.420921,19.711367,0.144221,0.132384,204
