diff --git a/tests/test_noisification.py b/tests/test_noisification.py
index cfa7d8f..4130b7f 100644
--- a/tests/test_noisification.py
+++ b/tests/test_noisification.py
@@ -8,6 +8,7 @@ from ztfparsnip.create import CreateLightcurves
 
 logging.getLogger("ztfparsnip.create").setLevel(logging.DEBUG)
+logging.getLogger("ztfparsnip.io").setLevel(logging.DEBUG)
 
 
 class TestNoisification(unittest.TestCase):
@@ -54,92 +55,61 @@ def test_noisification_csv(self):
         sample.select()
         sample.create(plot_debug=True)
 
-        for name in ["ZTF19aapreis", "ZTF20acvmzfv"]:
+        available = [x for x in sample.test_dir.glob("*") if x.is_file()]
+
+        for entry in available:
+            print(entry)
+
+        for name in ["ZTF19aapreis", "ZTF18aamvfeb"]:
             path = sample.test_dir / f"{name}.csv"
             pd.read_csv(path, comment="#")
 
-        infile_noisified = sample.train_dir / "ZTF18aavvnzu_3.csv"
+        infile_noisified = sample.train_dir / "ZTF20acueziy_2.csv"
 
         df = pd.read_csv(infile_noisified, comment="#", index_col=0)
         df.sort_values(by=["obsmjd"], inplace=True)
+        mags = df.magpsf.values
 
         reference_mags = [
-            25.29078394,
-            25.76189095,
-            27.2655531,
-            24.6668269,
-            24.53136655,
-            np.nan,
-            25.57881118,
-            24.96131425,
-            24.6494601,
-            29.39622331,
+            20.7862801545962,
+            19.9859143237067,
+            20.3707809492468,
+            19.9429214884179,
+            20.4923597159272,
+            19.888937399982,
+            20.8460105835683,
+            20.7091668978473,
+            20.0292037077898,
+            21.1112591814753,
+            20.1875765243095,
+            21.0763226188922,
+            20.3086801104863,
+            21.9130488366769,
+            20.6240161912531,
+            21.7743936386835,
+            20.9042362388927,
+            20.0740109279902,
+            21.239673495802,
+            20.7024764008374,
+            25.7528842507013,
+            20.4149368461691,
+            21.1107872472987,
+            20.8958085465419,
+            22.5168452113857,
+            21.2957311943224,
+            20.9247303408706,
+            21.1622199708444,
+            22.8048906115971,
+            21.3839012928751,
             np.nan,
-            23.98498964,
-            24.12828652,
-            23.1775983,
-            22.00932895,
-            22.55727056,
-            21.62206101,
-            21.39578053,
-            22.05108525,
-            20.94277367,
-            21.03368154,
-            20.34244796,
-            19.9950083,
-            20.78069822,
-            19.88999309,
-            19.75953625,
-            19.52539882,
-            19.73810262,
-            21.16211422,
-            19.56434958,
-            19.8297774,
-            19.58625593,
-            21.16368346,
-            20.15647694,
-            19.85284108,
-            19.89014672,
-            21.39221394,
-            19.99618702,
-            20.10251282,
-            20.18338371,
-            20.46475862,
-            20.27376621,
-            20.45132811,
-            20.75834641,
-            21.78499175,
-            21.01347781,
-            21.36923858,
-            21.61849326,
-            20.41307089,
-            20.23913687,
-            21.45339639,
-            21.27754412,
-            20.53259678,
-            21.24645026,
-            21.23964233,
-            21.81331908,
-            20.51849827,
-            21.42855531,
-            21.92474183,
-            20.7997237,
-            20.86847922,
-            21.26933793,
-            21.63336927,
-            22.15300501,
-            21.05602829,
-            21.85539486,
-            22.23739681,
-            20.93244031,
-            21.78658614,
-            21.84444574,
-            20.98134349,
-            22.26270503,
-            21.43027687,
-            21.87283486,
-            22.18428394,
-            21.40032939,
-            22.04574997,
+            21.1104346878744,
+            20.9656819124267,
+            21.2908527288881,
+            20.9777549528361,
+            22.1839553755309,
+            22.0672315128775,
+            21.8042786749289,
+            21.7824972084558,
+            24.5796190172382,
         ]
 
         np.testing.assert_almost_equal(df.magpsf.values, reference_mags, decimal=5)
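For reference, the regression check above can be reproduced standalone. A minimal sketch, assuming a local train directory containing the noisified CSV named in the test; the two reference values are just the first entries of the list above, truncated for illustration:

from pathlib import Path

import numpy as np
import pandas as pd

# Assumed local path; the test derives train_dir from its CreateLightcurves instance.
infile_noisified = Path("train") / "ZTF20acueziy_2.csv"

# Header metadata lines in the CSVs are prefixed with '#', hence comment="#".
df = pd.read_csv(infile_noisified, comment="#", index_col=0)
df.sort_values(by=["obsmjd"], inplace=True)

# First two reference magnitudes from the test above, for illustration only.
reference_mags = [20.7862801545962, 19.9859143237067]

# decimal=5 tolerates float rounding beyond the fifth decimal place.
np.testing.assert_almost_equal(df.magpsf.values[:2], reference_mags, decimal=5)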
diff --git a/ztfparsnip/create.py b/ztfparsnip/create.py
index 57af787..fa97882 100644
--- a/ztfparsnip/create.py
+++ b/ztfparsnip/create.py
@@ -30,6 +30,7 @@ def __init__(
         k_corr: bool = True,
         seed: int | None = None,
         bts_baseline_dir: Path = io.BTS_LC_BASELINE_DIR,
+        bl_corrected: bool = True,
         name: str = "train",
         reprocess_headers: bool = False,
         output_format: str = "parsnip",
@@ -53,7 +54,11 @@ def __init__(
         self.plot_magdist = plot_magdist
         self.train_dir = train_dir
         self.plot_dir = plot_dir
-        self.lc_dir = bts_baseline_dir
+        if bl_corrected:
+            self.lc_dir = bts_baseline_dir
+        else:
+            self.lc_dir = io.BTS_LC_DIR
+
         self.testing = testing
 
         self.rng = default_rng(seed=self.seed)
@@ -75,22 +80,27 @@ def __init__(
         if isinstance(self.plot_dir, str):
             self.plot_dir = Path(self.plot_dir)
 
-        if test_dir is None:
-            self.test_dir = self.train_dir.resolve().parent / "test"
+        self.test_dir: Path | None = None
+
+        if self.test_fraction > 0:
+            if test_dir is None:
+                self.test_dir = self.train_dir.resolve().parent / "test"
+            else:
+                self.test_dir = Path(test_dir)
+            self.test_dir.mkdir(exist_ok=True, parents=True)
         else:
-            self.test_dir = Path(test_dir)
+            self.test_dir = None
 
-        for p in [self.train_dir, self.plot_dir, self.test_dir]:
-            if not p.exists():
-                os.makedirs(p)
+        for p in [self.train_dir, self.plot_dir]:
+            p.mkdir(exist_ok=True, parents=True)
 
         self.config = io.load_config()
 
         """
         if we are in the default sample dir, check if files are there,
-        check if files are there an download if not
+        check if files are there and download if not
         """
-        if self.lc_dir == io.BTS_LC_BASELINE_DIR:
+        if self.lc_dir in [io.BTS_LC_BASELINE_DIR, io.BTS_LC_DIR]:
             if not self.testing:
                 nr_files = len([x for x in self.lc_dir.glob("*") if x.is_file()])
             else:
@@ -98,13 +108,18 @@ def __init__(
                 for x in self.lc_dir.glob("*"):
                     if f"{x.name}".split("_")[0] in self.config["test_lightcurves"]:
                         nr_files += 1
-            if (self.testing == False and nr_files < 6841) or (
-                self.testing and nr_files < 10
+
+            if (
+                (self.testing == False and bl_corrected == True and nr_files < 6841)
+                or (self.testing == False and bl_corrected == False and nr_files < 7130)
+                or (self.testing and nr_files < 10)
             ):
                 self.logger.info("Downloading sample")
-                io.download_sample(testing=testing)
+                io.download_sample(testing=testing, bl_corrected=bl_corrected)
 
-        self.ztfids = io.get_all_ztfids(lc_dir=self.lc_dir, testing=self.testing)
+        self.ztfids = io.get_all_ztfids(
+            lc_dir=self.lc_dir, testing=self.testing, bl_corrected=bl_corrected
+        )
 
         classkeys_available = [
             key
@@ -135,7 +150,17 @@ def __init__(
         self.logger.info("Creating noisified training data.")
 
         self.logger.info(
-            f"\n---------------------------------\nSelected configuration\nweights: {weights_info}\nk correction: {self.k_corr}\ntest fraction: {self.test_fraction}\nseed: {self.seed}\noutput format: {self.output_format}\ntraining data output directory: {self.train_dir}\n---------------------------------"
+            f"\n---------------------------------\n"
+            f"Selected configuration"
+            f"\nweights: {weights_info}\n"
+            f"k correction: {self.k_corr}\n"
+            f"test fraction: {self.test_fraction}\n"
+            f"seed: {self.seed}\n"
+            f"output format: {self.output_format}\n"
+            f"training data output directory: {self.train_dir}\n"
+            f"test data output directory: {self.test_dir}\n"
+            f"plot directory: {self.plot_dir}\n"
+            f"---------------------------------"
         )
 
     def get_simple_class(self, classkey: str, bts_class: str) -> str:
@@ -249,6 +274,7 @@ def select(
         for k, v in classes_available.items():
             availability += f"{k}: {classes_available[k]['entries']}\n"
             available_dict.update({k: classes_available[k]["entries"]})
+
         self.logger.info(
             f"\n---------------------------------\nLightcurves available:\n{availability}---------------------------------"
         )
@@ -306,6 +332,7 @@ def create(
         delta_z: float = 0.1,
         SN_threshold: float = 5.0,
         n_det_threshold: int = 5,
+        detection_scale: float = 0.5,
         subsampling_rate: float = 1.0,
         jd_scatter_sigma: float = 0.0,
         n: int | None = None,
@@ -329,6 +356,7 @@ def create(
                 "SN_threshold": SN_threshold,
                 "n_det_threshold": n_det_threshold,
             },
+            "detection_scale": detection_scale,
             "subsampling_rate": subsampling_rate,
             "jd_scatter_sigma": jd_scatter_sigma,
         }
@@ -342,7 +370,8 @@ def create(
             if c in self.selection.keys():
                 # check if it's a test sample lightcurve
                 if header["name"] in self.test_sample["all"]["ztfids"]:
-                    multiplier = 0
+                    # multiplier = 0
+                    multiplier = self.selection[c]
                     get_test = True
                 else:
                     multiplier = self.selection[c]
@@ -359,21 +388,36 @@ def create(
                     sig_noise_cut=sig_noise_cut,
                     SN_threshold=SN_threshold,
                     n_det_threshold=n_det_threshold,
+                    detection_scale=detection_scale,
                     subsampling_rate=subsampling_rate,
                     jd_scatter_sigma=jd_scatter_sigma,
                     output_format=self.output_format,
                 )
 
                 if get_test:
-                    test_lc, _ = noisify.noisify_lightcurve()
+                    test_lc, noisy_test_lcs = noisify.noisify_lightcurve()
                     if test_lc is not None:
+                        for i, noisy_test_lc in enumerate(noisy_test_lcs):
+                            noisy_test_lc.meta["name"] = (
+                                noisy_test_lc.meta["name"] + f"_{i}"
+                            )
                         final_lightcurves["bts_test"].append(test_lc)
-                        if self.output_format == "ztfnuclear":
+                        final_lightcurves["bts_test"].extend(noisy_test_lcs)
+                        if (
+                            self.output_format == "ztfnuclear"
+                            and self.test_dir is not None
+                        ):
                             io.save_csv_with_header(
                                 test_lc,
                                 savedir=self.test_dir,
                                 output_format=self.output_format,
                             )
+                            for noisy_test_lc in noisy_test_lcs:
+                                io.save_csv_with_header(
+                                    noisy_test_lc,
+                                    savedir=self.test_dir,
+                                    output_format=self.output_format,
+                                )
                 else:
                     bts_lc, noisy_lcs = noisify.noisify_lightcurve()
@@ -456,7 +500,7 @@ def create(
         # Save h5 files
         for k, v in final_lightcurves.items():
             if len(v) > 0:
-                if k == "bts_test":
+                if k == "bts_test" and self.test_dir is not None:
                     output_dir = self.test_dir
                 else:
                     output_dir = self.train_dir
diff --git a/ztfparsnip/io.py b/ztfparsnip/io.py
index e1ce949..2d7bd80 100644
--- a/ztfparsnip/io.py
+++ b/ztfparsnip/io.py
@@ -2,6 +2,7 @@
 # Author: Simeon Reusch (simeon.reusch@desy.de)
 # License: BSD-3-Clause
 
+import glob
 import logging
 import os
 import random
@@ -22,6 +23,7 @@
 if ztfdir := os.getenv("ZTFDATA"):
     BASE_DIR = Path(ztfdir) / "ztfparsnip"
     BTS_LC_BASELINE_DIR = BASE_DIR / "BTS_plus_TDE"
+    BTS_LC_DIR = BASE_DIR / "BTS_plus_TDE_nobl"
     TRAIN_DATA = BASE_DIR / "train"
     PLOT_DIR = BASE_DIR / "plots"
     DOWNLOAD_URL_SAMPLE = Path(
@@ -30,8 +32,11 @@
     DOWNLOAD_URL_SAMPLE_TEST = Path(
         "https://syncandshare.desy.de/index.php/s/bnGQYb9goiHi6bH/download"
     )
+    DOWNLOAD_URL_SAMPLE_NOBL = Path(
+        "https://syncandshare.desy.de/index.php/s/co6B4kNGejcpDmT/download"
+    )
 
-    for d in [BASE_DIR, BTS_LC_BASELINE_DIR, TRAIN_DATA, PLOT_DIR]:
+    for d in [BASE_DIR, BTS_LC_BASELINE_DIR, BTS_LC_DIR, TRAIN_DATA, PLOT_DIR]:
         if not d.is_dir():
             os.makedirs(d)
 
@@ -39,7 +44,7 @@
 else:
     raise ValueError(
-        "You have to set the ZTFDATA environment variable in your .bashrc or .zshrc. See github.com/mickaelrigault/ztfquery"
+        "You have to set the ZTFDATA environment variable in your .bashrc or .zshrc. See https://github.com/mickaelrigault/ztfquery"
     )
 
 
@@ -59,13 +64,19 @@ def add_mag(df: pd.DataFrame) -> pd.DataFrame:
     Add mag and magerr
     """
     # remove negative flux rows
-    df.query("ampl_corr>0", inplace=True)
+    if "ampl_corr" in list(df.keys()):
+        ampl_col = "ampl_corr"
+        ampl_err_col = "ampl_err_corr"
+    else:
+        ampl_col = "ampl"
+        ampl_err_col = "ampl.err"
+    df.query(f"{ampl_col}>0", inplace=True)
 
     F0 = 10 ** (df.magzp / 2.5)
     F0_err = F0 / 2.5 * np.log(10) * df.magzpunc
-    flux = df.ampl_corr / F0 * 3630.78
+    flux = df[ampl_col] / F0 * 3630.78
     flux_err = (
-        np.sqrt((df.ampl_err_corr / F0) ** 2 + (df.ampl_corr * F0_err / F0**2) ** 2)
+        np.sqrt((df[ampl_err_col] / F0) ** 2 + (df[ampl_col] * F0_err / F0**2) ** 2)
         * 3630.78
     )
@@ -106,7 +117,8 @@ def get_ztfid_dataframe(ztfid: str, lc_dir: Path | None = None) -> pd.DataFrame
     if is_valid_ztfid(ztfid):
         if lc_dir is None:
             lc_dir = BTS_LC_BASELINE_DIR
-        filepath = lc_dir / f"{ztfid}_bl.csv"
+
+        filepath = glob.glob(str(lc_dir / f"{ztfid}*"))[0]
 
         try:
             df = pd.read_csv(filepath, comment="#", index_col=0)
@@ -127,7 +139,8 @@ def get_ztfid_header(ztfid: str, lc_dir: Path | None = None) -> dict | None:
     if is_valid_ztfid(ztfid):
         if lc_dir is None:
             lc_dir = BTS_LC_BASELINE_DIR
-        filepath = lc_dir / f"{ztfid}_bl.csv"
+
+        filepath = glob.glob(str(lc_dir / f"{ztfid}*"))[0]
 
         try:
             with open(filepath, "r") as input_file:
@@ -165,7 +178,9 @@ def get_ztfid_header(ztfid: str, lc_dir: Path | None = None) -> dict | None:
         raise ValueError(f"{ztfid} is not a valid ZTF ID")
 
 
-def save_csv_with_header(lc, savedir: Path, output_format: str = "ztfnuclear"):
+def save_csv_with_header(
+    lc, savedir: Path, output_format: str = "ztfnuclear", bl_corrected: bool = True
+):
     """
     Generate a string of the header from a dict, meant to be written to a csv file.
     Save the lightcurve with the header info as csv
     """
@@ -188,8 +203,12 @@ def save_csv_with_header(lc, savedir: Path, output_format: str = "ztfnuclear"):
         del lc["jd"]
 
         lc.rename_column("zp", "magzp")
-        lc.rename_column("flux", "ampl_corr")
-        lc.rename_column("fluxerr", "ampl_err_corr")
+        if bl_corrected:
+            lc.rename_column("flux", "ampl_corr")
+            lc.rename_column("fluxerr", "ampl_err_corr")
+        else:
+            lc.rename_column("flux", "ampl")
+            lc.rename_column("fluxerr", "ampl.err")
 
     filename = f"{lc_id}.csv"
@@ -218,12 +237,17 @@ def short_id():
     return "".join(random.choices(alphabet, k=5))
 
 
-def get_all_ztfids(lc_dir: Path | None = None, testing: bool = False) -> List[str]:
+def get_all_ztfids(
+    lc_dir: Path | None = None, testing: bool = False, bl_corrected: bool = True
+) -> List[str]:
     """
     Checks the lightcurve folder and gets all ztfids
     """
     if lc_dir is None:
-        lc_dir = BTS_LC_BASELINE_DIR
+        if bl_corrected:
+            lc_dir = BTS_LC_BASELINE_DIR
+        else:
+            lc_dir = BTS_LC_DIR
 
     ztfids = []
     for name in os.listdir(lc_dir):
@@ -255,7 +279,7 @@ def load_config(config_path: Path | None = None) -> dict:
     return config
 
 
-def download_sample(testing: bool = False):
+def download_sample(testing: bool = False, bl_corrected: bool = True):
     """
     Download the BTS + TDE lightcurves from the DESY Nextcloud
     """
@@ -263,11 +287,20 @@ def download_sample(testing: bool = False):
         if testing:
             cmd_dl = f"curl --create-dirs -J -O --output-dir {ZTFDATA}/ztfparsnip {DOWNLOAD_URL_SAMPLE_TEST}"
         else:
-            cmd_dl = f"curl --create-dirs -J -O --output-dir {ZTFDATA}/ztfparsnip {DOWNLOAD_URL_SAMPLE}"
+            if bl_corrected:
+                cmd_dl = f"curl --create-dirs -J -O --output-dir {ZTFDATA}/ztfparsnip {DOWNLOAD_URL_SAMPLE}"
+            else:
+                cmd_dl = f"curl --create-dirs -J -O --output-dir {ZTFDATA}/ztfparsnip {DOWNLOAD_URL_SAMPLE_NOBL}"
+
+        if bl_corrected:
+            zipfile_name = "BTS_plus_TDE"
+        else:
+            zipfile_name = "BTS_plus_TDE_nobl"
+
         cmd_extract = (
-            f"unzip {ZTFDATA}/ztfparsnip/BTS_plus_TDE.zip -d {ZTFDATA}/ztfparsnip"
+            f"unzip {ZTFDATA}/ztfparsnip/{zipfile_name}.zip -d {ZTFDATA}/ztfparsnip"
         )
-        cmd_remove_zip = f"rm {ZTFDATA}/ztfparsnip/BTS_plus_TDE.zip"
+        cmd_remove_zip = f"rm {ZTFDATA}/ztfparsnip/{zipfile_name}.zip"
 
         # Download
         subprocess.run(cmd_dl, shell=True)
@@ -275,19 +308,22 @@ def download_sample(testing: bool = False):
         # Extract
         subprocess.run(cmd_extract, shell=True)
 
-        extracted_dir = Path(ZTFDATA) / "ztfparsnip" / "BTS_plus_TDE"
+        extracted_dir = Path(ZTFDATA) / "ztfparsnip" / zipfile_name
 
         # Validate
         nr_files = len([x for x in extracted_dir.glob("*") if x.is_file()])
-        if nr_files == 6841 and testing:
+
+        if nr_files in [6841, 7130] and not testing:
             subprocess.run(cmd_remove_zip, shell=True)
         elif nr_files == 10 and testing:
             subprocess.run(cmd_remove_zip, shell=True)
         else:
             raise ValueError(
-                "Something went wrong with your download. Remove 'ZTFDATA/ztfparsnip/BTS_plus_TDE' and try again"
+                f"Something went wrong with your download. Remove 'ZTFDATA/ztfparsnip/{zipfile_name}' and try again"
            )
 
+        logger.info(f"{nr_files} files found in dir {extracted_dir}")
+
     else:
         raise ValueError(
             "You have to set the ZTFDATA environment variable in your .bashrc or .zshrc. See https://github.com/mickaelrigault/ztfquery"
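The add_mag change above makes the flux conversion work for both column layouts: baseline-corrected files carry ampl_corr/ampl_err_corr, uncorrected ones ampl/ampl.err. A condensed sketch of that fallback and the conversion it feeds (standalone function, not the library routine; 3630.78 Jy is the AB zero-point flux, and magzp/magzpunc columns are assumed present as in the source):

import numpy as np
import pandas as pd


def flux_from_ampl(df: pd.DataFrame) -> pd.DataFrame:
    # Pick the amplitude columns depending on the sample flavor.
    if "ampl_corr" in df.columns:
        ampl_col, ampl_err_col = "ampl_corr", "ampl_err_corr"
    else:
        ampl_col, ampl_err_col = "ampl", "ampl.err"

    # Drop negative-flux rows; backticks protect the dotted column name.
    df = df.query(f"`{ampl_col}` > 0")

    F0 = 10 ** (df.magzp / 2.5)
    F0_err = F0 / 2.5 * np.log(10) * df.magzpunc
    flux = df[ampl_col] / F0 * 3630.78
    flux_err = (
        np.sqrt((df[ampl_err_col] / F0) ** 2 + (df[ampl_col] * F0_err / F0**2) ** 2)
        * 3630.78
    )
    return df.assign(flux=flux, flux_err=flux_err)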
diff --git a/ztfparsnip/noisify.py b/ztfparsnip/noisify.py
index 4c1421b..162d643 100644
--- a/ztfparsnip/noisify.py
+++ b/ztfparsnip/noisify.py
@@ -9,14 +9,15 @@
 from copy import copy
 from pathlib import Path
 
-import lcdata  # type:ignore
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
+from numpy.random import default_rng
+
+import lcdata  # type:ignore
 import sncosmo  # type:ignore
 from astropy.cosmology import Planck18 as cosmo  # type:ignore
 from astropy.table import Table  # type:ignore
-from numpy.random import default_rng
 
 from ztfparsnip import io
@@ -46,6 +47,7 @@ def __init__(
         sig_noise_cut: bool = True,
         SN_threshold: float = 5.0,
         n_det_threshold: int = 5,
+        detection_scale: float = 0.5,
         subsampling_rate: float = 1.0,
         jd_scatter_sigma: float = 0.0,
     ):
@@ -62,9 +64,11 @@ def __init__(
         self.sig_noise_cut = sig_noise_cut
         self.SN_threshold = SN_threshold
         self.n_det_threshold = n_det_threshold
+        self.detection_scale = detection_scale
         self.subsampling_rate = subsampling_rate
         self.jd_scatter_sigma = jd_scatter_sigma
         self.rng = default_rng(seed=self.seed)
+        # self.z_valid_list = self.rng.uniform(float(self.header["bts_z"]), float(self.header["bts_z"])+self.delta_z, size=10000)
         self.z_list = self.rng.power(4, 10000) * (
             float(self.header["bts_z"]) + self.delta_z
         )
@@ -100,52 +104,78 @@ def noisify_lightcurve(self):
             if delta_m is not None:
                 new_table["magpsf"] = new_table["magpsf"].data + delta_m
                 new_table["flux"] = new_table["flux"].data + delta_f
-                if self.sig_noise_cut:
-                    peak_idx = np.argmax(new_table["flux"])
-                    sig_noise_df = pd.DataFrame(
-                        data={
-                            "SN": np.abs(
-                                np.array(new_table["flux"] / new_table["fluxerr"])
-                            )
-                        }
-                    )
-                    count_sn = sig_noise_df[
-                        sig_noise_df["SN"] > self.SN_threshold
-                    ].count()
-                    if (
-                        new_table["flux"][peak_idx] / new_table["fluxerr"][peak_idx]
-                    ) > self.SN_threshold:
-                        if count_sn[0] >= self.n_det_threshold:
-                            # Randomly remove datapoints, retaining (subsampling_rate)% of lc
-                            if (
-                                self.subsampling_rate < 1.0
-                                and len(new_table["flux"]) > 10
-                            ):
-                                subsampled_length = int(
-                                    len(new_table["flux"]) * self.subsampling_rate
-                                )
-                                indices_to_keep = self.rng.choice(
-                                    len(new_table["flux"]),
-                                    subsampled_length,
-                                    replace=False,
-                                )
-
-                                aug_table = new_table[indices_to_keep]
-                                if self.jd_scatter_sigma > 0:
-                                    aug_table = self.scatter_jd(
-                                        table=aug_table, sigma=self.jd_scatter_sigma
-                                    )
-                                noisy_lcs.append(aug_table)
+                # remove negative flux values
+                neg_mask = new_table["flux"].data > 0.0
+                new_table = new_table[neg_mask]
+                if len(new_table) == 0:
+                    res.append(0)
+                else:
+                    # Add cut on S/N
+                    if self.sig_noise_cut:
+                        peak_idx = np.argmax(new_table["flux"])
+                        sig_noise_df = pd.DataFrame(
+                            data={
+                                "SN": np.abs(
+                                    np.array(new_table["flux"] / new_table["fluxerr"])
+                                )
+                            }
+                        )
+                        count_sn = sig_noise_df[
+                            sig_noise_df["SN"] > self.SN_threshold
+                        ].count()
+                        if (
+                            new_table["flux"][peak_idx] / new_table["fluxerr"][peak_idx]
+                        ) > self.SN_threshold:
+                            if count_sn[0] >= self.n_det_threshold:
+                                # Remove data points according to density distribution
+                                new_idx = self.drop_points(
+                                    new_table["jd"],
+                                    new_table["band"],
+                                    cadence_scale=self.detection_scale,
+                                )
+                                aug_table = new_table[new_idx]
+                                # Then randomly remove datapoints, retaining (subsampling_rate)% of lc
+                                if (
+                                    self.subsampling_rate < 1.0
+                                    and len(aug_table["flux"]) > 10
+                                ):
+                                    """
+                                    if self.multiplier < 10.:
+                                        subsampling_val = 0.9
+                                    elif self.multiplier < 50.:
+                                        subsampling_val = 0.8
+                                    elif self.multiplier < 300.:
+                                        subsampling_val = 0.7
+                                    else:
+                                        subsampling_val = self.subsampling_rate
+                                    subsampled_length = int(
+                                        len(new_table["flux"]) * subsampling_val
+                                    )
+                                    """
+                                    subsampled_length = int(
+                                        len(aug_table["flux"]) * self.subsampling_rate
+                                    )
+                                    indices_to_keep = self.rng.choice(
+                                        len(aug_table["flux"]),
+                                        subsampled_length,
+                                        replace=False,
+                                    )
+                                    aug_table = aug_table[indices_to_keep]
+                                    if self.jd_scatter_sigma > 0:
+                                        aug_table = self.scatter_jd(
+                                            table=aug_table, sigma=self.jd_scatter_sigma
+                                        )
+                                    noisy_lcs.append(aug_table)
+                                else:
+                                    noisy_lcs.append(aug_table)
+                                res.append(1)
                             else:
-                                noisy_lcs.append(new_table)
-                            res.append(1)
+                                res.append(0)
                         else:
                             res.append(0)
                     else:
-                        res.append(0)
-                else:
-                    res.append(1)
-                    noisy_lcs.append(new_table)
+                        res.append(1)
+                        noisy_lcs.append(new_table)
             else:
                 res.append(0)
         """
@@ -161,6 +191,12 @@ def noisify_lightcurve(self):
                     print(table.meta["type"])
                     break
        """
 
+        # Augment original BTS table: remove data points according to density distribution
+        idx = self.drop_points(
+            table["jd"], table["band"], cadence_scale=self.detection_scale
+        )
+        table = table[idx]
+
         if self.output_format == "parsnip":
             table.keep_columns(
                 ["jd", "flux", "fluxerr", "magpsf", "sigmapsf", "band", "zp", "zpsys"]
@@ -264,15 +300,13 @@ def get_astropy_table(self):
         for fid, fname in zip([1, 2, 3], ["ztfg", "ztfr", "ztfi"]):
             phot_tab["band"][phot_tab["fid"] == fid] = fname
 
+        # Add 0.03 error floor to the errors in mag!! @Melissa Amenouche
+        # phot_tab["sigmapsf"] = np.sqrt(phot_tab["sigmapsf"]**2 + 0.03**2)
+
         phot_tab["flux"] = 10 ** (-(phot_tab["magpsf"] - 25) / 2.5)
         phot_tab["fluxerr"] = np.abs(
             phot_tab["flux"] * (-phot_tab["sigmapsf"] / 2.5 * np.log(10))
         )
-        # Add 7% to the errors in flux!!
-        phot_tab["fluxerr"] = phot_tab["fluxerr"] * 1.07
-        phot_tab["sigmapsf"] = np.abs(
-            (2.5 * np.log(10)) * (phot_tab["fluxerr"] / phot_tab["flux"])
-        )
         phot_tab["zp"] = 25
         phot_tab["zpsys"] = "ab"
         phot_tab.meta["z"] = self.header["bts_z"]
@@ -286,35 +320,53 @@ def get_noisified_data(self, lc_table):
         this_lc = this_lc[this_lc["flux"] > 0.0]
         if len(this_lc) == 0:
             return Table()
-        mag = this_lc["magpsf"]
-        magzp_orig = this_lc["magzp_orig"]
-        magzpunc_orig = this_lc["magzpunc_orig"]
-        fid = this_lc["fid"]
-        flux_old = this_lc["flux"]
-        fluxerr_old = this_lc["fluxerr"]
+        flux_obs = this_lc["flux"]
+        fluxerr_obs = this_lc["fluxerr"]
         truez = float(this_lc.meta["bts_z"])
         zp = this_lc["zp"]
         new_z = self.rng.choice(self.z_valid_list)
-        delta_m = cosmo.distmod(new_z) - cosmo.distmod(truez)
-        flux_new = 10 ** ((25 - mag - delta_m.value) / 2.5)
-        scale = (
+        d_scale = (
             cosmo.luminosity_distance(truez) ** 2
             / cosmo.luminosity_distance(new_z) ** 2
+        ).value
+
+        mask_signoise = flux_obs / fluxerr_obs < 5.0
+        flux_true = flux_obs - self.rng.normal(
+            scale=fluxerr_obs
+        )  # get true initial flux (removing scatter)
+        flux_true[mask_signoise] = flux_obs[
+            mask_signoise
+        ]  # but don't remove if S/N is poor
+
+        negflux = flux_true < 0.0  # set minimum flux
+        flux_true[negflux] = 0.01
+
+        flux_z = flux_true * d_scale
+        ef = 0.5425067
+        eb = 5.36951258
+        err_scale = (1 + ef**2) / eb**2
+        fluxerr_z_obs = (
+            np.sqrt(
+                (
+                    d_scale / (1 + 1 / (flux_true * err_scale))
+                    + 1 / (1 + err_scale * flux_true)
+                )
+            )
+            * fluxerr_obs
         )
-        df_f_old = fluxerr_old / flux_old
-        df_f_new = 1 / scale * df_f_old
-
-        flux_obs = flux_new + self.rng.normal(scale=np.sqrt(flux_new))
-        fluxerr_obs = flux_new * df_f_new
+        flux_z_obs = flux_z + self.rng.normal(scale=fluxerr_z_obs)
 
-        mag_new = self.flux_to_mag(flux_obs.value, zp).value
-        magerr_new = np.abs((2.5 * np.log(10)) * (fluxerr_obs / flux_obs)).value
+        zp_new = this_lc["zp"].data
+        mag_new = self.flux_to_mag(flux_z_obs, zp_new)
+        magerr_new = np.abs((2.5 * np.log(10)) * (fluxerr_z_obs / flux_z_obs))
         jd_new = this_lc["jd"].data
         band_new = this_lc["band"].data
-        zp_new = this_lc["zp"].data
         zpsys_new = this_lc["zpsys"].data
+        magzp_orig = this_lc["magzp_orig"].data
+        magzpunc_orig = this_lc["magzpunc_orig"].data
+        fid_new = this_lc["fid"].data
 
         if len(mag_new) > 0:
             phot = {
@@ -323,10 +375,10 @@ def get_noisified_data(self, lc_table):
                 "sigmapsf": magerr_new,
                 "magzp_orig": magzp_orig,
                 "magzpunc_orig": magzpunc_orig,
-                "fid": fid,
+                "fid": fid_new,
                 "band": band_new,
-                "flux": flux_obs,
-                "fluxerr": fluxerr_obs,
+                "flux": flux_z_obs,
+                "fluxerr": fluxerr_z_obs,
                 "zp": zp_new,
                 "zpsys": zpsys_new,
             }
@@ -387,6 +439,47 @@ def scatter_jd(self, table: Table, sigma: float = 0.05) -> Table:
 
         return table
 
+    def drop_points(
+        self, x, band, time_period: float = 5.0, cadence_scale: float = 0.5
+    ):
+        # Split the data based on 'band'
+        band_indices = {
+            "r": (band == "ztfr"),
+            "g": (band == "ztfg"),
+            "i": (band == "ztfi"),
+        }
+        # Initialise an empty list to store retained indices for each band
+        retained_indices_list = []
+        for band_label, indices in band_indices.items():
+            # Filter x based on the current band
+            band_x = x[indices]
+            # Calculate the number of detections within each time period for the current band
+            num_detections = np.array(
+                [
+                    sum(
+                        (band_x >= i - time_period / 2) & (band_x < i + time_period / 2)
+                    )
+                    for i in band_x
+                ]
+            )
+            # Calculate the density of detections for the current band
+            density = num_detections / time_period  # len(x)
+            # Drop points randomly based on the probability distribution for the current band
+            random_numbers = self.rng.uniform(0, 1, len(density))
+            retained_indices = [
+                i
+                for i, rand_num in enumerate(random_numbers)
+                if rand_num < cadence_scale / density[i]
+            ]
+            # Add the retained indices for the current band to the list
+            retained_indices_list.append(np.where(indices)[0][retained_indices])
+
+        # Combine the retained indices for all bands
+        combined_retained_indices = np.concatenate(retained_indices_list)
+        # Sort the combined retained indices
+        combined_retained_indices = np.sort(combined_retained_indices)
+        return combined_retained_indices
+
     @staticmethod
     def flux_to_mag(flux, zp):
         """
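The new drop_points method above thins each band according to the local detection density: for every epoch it counts the detections inside a time_period-day window and keeps the point with probability cadence_scale / density, so densely sampled stretches are thinned hard while isolated epochs survive. A self-contained sketch of the same idea, iterating over whichever bands are present instead of the fixed r/g/i mapping and using an explicit RNG; the class method above remains the authoritative version:

import numpy as np
from numpy.random import default_rng


def drop_points_sketch(jd, band, time_period=5.0, cadence_scale=0.5, seed=0):
    """Keep each detection with probability cadence_scale / (detections per day)."""
    rng = default_rng(seed)
    retained = []
    for b in np.unique(band):
        idx = np.where(band == b)[0]
        t = jd[idx]
        # detections per day within +/- time_period/2 around each epoch
        density = (
            np.array(
                [
                    np.sum((t >= ti - time_period / 2) & (t < ti + time_period / 2))
                    for ti in t
                ]
            )
            / time_period
        )
        keep = rng.uniform(0, 1, len(t)) < cadence_scale / density
        retained.append(idx[keep])
    return np.sort(np.concatenate(retained))


# Dense epochs (3 detections in 5 days -> density 0.6/day) are kept with
# probability ~0.83; isolated epochs (0.2/day) are always kept.
jd = np.array([0.0, 0.1, 0.2, 5.0, 20.0])
band = np.array(["ztfr"] * 5)
print(drop_points_sketch(jd, band))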