simeonreusch · simeonreusch · Aug 15, 2023 · Jul 3, 2023 · Jul 3, 2023 · Jul 3, 2023
diff --git a/tests/test_noisification.py b/tests/test_noisification.py
@@ -8,6 +8,7 @@
 from ztfparsnip.create import CreateLightcurves
 
 logging.getLogger("ztfparsnip.create").setLevel(logging.DEBUG)
+logging.getLogger("ztfparsnip.io").setLevel(logging.DEBUG)
 
 
 class TestNoisification(unittest.TestCase):
@@ -54,92 +55,61 @@ def test_noisification_csv(self):
         sample.select()
         sample.create(plot_debug=True)
 
-        for name in ["ZTF19aapreis", "ZTF20acvmzfv"]:
+        available = [x for x in sample.test_dir.glob("*") if x.is_file()]
+
+        for entry in available:
+            print(entry)
+
+        for name in ["ZTF19aapreis", "ZTF18aamvfeb"]:
             path = sample.test_dir / f"{name}.csv"
             pd.read_csv(path, comment="#")
 
-        infile_noisified = sample.train_dir / "ZTF18aavvnzu_3.csv"
+        infile_noisified = sample.train_dir / "ZTF20acueziy_2.csv"
         df = pd.read_csv(infile_noisified, comment="#", index_col=0)
         df.sort_values(by=["obsmjd"], inplace=True)
+
         mags = df.magpsf.values
         reference_mags = [
-            25.29078394,
-            25.76189095,
-            27.2655531,
-            24.6668269,
-            24.53136655,
-            np.nan,
-            25.57881118,
-            24.96131425,
-            24.6494601,
-            29.39622331,
+            20.7862801545962,
+            19.9859143237067,
+            20.3707809492468,
+            19.9429214884179,
+            20.4923597159272,
+            19.888937399982,
+            20.8460105835683,
+            20.7091668978473,
+            20.0292037077898,
+            21.1112591814753,
+            20.1875765243095,
+            21.0763226188922,
+            20.3086801104863,
+            21.9130488366769,
+            20.6240161912531,
+            21.7743936386835,
+            20.9042362388927,
+            20.0740109279902,
+            21.239673495802,
+            20.7024764008374,
+            25.7528842507013,
+            20.4149368461691,
+            21.1107872472987,
+            20.8958085465419,
+            22.5168452113857,
+            21.2957311943224,
+            20.9247303408706,
+            21.1622199708444,
+            22.8048906115971,
+            21.3839012928751,
             np.nan,
-            23.98498964,
-            24.12828652,
-            23.1775983,
-            22.00932895,
-            22.55727056,
-            21.62206101,
-            21.39578053,
-            22.05108525,
-            20.94277367,
-            21.03368154,
-            20.34244796,
-            19.9950083,
-            20.78069822,
-            19.88999309,
-            19.75953625,
-            19.52539882,
-            19.73810262,
-            21.16211422,
-            19.56434958,
-            19.8297774,
-            19.58625593,
-            21.16368346,
-            20.15647694,
-            19.85284108,
-            19.89014672,
-            21.39221394,
-            19.99618702,
-            20.10251282,
-            20.18338371,
-            20.46475862,
-            20.27376621,
-            20.45132811,
-            20.75834641,
-            21.78499175,
-            21.01347781,
-            21.36923858,
-            21.61849326,
-            20.41307089,
-            20.23913687,
-            21.45339639,
-            21.27754412,
-            20.53259678,
-            21.24645026,
-            21.23964233,
-            21.81331908,
-            20.51849827,
-            21.42855531,
-            21.92474183,
-            20.7997237,
-            20.86847922,
-            21.26933793,
-            21.63336927,
-            22.15300501,
-            21.05602829,
-            21.85539486,
-            22.23739681,
-            20.93244031,
-            21.78658614,
-            21.84444574,
-            20.98134349,
-            22.26270503,
-            21.43027687,
-            21.87283486,
-            22.18428394,
-            21.40032939,
-            22.04574997,
+            21.1104346878744,
+            20.9656819124267,
+            21.2908527288881,
+            20.9777549528361,
+            22.1839553755309,
+            22.0672315128775,
+            21.8042786749289,
+            21.7824972084558,
+            24.5796190172382,
         ]
 
         np.testing.assert_almost_equal(df.magpsf.values, reference_mags, decimal=5)
diff --git a/ztfparsnip/create.py b/ztfparsnip/create.py
@@ -30,6 +30,7 @@ def __init__(
         k_corr: bool = True,
         seed: int | None = None,
         bts_baseline_dir: Path = io.BTS_LC_BASELINE_DIR,
+        bl_corrected: bool = True,
         name: str = "train",
         reprocess_headers: bool = False,
         output_format: str = "parsnip",
@@ -53,7 +54,11 @@ def __init__(
         self.plot_magdist = plot_magdist
         self.train_dir = train_dir
         self.plot_dir = plot_dir
-        self.lc_dir = bts_baseline_dir
+        if bl_corrected:
+            self.lc_dir = bts_baseline_dir
+        else:
+            self.lc_dir = io.BTS_LC_DIR
+
         self.testing = testing
 
         self.rng = default_rng(seed=self.seed)
@@ -75,36 +80,46 @@ def __init__(
         if isinstance(self.plot_dir, str):
             self.plot_dir = Path(self.plot_dir)
 
-        if test_dir is None:
-            self.test_dir = self.train_dir.resolve().parent / "test"
+        self.test_dir: Path | None = None
+
+        if self.test_fraction > 0:
+            if test_dir is None:
+                self.test_dir = self.train_dir.resolve().parent / "test"
+            else:
+                self.test_dir = Path(test_dir)
+            self.test_dir.mkdir(exist_ok=True, parents=True)
         else:
-            self.test_dir = Path(test_dir)
+            self.test_dir = None
 
-        for p in [self.train_dir, self.plot_dir, self.test_dir]:
-            if not p.exists():
-                os.makedirs(p)
+        for p in [self.train_dir, self.plot_dir]:
+            p.mkdir(exist_ok=True, parents=True)
 
         self.config = io.load_config()
 
         """
         if we are in the default sample dir, check if files are there,
-        check if files are there an download if not
+        and download them if they are missing
         """
-        if self.lc_dir == io.BTS_LC_BASELINE_DIR:
+        if self.lc_dir in [io.BTS_LC_BASELINE_DIR, io.BTS_LC_DIR]:
             if not self.testing:
                 nr_files = len([x for x in self.lc_dir.glob("*") if x.is_file()])
             else:
                 nr_files = 0
                 for x in self.lc_dir.glob("*"):
                     if f"{x.name}".split("_")[0] in self.config["test_lightcurves"]:
                         nr_files += 1
-            if (self.testing == False and nr_files < 6841) or (
-                self.testing and nr_files < 10
+
+            if (
+                (self.testing == False and bl_corrected == True and nr_files < 6841)
+                or (self.testing == False and bl_corrected == False and nr_files < 7130)
+                or (self.testing and nr_files < 10)
             ):
                 self.logger.info("Downloading sample")
-                io.download_sample(testing=testing)
+                io.download_sample(testing=testing, bl_corrected=bl_corrected)
 
-        self.ztfids = io.get_all_ztfids(lc_dir=self.lc_dir, testing=self.testing)
+        self.ztfids = io.get_all_ztfids(
+            lc_dir=self.lc_dir, testing=self.testing, bl_corrected=bl_corrected
+        )
 
         classkeys_available = [
             key
@@ -135,7 +150,17 @@ def __init__(
 
         self.logger.info("Creating noisified training data.")
         self.logger.info(
-            f"\n---------------------------------\nSelected configuration\nweights: {weights_info}\nk correction: {self.k_corr}\ntest fraction: {self.test_fraction}\nseed: {self.seed}\noutput format: {self.output_format}\ntraining data output directory: {self.train_dir}\n---------------------------------"
+            f"\n---------------------------------\n"
+            f"Selected configuration"
+            f"\nweights: {weights_info}\n"
+            f"k correction: {self.k_corr}\n"
+            f"test fraction: {self.test_fraction}\n"
+            f"seed: {self.seed}\n"
+            f"output format: {self.output_format}\n"
+            f"training data output directory: {self.train_dir}\n"
+            f"test data output directory: {self.test_dir}\n"
+            f"plot directory: {self.plot_dir}\n"
+            f"---------------------------------"
         )
 
     def get_simple_class(self, classkey: str, bts_class: str) -> str:
@@ -249,6 +274,7 @@ def select(
         for k, v in classes_available.items():
             availability += f"{k}: {classes_available[k]['entries']}\n"
             available_dict.update({k: classes_available[k]["entries"]})
+
         self.logger.info(
             f"\n---------------------------------\nLightcurves available:\n{availability}---------------------------------"
         )
@@ -306,6 +332,7 @@ def create(
         delta_z: float = 0.1,
         SN_threshold: float = 5.0,
         n_det_threshold: int = 5,
+        detection_scale: float = 0.5,
         subsampling_rate: float = 1.0,
         jd_scatter_sigma: float = 0.0,
         n: int | None = None,
@@ -329,6 +356,7 @@ def create(
                     "SN_threshold": SN_threshold,
                     "n_det_threshold": n_det_threshold,
                 },
+                "detection_scale": detection_scale,
                 "subsampling_rate": subsampling_rate,
                 "jd_scatter_sigma": jd_scatter_sigma,
             }
@@ -342,7 +370,8 @@ def create(
                     if c in self.selection.keys():
                         # check if it's a test sample lightcurve
                         if header["name"] in self.test_sample["all"]["ztfids"]:
-                            multiplier = 0
+                            # test lightcurves now get the class multiplier too (was: multiplier = 0)
+                            multiplier = self.selection[c]
                             get_test = True
                         else:
                             multiplier = self.selection[c]
@@ -359,21 +388,36 @@ def create(
                             sig_noise_cut=sig_noise_cut,
                             SN_threshold=SN_threshold,
                             n_det_threshold=n_det_threshold,
+                            detection_scale=detection_scale,
                             subsampling_rate=subsampling_rate,
                             jd_scatter_sigma=jd_scatter_sigma,
                             output_format=self.output_format,
                         )
 
                         if get_test:
-                            test_lc, _ = noisify.noisify_lightcurve()
+                            test_lc, noisy_test_lcs = noisify.noisify_lightcurve()
                             if test_lc is not None:
+                                for i, noisy_test_lc in enumerate(noisy_test_lcs):
+                                    noisy_test_lc.meta["name"] = (
+                                        noisy_test_lc.meta["name"] + f"_{i}"
+                                    )
                                 final_lightcurves["bts_test"].append(test_lc)
-                                if self.output_format == "ztfnuclear":
+                                final_lightcurves["bts_test"].extend(noisy_test_lcs)
+                                if (
+                                    self.output_format == "ztfnuclear"
+                                    and self.test_dir is not None
+                                ):
                                     io.save_csv_with_header(
                                         test_lc,
                                         savedir=self.test_dir,
                                         output_format=self.output_format,
                                     )
+                                    for noisy_test_lc in noisy_test_lcs:
+                                        io.save_csv_with_header(
+                                            noisy_test_lc,
+                                            savedir=self.test_dir,
+                                            output_format=self.output_format,
+                                        )
 
                         else:
                             bts_lc, noisy_lcs = noisify.noisify_lightcurve()
@@ -456,7 +500,7 @@ def create(
             # Save h5 files
             for k, v in final_lightcurves.items():
                 if len(v) > 0:
-                    if k == "bts_test":
+                    if k == "bts_test" and self.test_dir is not None:
                         output_dir = self.test_dir
                     else:
                         output_dir = self.train_dir