In [1]:
import numpy as np
import pickle, sys, os, json

# Other auxiliary scripts
sys.path.insert(0, os.getcwd() + "/../scripts/")
import lstpipeline

import logging

# Logger of this notebook, printing INFO (and above) messages to the stream
# handler's default stream.
logger = logging.getLogger(__name__)
# logging.getLogger returns the same object on every call, so re-running this
# cell would stack one more handler each time and duplicate every message.
# Attach the handler only when the logger does not have one yet.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# Number of subruns in one job
n_subruns_job = 5

### Paths and filenames

In [None]:
# Working directory of the notebook, with a trailing slash so that the
# sub-paths below can be appended directly
root = f"{os.getcwd()}/"
# Folder where the slurm outputs are stored
root_slurm = f"{root}objects/output_slurm"
# Folder holding the configuration files
root_config = f"{root}config/"

# Text file with the job configuration, inside the configuration folder
file_job_config = f"{root_config}job_config_runs.txt"

# STANDARD paths ---------
root_dl1 = "/fefs/aswg/data/real/DL1/*/v0.*/tailcut84/"
root_rfs = "/fefs/aswg/data/models/AllSky/20230901_v0.10.4_allsky_base_prod/"
root_mcs = "/fefs/aswg/data/mc/DL2/AllSky/20230901_v0.10.4_allsky_base_prod/TestingDataset/"

# Create the folders that do not exist yet. exist_ok=True already makes the
# call a no-op for an existing directory, so no separate existence check is
# needed (and a path that exists as a regular file is reported instead of
# being silently skipped).
for path in [root_config, root_slurm]:
    os.makedirs(path, exist_ok=True)

### Run numbers we have

In [10]:
# Run numbers of the "performance paper" set (label taken from the variable
# name; the selection criteria are not shown in this notebook -- TODO confirm).
# NOTE: this first, longer list is overwritten by the reassignment right below,
# so as written it has no effect on the rest of the notebook.
runs_performance_paper = [
       2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
       2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
       2990, 2991, 2992, 3004, 3005, 3006, 3007, 3008, 3093, 3094, 3095,
       3096, 3231, 3232, 3243, 3270, 3271, 3272, 3273, 3274, 3275, 3276,
       3277, 3278, 3279, 3318, 3319, 3320, 3321, 3328, 3329, 3330, 3338,
       3339, 3340, 3355, 3356, 3373, 3598, 3599, 3600, 3601, 3615, 3632,
       3633, 3634, 3635, 3672, 3673, 3674, 3675, 3676, 3677, 3706, 3707,
       3708, 4067, 4068, 4086, 4087, 6045, 6073, 6304, 6872, 6873, 6874,
       6875, 6892, 6893, 6894, 6895, 7097, 7098, 7099, 7133, 7136, 7161,
       7195, 7196, 7197, 7199, 7200, 7227, 7228, 7231, 7232, 7233, 7253,
       7254, 7255, 7256, 7274, 7275, 7276, 7277
]
# Shorter list that replaces the one above: it holds the same leading entries
# and stops at run 3635. This is the list that is actually in effect.
runs_performance_paper = [
       2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
       2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
       2990, 2991, 2992, 3004, 3005, 3006, 3007, 3008, 3093, 3094, 3095,
       3096, 3231, 3232, 3243, 3270, 3271, 3272, 3273, 3274, 3275, 3276,
       3277, 3278, 3279, 3318, 3319, 3320, 3321, 3328, 3329, 3330, 3338,
       3339, 3340, 3355, 3356, 3373, 3598, 3599, 3600, 3601, 3615, 3632,
       3633, 3634, 3635
]

# Even shorter alternative selection (stops at run 2992), currently disabled.
# runs_performance_paper = [
#        2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
#        2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
#        2990, 2991, 2992, 
# ]

# Additional run selections. The grouping criteria are only conveyed by the
# variable names (they are not computed in this notebook), and none of these
# lists is used by the run selection made right after them.

# Runs labelled as belonging to the "good period".
runs_good_period = [
       10668, 10671, 10672, 10673, 10674, 10917, 10950, 11088, 11125,
       11166, 11191, 11192, 11193, 11196, 11197, 11208, 11209, 11219,
       11221, 11222, 11224, 11225, 11228, 11229, 11230, 11231, 11237,
       11238, 11239, 11240, 11241, 11243, 11244, 11245, 11246, 11247,
       11249, 11250, 11251, 11252, 11254, 11257, 11258, 11259, 11260,
       11261, 11262, 11263, 11264, 11265, 11266, 11267, 11268, 11269,
       11270, 11271, 11272, 11273, 11276, 11277, 11278, 11280, 11282,
       11354, 11355, 11357, 11358, 11359, 11360, 11361, 11363, 11378,
       11379, 11380, 11381, 11382, 11383, 11384, 11408, 11409, 11410,
       11439, 11609, 11610, 11634, 11649, 11650, 11651, 11652, 11671,
       11676, 11711, 11712, 11834, 11919, 11920, 11930
]

# Other runs labelled as good.
runs_other_good = [ 
        2758,  2759,  3088,  3098,  3584,  3586,  3683,  3705,  3725,
        3894,  4010,  4011,  4015,  4129,  5955,  5957,  5958,  5992,
        5993,  5995,  6011,  6282,  7084,  7087,  7140,  7142,  7143,
        7169,  7170,  7172,  7174,  7201,  9252,  9274,  9434,  9436,
        9596,  9687,  9689,  9996, 10034, 10035, 10083, 10084, 10085,
       10089, 10090, 10590, 10591, 10592, 10593, 10595, 10596, 10597,
       10599, 10600, 10601, 10602, 10603, 10604, 10630, 10633, 10634,
       10635, 10636, 10637, 10638, 12048, 12077, 12291, 12669, 12742,
       12767, 14629, 14670, 15570, 15571, 15572, 15641, 15727, 15780,
       15969, 15971, 16001, 16052, 16111, 16237, 16238, 16286, 16337,
       16409
]

# Runs labelled as having bad rates despite good weather.
runs_bad_rates_good_weather = [ 9686, 10258, 10260, 10262, 10263]

# Runs labelled as having bad rates and bad weather.
runs_bad_rates_bad_weather  = [
        2767,  5738,  5800,  6192,  6194,  6852,  6853,  6962,  6963,
        6964,  6965,  6966,  6990,  9253,  9715,  9716,  9882, 10077,
       10078, 10264, 10526, 10527, 10528
]

# Runs to analyse: only the first entry of the performance-paper list
runs = runs_performance_paper[0:1]

print("Computing for {} runs".format(len(runs)))

Computing for 1 runs


### Reading some of the information in the datachecks

In [11]:
%%time
# Build the dictionary with one entry per run, each one initially holding
# only its own run number
dict_dchecks = {run: {"run_num": run} for run in runs}

# Let the pipeline helper complete the entries using the DL1 root path
dict_dchecks = lstpipeline.add_dl1_paths_to_dict(dict_dchecks, root_dl1)


Adding dl1  data to dictionary (Run 2914)...
...Finished adding dl1 data to dictionary


CPU times: user 6.12 s, sys: 2.37 s, total: 8.49 s
Wall time: 11 s


### For each run, get its set of subruns

In [12]:
# Map every run to the subrun numbers of its subrun-wise DL1 files. The number
# is read from the second-to-last dot-separated field of each (sorted) file
# name.
dict_run_sruns = {
    run: [
        int(fname.split(".")[-2])
        for fname in np.sort(dict_dchecks[run]["dl1a"]["srunwise"])
    ]
    for run in runs
}

### Storing the jobs, grouping a certain number of subruns inside the same job

In [13]:
# Write the job configuration file: one line per job, each job covering up to
# n_subruns_job consecutive (sorted) subruns of a run.
#   "<run>_<first>_<last>"  for a job with several subruns
#   "<run>_<srun>"          for a job with a single subrun
#
# The subruns are split by position in fixed-size chunks. Flushing on
# "counter % n_subruns_job == 0 and srun != 0" instead would put
# n_subruns_job + 1 subruns in the first job, and would emit a one-subrun job
# whenever the first subrun number of a run is not 0.
n_jobs = 0
with open(file_job_config, "w") as file:

    for run in runs:

        sruns = np.sort(dict_run_sruns[run])

        for start in range(0, len(sruns), n_subruns_job):
            chunk = sruns[start:start + n_subruns_job]

            # Only the first and last subrun of the chunk are written
            if len(chunk) == 1:
                tmp_str = f"_{chunk[0]}"
            else:
                tmp_str = f"_{chunk[0]}_{chunk[-1]}"

            file.write(f"{run}{tmp_str}\n")
            n_jobs += 1

print(f"The final amount of jobs is {n_jobs}")

The final amount of jobs is 23


# Generating the configuration file
## Configure here:

In [2]:
# JSON file with the scaling configuration, relative to the working directory
fname_config_scaling = os.path.join(
    "config",
    "config_scaling_parameters.json",
)

In [None]:
# Source name, used just to complete the results file and to improve the
# organization of the runs.
source_name = "crab"

# --- Fit parameters ---
# Chosen limits in intensity (p.e.) for applying the fit, i.e. the power law
# will be fitted only with the points within this range.
limits_intensity = [316, 562]
# For the positive scaling cases (most of them), we need a lower limit in
# intensity. This limit is used for the subset of events that are scaled just
# to find which is the scaling value. We use a very low limit by default,
# 60 p.e. compared to the lower limit of the fit (316 p.e.), because in the
# worst cases we will have a very non-linear scaling that will displace the
# event intensities significantly.
limits_intensity_extended = 60

# --- Power law parameters for the reference ---
# All these parameters are taken from a common analysis of the full dataset,
# where the period from the end of 2022 to the start of 2023 is taken as
# reference for good runs. We take as reference the mean power law parameters
# in that period. p0 is the normalization factor and p1 is the slope.
ref_p0 =  1.74
ref_p1 = -2.23

# --- Threshold in statistics for the last subrun ---
# Number of events after cleaning needed to consider that the last subrun has
# enough statistics to perform the analysis over it. Otherwise, the scaling
# values applied to this last subrun are the same that are applied to the
# previous subrun.
statistics_threshold = 4000

# --- Parameters of the empirical fits for the Zenith Distance corrections ---
# They are simply two second-degree polynomials, one for each parameter of the
# power law (three coefficients each).
p0a, p0b, p0c = -0.44751321, 3.62502037, -1.43611437
p1a, p1b, p1c = -2.89253919, 0.99443581, -0.34013068

# Standard locations of the data in the IT cluster ---------
root_dl1 = "/fefs/aswg/data/real/DL1/*/v0.*/tailcut84/"
root_rfs = "/fefs/aswg/data/models/AllSky/20230927_v0.10.4_crab_tuned/"
root_mcs = "/fefs/aswg/data/mc/DL2/AllSky/20230927_v0.10.4_crab_tuned/TestingDataset/"

# Working directory of this script; every path below is built from it
root = os.getcwd()

# Configuration files ---------
# Configuration file we are going to use
config_file = os.path.join(root, "config/standard_config.json")
# Configuration file for the job launching
file_job_config = os.path.join(root, "config", "job_config_runs.txt")

# Objects ---------
# Main directory to store objects
root_objects = os.path.join(root, "objects/")
# Sub-dl1 objects directory
root_sub_dl1 = os.path.join(root_objects, "sub_dl1/")
# Directories for the results of the fit of each run
root_results = os.path.join(root_objects, "results_fits/")
root_final_results = os.path.join(root_objects, "final_results_fits/")
# Directory for temporal bash scripts
file_temporal_bash = os.path.join(root, "objects", "tmp_bash/")

# Data ---------
# Main data directory, specific to the selected source
root_data = os.path.join(root, f"../../data/cherenkov_transparency_corrections/{source_name}/")
# One sub-directory of the main data directory per data level
(
    dir_dl1b_scaled,
    dir_dl1m_scaled,
    dir_dl2_scaled,
    dir_dl2,
    dir_dl3_scaled_base,
    dir_dl3_base,
    dir_irfs,
) = [
    os.path.join(root_data, subdir)
    for subdir in (
        "dl1_scaled/",
        "dl1_merged_scaled/",
        "dl2_scaled/",
        "dl2/",
        "dl3_scaled/",
        "dl3/",
        "irfs/",
    )
]

### Insert in a dictionary

In [None]:
# Gather every setting in one nested dictionary: the source name at the top
# level, plus one group for the fit parameters and one for the paths. Each
# entry is stored under the name of the variable it comes from.
configuration_dictionary = dict(
    source_name=source_name,
    fit_parameters=dict(
        limits_intensity=limits_intensity,
        limits_intensity_extended=limits_intensity_extended,
        ref_p0=ref_p0,
        ref_p1=ref_p1,
        statistics_threshold=statistics_threshold,
        p0a=p0a,
        p0b=p0b,
        p0c=p0c,
        p1a=p1a,
        p1b=p1b,
        p1c=p1c,
    ),
    paths=dict(
        root_dl1=root_dl1,
        root_rfs=root_rfs,
        root_mcs=root_mcs,
        root=root,
        config_file=config_file,
        root_objects=root_objects,
        root_data=root_data,
        root_sub_dl1=root_sub_dl1,
        root_results=root_results,
        root_final_results=root_final_results,
        file_job_config=file_job_config,
        file_temporal_bash=file_temporal_bash,
        dir_dl1b_scaled=dir_dl1b_scaled,
        dir_dl1m_scaled=dir_dl1m_scaled,
        dir_dl2_scaled=dir_dl2_scaled,
        dir_dl2=dir_dl2,
        dir_dl3_scaled_base=dir_dl3_scaled_base,
        dir_dl3_base=dir_dl3_base,
        dir_irfs=dir_irfs,
    ),
)

# Write the dictionary to the JSON configuration file (overwriting it)
with open(fname_config_scaling, "w") as json_file:
    json.dump(configuration_dictionary, json_file)

In [7]:
# # Code to read it
# with open(fname_config_scaling, "r") as json_file:
#     configuration_dictionary = json.load(json_file)

# # Now we extract all the variables with the same name as in the dictionary
# source_name = configuration_dictionary["source_name"]
# for superkey in ["fit_parameters", "paths"]:
#     for key, value in configuration_dictionary[superkey].items():
#         globals()[key] = value

# Generate the IRFS
### This only needs to be run once

In [16]:
# root_scripts  = "/fefs/aswg/workspace/juan.jimenez/cherenkov_transparency_corrections/data_processing_srunwise/"
# python_script = f"{root_scripts}/script_1_scaling.py"

# ! python $python_script "irfs"