# Notebook for configuring jobs and processing
#### First we import the needed packages

In [1]:
import numpy as np
import pickle, sys, os, json
import matplotlib.pyplot as plt

# Other auxiliary scripts: put the "../scripts/" folder (relative to the current
# working directory) at the front of the module search path so that the
# local helper module imported right below can be found
sys.path.insert(0, os.getcwd() + "/../scripts/")
import lstpipeline

# <span style="color:blue">1. Configuring file for each job to be sent</span>

In [2]:
# Number of subruns in one job
n_subruns_job = 5

### Paths and filenames

In [3]:
# fname_config_scaling = os.path.join("config", "config_scaling_parameters.json")

# # Code to read it
# with open(fname_config_scaling, "r") as json_file:
#     configuration_dictionary = json.load(json_file)

# # Now we extract all the variables with the same name as in the dictionary
# source_name = configuration_dictionary["source_name"]
# for superkey in ["fit_parameters", "paths"]:
#     for key, value in configuration_dictionary[superkey].items():
#         globals()[key] = value

In [4]:
# Directory the notebook is executed from, kept with a trailing slash so the
# pieces below can simply be appended to it
root = f"{os.getcwd()}/"
# Folder where the slurm outputs are stored
root_slurm = f"{root}objects/output_slurm"
# Folder holding the configuration files we are going to use
root_config = f"{root}config/"

# Text file in which the run/subrun configuration of every job is written
file_job_config = f"{root_config}job_config_runs.txt"

# STANDARD paths ---------
# NOTE(review): root_rfs and root_mcs are assigned different values
# (20230927_v0.10.4_crab_tuned) in the fit-parameter configuration cell further
# down in this notebook -- confirm which production is the intended one.
root_dl1 = "/fefs/aswg/data/real/DL1/*/v0.*/tailcut84/"
root_rfs = "/fefs/aswg/data/models/AllSky/20230901_v0.10.4_allsky_base_prod/"
root_mcs = "/fefs/aswg/data/mc/DL2/AllSky/20230901_v0.10.4_allsky_base_prod/TestingDataset/"

# Make sure the output folders exist before anything is written into them
for directory in (root_config, root_slurm):
    if os.path.exists(directory):
        continue
    os.makedirs(directory, exist_ok=True)

### Run numbers we have

In [5]:
# Every run number we have. This list is kept in a single place; the subset that
# is actually processed is derived from it below instead of being copy-pasted.
runs_all = [2853, 2854, 2855, 2913, 2914, 2916, 2917, 2918, 2919, 2922, 2923,
       2924, 2925, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2952,
       2953, 2954, 2955, 2956, 2957, 2958, 2959, 2960, 2961, 2965, 2966,
       2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977,
       2988, 2989, 2990, 2991, 2992, 3004, 3005, 3006, 3007, 3008, 3009,
       3010, 3011, 3012, 3087, 3088, 3089, 3090, 3093, 3094, 3095, 3096,
       3097, 3098, 3099, 3100, 3101, 3143, 3144, 3169, 3170, 3171, 3231,
       3232, 3233, 3234, 3235, 3236, 3237, 3243, 3244, 3245, 3264, 3265,
       3266, 3267, 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3276,
       3277, 3278, 3279, 3318, 3319, 3320, 3321, 3328, 3329, 3330, 3338,
       3339, 3340, 3355, 3356, 3373, 3382, 3383, 3384, 3385, 3514, 3515,
       3516, 3583, 3584, 3585, 3586, 3598, 3599, 3600, 3601, 3615, 3616,
       3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3649, 3650,
       3651, 3652, 3653, 3655, 3657, 3659, 3672, 3673, 3674, 3675, 3676,
       3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3703, 3704, 3705,
       3706, 3707, 3708, 3709, 3710, 3711, 3712, 3725, 3729, 3730, 3894,
       3895, 3925, 3953, 3954, 3955, 3956, 3957, 3959, 3960, 3961, 3962,
       3973, 3974, 3975, 3976, 3977, 3979, 3980, 3981, 3982, 3983, 4007,
       4008, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4027, 4028, 4029,
       4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4067, 4068, 4069,
       4070, 4071, 4073, 4074, 4075, 4086, 4087, 4088, 4089, 4090, 4091,
       4092, 4093, 4094, 4095, 4125, 4126, 4127, 4128, 4129, 4145, 4146,
       4147, 4148, 4149, 4395, 5737, 5738, 5758, 5759, 5775, 5776, 5777,
       5798, 5799, 5800, 5955, 5956, 5957, 5958, 5992, 5993, 5994, 5995,
       6011, 6013, 6014, 6016, 6039, 6040, 6041, 6042, 6043, 6044, 6045,
       6068, 6069, 6070, 6071, 6072, 6073, 6146, 6147, 6148, 6166, 6167,
       6168, 6169, 6170, 6171, 6172, 6192, 6193, 6194, 6195, 6239, 6240,
       6241, 6242, 6279, 6281, 6282, 6301, 6302, 6303, 6304, 6323, 6324,
       6851, 6852, 6853, 6854, 6855, 6871, 6872, 6873, 6874, 6875, 6887,
       6888, 6889, 6890, 6891, 6892, 6893, 6894, 6895, 6896, 6897, 6898,
       6935, 6936, 6943, 6944, 6945, 6962, 6963, 6964, 6965, 6966, 6968,
       6969, 6990, 6992, 7084, 7086, 7087, 7088, 7097, 7098, 7099, 7100,
       7101, 7102, 7105, 7106, 7133, 7134, 7135, 7136, 7137, 7138, 7139,
       7140, 7141, 7142, 7143, 7161, 7168, 7169, 7170, 7171, 7172, 7173,
       7174, 7195, 7196, 7197, 7198, 7199, 7200, 7201, 7202, 7227, 7228,
       7229, 7230, 7231, 7232, 7233, 7234, 7235, 7236, 7237, 7238, 7253,
       7254, 7255, 7256, 7274, 7275, 7276, 7277, 7278, 7279, 7280, 7281,
       7282, 7301, 7302, 7303, 7304, 7305, 7434, 7465, 7466]

# Selecting the runs we want to analyse: the first 102 entries of the full list
# (runs 2853 to 3279). Use `runs_all[:]` instead to process every run.
n_runs_selected = 102
runs = runs_all[:n_runs_selected]

print(f"Computing for {len(runs)} runs")

Computing for 102 runs


### Reading the number of subruns from the datachecks

In [6]:
%%time
# One entry per run, holding for now only its run number; the helper below
# receives this dictionary together with the DL1 root path and returns it
# with the "dl1a" -> "srunwise" file lists that are read afterwards
dict_dchecks = {run: {"run_num": run} for run in runs}

dict_dchecks = lstpipeline.add_dl1_paths_to_dict(dict_dchecks, root_dl1)

# For every run, the subrun number of each subrun-wise DL1 file, taken as the
# second-to-last dot-separated field of the (sorted) file names
dict_run_sruns = {
    run: [
        int(fname.split(".")[-2])
        for fname in np.sort(dict_dchecks[run]["dl1a"]["srunwise"])
    ]
    for run in runs
}


Adding dl1  data to dictionary (Run 2853)...

Adding dl1  data to dictionary (Run 2854)...

Adding dl1  data to dictionary (Run 2855)...

Adding dl1  data to dictionary (Run 2913)...

Adding dl1  data to dictionary (Run 2914)...

Adding dl1  data to dictionary (Run 2916)...

Adding dl1  data to dictionary (Run 2917)...

Adding dl1  data to dictionary (Run 2918)...

Adding dl1  data to dictionary (Run 2919)...

Adding dl1  data to dictionary (Run 2922)...

Adding dl1  data to dictionary (Run 2923)...

Adding dl1  data to dictionary (Run 2924)...

Adding dl1  data to dictionary (Run 2925)...

Adding dl1  data to dictionary (Run 2929)...

Adding dl1  data to dictionary (Run 2930)...

Adding dl1  data to dictionary (Run 2931)...

Adding dl1  data to dictionary (Run 2932)...

Adding dl1  data to dictionary (Run 2933)...

Adding dl1  data to dictionary (Run 2934)...

Adding dl1  data to dictionary (Run 2949)...

Adding dl1  data to dictionary (Run 2950)...

Adding dl1  data to dictionary (R

CPU times: user 35 s, sys: 3.43 s, total: 38.4 s
Wall time: 1min 7s


### Grouping the subrun numbers into sets of a given number of subruns that are processed inside the same job

In [7]:
# Write one line per job into `file_job_config` and count the jobs.
# Each line is "<run>_<first subrun>_<last subrun>" for a group of several
# subruns, or "<run>_<subrun>" when the group holds a single subrun.
n_jobs = 0
with open(file_job_config, "w") as file:
    for run in runs:
        sruns = np.sort(dict_run_sruns[run])
        # A run without subruns produces no job
        if len(sruns) == 0:
            continue
        # The array is sorted, so its last element is the maximum; reading it
        # once avoids recomputing max(sruns) for every subrun
        last_srun = sruns[-1]

        group = []
        for count_sruns, srun in enumerate(sruns):
            group.append(srun)
            # Launching a certain amount of subruns together: the group is closed
            # every n_subruns_job positions (never on subrun number 0) and on the
            # last subrun of the run. When a run starts at subrun 0 the first
            # group therefore holds one subrun more than the following ones.
            if (count_sruns % n_subruns_job == 0 and srun != 0) or (srun == last_srun):
                if len(group) == 1:
                    job_line = f"{run}_{group[0]}"
                else:
                    job_line = f"{run}_{group[0]}_{group[-1]}"
                file.write(f"{job_line}\n")
                group = []
                n_jobs += 1
print(f"The final amount of jobs is {n_jobs}")

The final amount of jobs is 2497


# <span style="color:blue">2. Configuration file for the fit parameters and paths</span>
#### It is stored permanently inside `config/config_scaling_parameters.json`

In [8]:
# Permanent location of the configuration file
fname_config_scaling = os.path.join("config", "config_scaling_parameters.json")

# Source name, used just to complete the results file and to improve the run
# organization.
source_name = "crab"

# --- Fit parameters ---
# Chosen limits in intensity (p.e.) for applying the fit, i.e. the power law
# will be fitted only with the points within this range.
limits_intensity = [316, 562]
# For the positive scaling cases (most of them) we need a lower limit in
# intensity. This limit is used for the subset of events that are scaled just to
# find which is the scaling value. We use a very low limit by default (60 p.e.
# compared to the 316 p.e. lower limit of the fit) because in the worst cases we
# will have a very non-linear scaling that displaces the event intensities
# significantly.
limits_intensity_extended = 60

# --- Power law parameters for the reference ---
# All these parameters are taken from a common analysis of the full dataset,
# where the period from the end of 2022 to the start of 2023 is taken as
# reference for good runs. The reference is the mean of the power law parameters
# in that period. p0 is the normalization factor and p1 is the slope.
ref_p0, ref_p1 = 1.74, -2.23

# --- Threshold in statistics for the last subrun ---
# Number of events after cleaning needed to consider that the last subrun has
# enough statistics to perform the analysis over it. Otherwise the scaling values
# applied to this last subrun are the same ones applied to the previous subrun.
statistics_threshold = 4000

# Number of tries in the dl1 file creation. The dl1 file creation has a bug that
# causes the file to be missing without notifying any error, so a loop and a
# check that the file exists were implemented.
number_tries_dl1 = 3

# Parameters of the empirical fits for the Zenith Distance corrections: simply
# two second-degree polynomials, one for each variable of the power law.
p0a = -0.44751321
p0b = 3.62502037
p0c = -1.43611437
p1a = -2.89253919
p1b = 0.99443581
p1c = -0.34013068

# Standard paths for data in the IT cluster ---------
root_dl1 = "/fefs/aswg/data/real/DL1/*/v0.*/tailcut84/"
root_rfs = "/fefs/aswg/data/models/AllSky/20230927_v0.10.4_crab_tuned/"
root_mcs = "/fefs/aswg/data/mc/DL2/AllSky/20230927_v0.10.4_crab_tuned/TestingDataset/"

# Root path of this script
root = os.getcwd()
# Configuration file we are going to use
config_file = os.path.join(root, "config/standard_config.json")
# Configuration file for the job launching
file_job_config = os.path.join(root, "config", "job_config_runs.txt")

# Folder to store objects, and the folders that live inside it
root_objects = os.path.join(root, "objects/")
# Sub-dl1 objects
root_sub_dl1 = os.path.join(root_objects, "sub_dl1/")
# Results of the fit of each run
root_results = os.path.join(root_objects, "results_fits/")
root_final_results = os.path.join(root_objects, "final_results_fits/")
# Temporal bash scripts
file_temporal_bash = os.path.join(root, "objects", "tmp_bash/")

# Data main directory, one folder per source
root_data = os.path.join(root, f"../../data/cherenkov_transparency_corrections/{source_name}/")

# Directories for the data, all inside the data main directory
dir_dl1b_scaled = os.path.join(root_data, "dl1_scaled/")
dir_dl1m_scaled = os.path.join(root_data, "dl1_merged_scaled/")
dir_dl2_scaled = os.path.join(root_data, "dl2_scaled/")
dir_dl2 = os.path.join(root_data, "dl2/")
dir_dl3_scaled_base = os.path.join(root_data, "dl3_scaled/")
dir_dl3_base = os.path.join(root_data, "dl3/")
dir_irfs = os.path.join(root_data, "irfs/")

### Insert in a dictionary

In [9]:
# Fit-related values, each stored under the name of the variable holding it
fit_parameters_section = dict(
    limits_intensity=limits_intensity,
    limits_intensity_extended=limits_intensity_extended,
    ref_p0=ref_p0,
    ref_p1=ref_p1,
    statistics_threshold=statistics_threshold,
    p0a=p0a,
    p0b=p0b,
    p0c=p0c,
    p1a=p1a,
    p1b=p1b,
    p1c=p1c,
    number_tries_dl1=number_tries_dl1,
)

# Paths and file names, each stored under the name of the variable holding it
paths_section = dict(
    root_dl1=root_dl1,
    root_rfs=root_rfs,
    root_mcs=root_mcs,
    root=root,
    config_file=config_file,
    root_objects=root_objects,
    root_data=root_data,
    root_sub_dl1=root_sub_dl1,
    root_results=root_results,
    root_final_results=root_final_results,
    file_job_config=file_job_config,
    file_temporal_bash=file_temporal_bash,
    dir_dl1b_scaled=dir_dl1b_scaled,
    dir_dl1m_scaled=dir_dl1m_scaled,
    dir_dl2_scaled=dir_dl2_scaled,
    dir_dl2=dir_dl2,
    dir_dl3_scaled_base=dir_dl3_scaled_base,
    dir_dl3_base=dir_dl3_base,
    dir_irfs=dir_irfs,
)

# Full configuration: the source name plus the two sections above
configuration_dictionary = dict(
    source_name=source_name,
    fit_parameters=fit_parameters_section,
    paths=paths_section,
)

# Store the configuration as JSON, overwriting any previous file
with open(fname_config_scaling, "w") as json_file:
    json.dump(configuration_dictionary, json_file)

#### Copy this code to export it in another script

In [15]:
# # Code to read it
# with open(fname_config_scaling, "r") as json_file:
#     configuration_dictionary = json.load(json_file)

# # Now we extract all the variables with the same name as in the dictionary
# source_name = configuration_dictionary["source_name"]
# for superkey in ["fit_parameters", "paths"]:
#     for key, value in configuration_dictionary[superkey].items():
#         globals()[key] = value

# <span style="color:blue">3. Generating the full grid of IRFs</span>
#### It might take some time if it was not already computed

In [None]:
# root_scripts  = "/fefs/aswg/workspace/juan.jimenez/cherenkov_transparency_corrections/data_processing_srunwise/"
# python_script = f"{root_scripts}/script_1_scaling.py"

# ! python $python_script "irfs"