In [1]:
import numpy as np
import pickle, sys, os

# Other auxiliar scripts
sys.path.insert(0, os.getcwd() + "/../scripts/")
import lstpipeline

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

"""Number of subruns in one job"""
n_subruns_job = 5

### Paths and filenames

In [2]:
# Root path of this script
root = os.getcwd() + "/"
# We need to create also a folder to store the slurm outputs
root_slurm = root + "objects/output_slurm"
# Path to store the configuration file we are going to use
root_config = root + "config/"

file_job_config = root_config + "job_config_runs.txt"

# STANDARD paths ---------
root_dl1 = "/fefs/aswg/data/real/DL1/*/v0.*/tailcut84/"
root_rfs = "/fefs/aswg/data/models/AllSky/20230901_v0.10.4_allsky_base_prod/"
root_mcs = "/fefs/aswg/data/mc/DL2/AllSky/20230901_v0.10.4_allsky_base_prod/TestingDataset/"

# Create the paths that do not exist
for path in [root_config, root_slurm]:
    if not os.path.exists(path):
        os.makedirs(os.path.join(path), exist_ok=True)

### Run numbers we have

In [3]:
runs_performance_paper = [
       2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
       2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
       2990, 2991, 2992, 3004, 3005, 3006, 3007, 3008, 3093, 3094, 3095,
       3096, 3231, 3232, 3243, 3270, 3271, 3272, 3273, 3274, 3275, 3276,
       3277, 3278, 3279, 3318, 3319, 3320, 3321, 3328, 3329, 3330, 3338,
       3339, 3340, 3355, 3356, 3373, 3598, 3599, 3600, 3601, 3615, 3632,
       3633, 3634, 3635, 3672, 3673, 3674, 3675, 3676, 3677, 3706, 3707,
       3708, 4067, 4068, 4086, 4087, 6045, 6073, 6304, 6872, 6873, 6874,
       6875, 6892, 6893, 6894, 6895, 7097, 7098, 7099, 7133, 7136, 7161,
       7195, 7196, 7197, 7199, 7200, 7227, 7228, 7231, 7232, 7233, 7253,
       7254, 7255, 7256, 7274, 7275, 7276, 7277
]
runs_performance_paper = [
       2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
       2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
       2990, 2991, 2992, 3004, 3005, 3006, 3007, 3008, 3093, 3094, 3095,
       3096, 3231, 3232, 3243, 3270, 3271, 3272, 3273, 3274, 3275, 3276,
       3277, 3278, 3279, 3318, 3319, 3320, 3321, 3328, 3329, 3330, 3338,
       3339, 3340, 3355, 3356, 3373
]

# runs_performance_paper = [
#        2914, 2929, 2930, 2931, 2932, 2933, 2934, 2949, 2950, 2967, 2968,
#        2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2977, 2988, 2989,
#        2990, 2991, 2992, 
# ]

runs_good_period = [
       10668, 10671, 10672, 10673, 10674, 10917, 10950, 11088, 11125,
       11166, 11191, 11192, 11193, 11196, 11197, 11208, 11209, 11219,
       11221, 11222, 11224, 11225, 11228, 11229, 11230, 11231, 11237,
       11238, 11239, 11240, 11241, 11243, 11244, 11245, 11246, 11247,
       11249, 11250, 11251, 11252, 11254, 11257, 11258, 11259, 11260,
       11261, 11262, 11263, 11264, 11265, 11266, 11267, 11268, 11269,
       11270, 11271, 11272, 11273, 11276, 11277, 11278, 11280, 11282,
       11354, 11355, 11357, 11358, 11359, 11360, 11361, 11363, 11378,
       11379, 11380, 11381, 11382, 11383, 11384, 11408, 11409, 11410,
       11439, 11609, 11610, 11634, 11649, 11650, 11651, 11652, 11671,
       11676, 11711, 11712, 11834, 11919, 11920, 11930
]

runs_other_good = [ 
        2758,  2759,  3088,  3098,  3584,  3586,  3683,  3705,  3725,
        3894,  4010,  4011,  4015,  4129,  5955,  5957,  5958,  5992,
        5993,  5995,  6011,  6282,  7084,  7087,  7140,  7142,  7143,
        7169,  7170,  7172,  7174,  7201,  9252,  9274,  9434,  9436,
        9596,  9687,  9689,  9996, 10034, 10035, 10083, 10084, 10085,
       10089, 10090, 10590, 10591, 10592, 10593, 10595, 10596, 10597,
       10599, 10600, 10601, 10602, 10603, 10604, 10630, 10633, 10634,
       10635, 10636, 10637, 10638, 12048, 12077, 12291, 12669, 12742,
       12767, 14629, 14670, 15570, 15571, 15572, 15641, 15727, 15780,
       15969, 15971, 16001, 16052, 16111, 16237, 16238, 16286, 16337,
       16409
]

runs_bad_rates_good_weather = [ 9686, 10258, 10260, 10262, 10263]

runs_bad_rates_bad_weather  = [
        2767,  5738,  5800,  6192,  6194,  6852,  6853,  6962,  6963,
        6964,  6965,  6966,  6990,  9253,  9715,  9716,  9882, 10077,
       10078, 10264, 10526, 10527, 10528
]

# Selecting the runs we want to analyse
runs = runs_performance_paper

### Reading some of the information in the datachecks

In [4]:
%%time
# We create a empty dictionary to store all the information needed inside
dict_dchecks = {}
for run in runs:
    dict_dchecks[run] = {
        "run_num" : run,
    }

dict_dchecks = lstpipeline.add_dl1_paths_to_dict(dict_dchecks, root_dl1)


Adding dl1  data to dictionary (Run 2914)...

Adding dl1  data to dictionary (Run 2929)...

Adding dl1  data to dictionary (Run 2930)...

Adding dl1  data to dictionary (Run 2931)...

Adding dl1  data to dictionary (Run 2932)...

Adding dl1  data to dictionary (Run 2933)...

Adding dl1  data to dictionary (Run 2934)...

Adding dl1  data to dictionary (Run 2949)...

Adding dl1  data to dictionary (Run 2950)...

Adding dl1  data to dictionary (Run 2967)...

Adding dl1  data to dictionary (Run 2968)...

Adding dl1  data to dictionary (Run 2969)...

Adding dl1  data to dictionary (Run 2970)...

Adding dl1  data to dictionary (Run 2971)...

Adding dl1  data to dictionary (Run 2972)...

Adding dl1  data to dictionary (Run 2973)...

Adding dl1  data to dictionary (Run 2974)...

Adding dl1  data to dictionary (Run 2975)...

Adding dl1  data to dictionary (Run 2976)...

Adding dl1  data to dictionary (Run 2977)...

Adding dl1  data to dictionary (Run 2988)...

Adding dl1  data to dictionary (R

CPU times: user 24.2 s, sys: 3.95 s, total: 28.2 s
Wall time: 35.5 s


### For each runs having a set of subruns

In [5]:
dict_run_sruns = {}
for run in runs:
    fnames_dl1 = np.sort(dict_dchecks[run]["dl1a"]["srunwise"])
    srun_numbers = [int(f.split(".")[-2]) for f in fnames_dl1]
    dict_run_sruns[run] = srun_numbers

### Storing the jobs in sets of certain amount of subruns inside the same job

In [6]:
n_jobs = 0
with open(file_job_config, "w") as file:

    for run in runs:
    
        count_sruns = 0
        sruns = np.sort(dict_run_sruns[run])
    
        tmp_str = ""
        for srun in sruns:
            
            tmp_str = tmp_str + f"_{srun}"
    
            # Launching a certain amount of subruns together
            if (count_sruns % n_subruns_job == 0 and srun != 0) or (srun == max(sruns)):

                tmp_str_splitted = tmp_str.split("_")
                if len(tmp_str_splitted) != 2:
                    tmp_str = "_" + tmp_str_splitted[1] + "_" + tmp_str_splitted[-1]
                
                file.write(f"{run}{tmp_str}\n")
                tmp_str = ""
                n_jobs += 1
    
            count_sruns += 1
print(f"The final amount of jobs is {n_jobs}")

The final amount of jobs is 1543
