In [13]:

import pandas as pd
from collections import defaultdict

# Load the "Invalid Records" sheet. header=None keeps the first row as data
# instead of treating it as column names, so the records land in column 0.
df = pd.read_excel("src/results/aggregated_results.xlsx", sheet_name="Invalid Records", header=None)

# Keep only real records: drop empty cells *before* converting to str
# (astype(str) would turn NaN into the literal "nan", which int() rejects),
# then drop cells that are blank once surrounding whitespace is stripped.
records = df[0].dropna().astype(str).str.strip()
records = records[records != ""]


def _split_record(record):
    """Split a record name into ``(prefix, job_id)``.

    The job ID is the text after the last underscore, e.g.
    ``network_4_0_39_1.6_0.7_1_1078`` -> ``("network_4_0_39_1.6_0.7_1", 1078)``.
    A record without any underscore yields an empty prefix.

    Raises:
        ValueError: if the text after the last underscore is not an integer.
    """
    prefix, _, suffix = record.rpartition('_')
    try:
        return prefix, int(suffix)
    except ValueError:
        # Re-raise with the offending record so the bad row is easy to find.
        raise ValueError(
            f"Record {record!r} does not end in '_<integer job ID>'"
        ) from None


# Parse every record once; the sbatch array and the per-prefix grouping below
# are both derived from these (prefix, job_id) pairs.
prefix_id_pairs = records.apply(_split_record)

# Trailing ID of each record, still as text (kept for inspection).
last_numbers = records.apply(lambda x: x.rsplit('_', 1)[-1])

# Sorted unique job IDs across all records
numbers = sorted({job_id for _, job_id in prefix_id_pairs})

# Join them into a comma-separated string
array_values = ",".join(map(str, numbers))

# Construct the sbatch command
sbatch_command = f"sbatch --array={array_values} slurm.sh"
print(len(numbers))
print(sbatch_command)

# Group IDs by prefix (a set per prefix removes duplicate IDs)
prefix_to_ids = defaultdict(set)
for prefix, job_id in prefix_id_pairs:
    prefix_to_ids[prefix].add(job_id)

# Freeze into a plain dict with the IDs of each prefix in ascending order
prefix_to_ids = {prefix: sorted(ids) for prefix, ids in prefix_to_ids.items()}

# Print the mapping
print("Unique prefixes and associated job IDs:")
for prefix, ids in prefix_to_ids.items():
    print(f"{prefix}: {ids}")

148
sbatch --array=39,41,67,68,69,74,76,77,94,95,124,142,200,229,230,231,235,238,239,256,257,258,286,288,295,296,305,391,392,393,397,398,400,401,418,419,466,524,525,553,554,555,560,562,563,580,581,582,607,611,685,715,716,717,721,722,742,743,773,790,854,877,878,879,883,884,886,887,904,905,906,910,953,1010,1039,1040,1041,1045,1046,1048,1049,1066,1067,1068,1201,1202,1203,1207,1210,1211,1228,1229,1230,1240,1258,1267,1277,1363,1364,1365,1369,1370,1372,1373,1390,1391,1402,1417,1420,1429,1438,1439,1498,1501,1503,1525,1526,1527,1531,1532,1534,1535,1552,1553,1554,1687,1688,1689,1693,1694,1696,1697,1714,1715,1716,1726,1745,1753,1762,1819,1820,1821,1849,1850,1851,1858,1877,1908 slurm.sh
Unique prefixes and associated job IDs:
network_4_0_33_1.4_0.5_10: [39, 525, 1821]
network_4_0_33_1.4_0.6_5: [41]
network_4_0_36_1.4_0.6_1: [67, 229, 391, 553, 715, 877, 1039, 1201, 1363, 1525, 1687, 1849]
network_4_0_36_1.4_0.6_5: [68, 230, 392, 554, 716, 878, 1040, 1202, 1364, 1526, 1688, 1850]
network_4_0_36_1.