In [1]:
# Submit new jobs to AlphaFold in batches

# Upper limit on how many job files are collected into a single batch
BATCH_SIZE = 274


In [3]:

# Collect up to BATCH_SIZE jobs from the submitted folder that are neither completed nor already running
import os
import json
import csv
import glob
import shutil
import datetime


In [4]:

# Define paths (relative to the notebook's working directory)
# Folder whose .json files are the candidate jobs for a new batch
SUBMITTED_FOLDER = 'submitted'
# Folder whose .json files mark jobs as completed; it is allowed to be missing
COMPLETED_FOLDER = 'completed'



# Every .json file inside an existing batch_* directory counts as a running job
batch_folders = [path for path in glob.glob('batch_*') if os.path.isdir(path)]
running_jobs = {
    entry
    for directory in batch_folders
    for entry in os.listdir(directory)
    if entry.endswith('.json')
}

# Completed jobs are stored by stem (file name minus '.json') of each .json file
# found in COMPLETED_FOLDER; if that folder is missing the set stays empty
completed_jobs = set()

if os.path.exists(COMPLETED_FOLDER):
    completed_jobs.update(
        os.path.splitext(entry)[0]
        for entry in os.listdir(COMPLETED_FOLDER)
        if entry.endswith('.json')
    )

# Collect up to BATCH_SIZE submitted jobs that are neither running nor completed.
# running_jobs holds full file names ('x.json') while completed_jobs holds stems
# ('x'), so each set must be checked against the matching form of the name.
batch_list = []
# Sorted so the selection is reproducible; os.listdir returns entries in arbitrary order
for filename in sorted(os.listdir(SUBMITTED_FOLDER)):
    if len(batch_list) >= BATCH_SIZE:
        break
    if not filename.endswith('.json'):
        continue
    job_name = os.path.splitext(filename)[0]
    if filename in running_jobs or job_name in completed_jobs:
        continue
    batch_list.append(filename)

# Output batch list
print(f"Collected {len(batch_list)} new jobs for the batch.")

# Number the next batch one above the highest numeric suffix found among the
# existing batch_* folders; start at 1 when no folder carries a numeric suffix
batch_numbers = [
    int(suffix)
    for suffix in (name.split('_')[1] for name in batch_folders)
    if suffix.isdigit()
]
new_batch_number = max(batch_numbers, default=0) + 1

# Create the folder that will hold the new batch
new_batch_folder = f"batch_{new_batch_number}"
os.makedirs(new_batch_folder, exist_ok=True)

# Copy every selected job file from the submitted folder into the new batch folder
for job_name in batch_list:
    shutil.copy2(
        os.path.join(SUBMITTED_FOLDER, job_name),
        os.path.join(new_batch_folder, job_name),
    )

print(f"Created new batch folder: {new_batch_folder}")
print(f"Copied {len(batch_list)} jobs to the batch folder.")


Collected 274 new jobs for the batch.
Created new batch folder: batch_1
Copied 274 jobs to the batch folder.


- Transfer the new batch folder to raven via `scp`.
- Use `submit_jobs.sh` to submit the jobs.

In [6]:
BATCH_NUM = 1

# CSV file that records each submitted job with its timestamp and batch number.
# Defined in this cell so the cell also runs on a fresh kernel.
RUNNING_JOBS_CSV = 'running_jobs.csv'

# Append all jobs in the batch folder to running_jobs.csv with timestamp and batch number
batch_folder = f"batch_{BATCH_NUM}"
# isdir rather than exists: a plain file with this name cannot be listed
if os.path.isdir(batch_folder):
    # One timestamp shared by every row written for this batch
    timestamp = datetime.datetime.now().isoformat()
    # Sorted so rows are written in a reproducible order (os.listdir order is arbitrary)
    job_files = sorted(name for name in os.listdir(batch_folder) if name.endswith('.json'))
    # newline='' is what the csv module expects for files it writes
    with open(RUNNING_JOBS_CSV, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows([job_file, timestamp, BATCH_NUM] for job_file in job_files)
    print(f"Appended jobs from {batch_folder} to {RUNNING_JOBS_CSV} with timestamp {timestamp}.")
else:
    print(f"Batch folder {batch_folder} does not exist.")


Appended jobs from batch_1 to running_jobs.csv with timestamp 2025-05-19T15:58:30.406869.
