diff --git a/_tsml_research_resources/soton/iridis/README.md b/_tsml_research_resources/soton/iridis/README.md
new file mode 100644
index 00000000..8aa43f81
--- /dev/null
+++ b/_tsml_research_resources/soton/iridis/README.md
@@ -0,0 +1,7 @@
+serial_scripts:
+    Simple(er) scripts which submit each experiment as a separate job. This is fine
+    for small numbers of experiments, but limits you to 32 jobs per user on Iridis.
+batch_scripts:
+    More complex scripts which submit multiple experiments in a single job. This
+    allows a much larger number of jobs for experiments, but requires more setup to
+    work efficiently.
diff --git a/_tsml_research_resources/soton/iridis/batch_scripts/datasetsUV112Batch.zip b/_tsml_research_resources/soton/iridis/batch_scripts/datasetsUV112Batch.zip
new file mode 100644
index 00000000..894caeb3
Binary files /dev/null and b/_tsml_research_resources/soton/iridis/batch_scripts/datasetsUV112Batch.zip differ
diff --git a/_tsml_research_resources/soton/iridis/batch_scripts/taskfarm_classification_experiments.sh b/_tsml_research_resources/soton/iridis/batch_scripts/taskfarm_classification_experiments.sh
new file mode 100644
index 00000000..afff01d4
--- /dev/null
+++ b/_tsml_research_resources/soton/iridis/batch_scripts/taskfarm_classification_experiments.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# Check and edit all options before the first run!
+# While reading is fine, please don't write anything to the default directories in this script
+
+# Start and end for resamples
+max_folds=10
+start_fold=1
+
+# To avoid hitting the cluster queue limit we have a higher level queue
+max_num_submitted=900
+
+# Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx
+queue="batch"
+
+# The number of tasks/threads to use in each job. 40 is the number of cores on batch nodes
+n_tasks_per_node=40
+
+# Enter your username and email here
+username="mbm1g23"
+mail="NONE"
+mailto="${username}@soton.ac.uk"
+
+# Max allowable is 60 hours
+max_time="60:00:00"
+
+# Start point for the script i.e. 3 datasets, 3 classifiers = 9 experiments to submit, start_point=5 will skip to job 5
+start_point=1
+
+# Put your home directory here
+local_path="/mainfs/home/$username/"
+
+# Datasets to use and directory of data files. This can either be a text file or directory of text files
+# Separate text files will not run jobs of the same dataset in the same node. This is good to keep large and small datasets separate
+data_dir="$local_path/Data/"
+dataset_list="$local_path/DataSetLists/ClassificationBatch/"
+
+# Results and output file write location. Change these to reflect your own file structure
+results_dir="$local_path/ClassificationResults/results/"
+out_dir="$local_path/ClassificationResults/output/"
+
+# The python script we are running
+script_file_path="$local_path/tsml-eval/tsml_eval/experiments/classification_experiments.py"
+
+# Environment name, change accordingly, for set up, see https://github.com/time-series-machine-learning/tsml-eval/blob/main/_tsml_research_resources/soton/iridis/iridis_python.md
+# Separate environments for GPU and CPU are recommended
+env_name="eval-py11"
+
+# Classifiers to loop over. Must be separated by a space. Different classifiers will not run in the same node
+# See list of potential classifiers in set_classifier
+classifiers_to_run="ROCKET DrCIF"
+
+# You can add extra arguments here. See tsml_eval/utils/arguments.py parse_args
+# You will have to add any variable to the python call close to the bottom of the script
+# and possibly to the options handling below
+
+# generate a results file for the train data as well as test, usually slower
+generate_train_files="false"
+
+# If set for true, looks for _TRAIN.ts file. This is useful for running tsml-java resamples
+predefined_folds="false"
+
+# Normalise data before fit/predict
+normalise_data="false"
+
+
+# ======================================================================================
+# 	Experiment configuration end
+# ======================================================================================
+
+
+# Set to -tr to generate train files (empty string when disabled)
+generate_train_files=$([ "${generate_train_files,,}" == "true" ] && echo "-tr" || echo "")
+
+# Set to -pr to use predefined folds
+predefined_folds=$([ "${predefined_folds,,}" == "true" ] && echo "-pr" || echo "")
+
+# Set to -rn to normalise data
+normalise_data=$([ "${normalise_data,,}" == "true" ] && echo "-rn" || echo "")
+
+mkdir -p "${out_dir}/"
+
+# Writes a Slurm submission file for the current command list, submits it with sbatch, then deletes the file
+submit_jobs () {
+# Globals read: mail mailto dt queue max_time out_dir cmdCount env_name expCount totalCount
+echo "#!/bin/bash
+#SBATCH --mail-type=${mail}
+#SBATCH --mail-user=${mailto}
+#SBATCH --job-name=batch-${dt}
+#SBATCH -p ${queue}
+#SBATCH -t ${max_time}
+#SBATCH -o ${out_dir}/%A-${dt}.out
+#SBATCH -e ${out_dir}/%A-${dt}.err
+#SBATCH --nodes=1
+#SBATCH --ntasks=${cmdCount}
+
+. /etc/profile
+
+module load anaconda/py3.10
+source activate $env_name
+
+staskfarm ${out_dir}/generatedCommandList-${dt}.txt" > "generatedSubmissionFile-${dt}.sub"
+
+echo "At experiment ${expCount}, ${totalCount} jobs submitted total"
+
+sbatch < "generatedSubmissionFile-${dt}.sub"
+
+rm -- "generatedSubmissionFile-${dt}.sub"
+
+}
+
+totalCount=0
+expCount=0
+
+# turn a directory of files into a space separated list of full paths (paths must not contain spaces)
+if [[ -d "${dataset_list}" ]]; then
+    file_names=""
+    for file in "${dataset_list}"/*; do
+        [[ -e "${file}" ]] && file_names="${file_names}${file} "
+    done
+    dataset_list=$file_names
+fi
+
+for dataset_file in $dataset_list; do
+
+echo "Dataset list ${dataset_file}"
+
+for classifier in $classifiers_to_run; do
+
+# we use time for unique names
+sleep 1
+cmdCount=0
+dt=$(date +%Y%m%d%H%M%S)
+
+while read -r dataset || [[ -n "${dataset}" ]]; do
+
+# Skip to the script start point
+((expCount++))
+if ((expCount>=start_point)); then
+
+# This finds the resamples to run and skips jobs which have test/train files already written to the results directory.
+resamples_to_run=""
+for (( i=start_fold-1; i<max_folds; i++ ))
+do
+    if [ -f "${results_dir}${classifier}/Predictions/${dataset}/testResample${i}.csv" ]; then
+        if [ -n "${generate_train_files}" ] && ! [ -f "${results_dir}${classifier}/Predictions/${dataset}/trainResample${i}.csv" ]; then
+            resamples_to_run="${resamples_to_run}${i} "
+        fi
+    else
+        resamples_to_run="${resamples_to_run}${i} "
+    fi
+done
+
+for resample in $resamples_to_run; do
+# NOTE(review): the resample loop (header through this line) was rebuilt from a damaged span; result path layout assumed - confirm
+# Submit the current command list as one job once it holds n_tasks_per_node commands
+if ((cmdCount>=n_tasks_per_node)); then
+    submit_jobs
+
+    # This is the loop to stop you from dumping everything in the queue at once, see max_num_submitted jobs
+    num_jobs=$(squeue -u "${username}" --format="%20P %5t" -r | awk '{print $2, $1}' | grep -c -e "R ${queue}" -e "PD ${queue}")
+    while [ "${num_jobs}" -ge "${max_num_submitted}" ]
+    do
+        echo "Waiting 60s, ${num_jobs} currently submitted on ${queue}, user-defined max is ${max_num_submitted}"
+        sleep 60
+        num_jobs=$(squeue -u "${username}" --format="%20P %5t" -r | awk '{print $2, $1}' | grep -c -e "R ${queue}" -e "PD ${queue}")
+    done
+
+    sleep 1
+    cmdCount=0
+    dt=$(date +%Y%m%d%H%M%S)
+fi
+
+# Input args to the default classification_experiments are in main method of
+# https://github.com/time-series-machine-learning/tsml-eval/blob/main/tsml_eval/experiments/classification_experiments.py
+echo "python -u ${script_file_path} ${data_dir} ${results_dir} ${classifier} ${dataset} ${resample} ${generate_train_files} ${predefined_folds} ${normalise_data}" >> "${out_dir}/generatedCommandList-${dt}.txt"
+
+((cmdCount++))
+((totalCount++))
+
+done
+fi
+done < "${dataset_file}"
+
+if ((cmdCount>0)); then
+    # final submit for this dataset list
+    submit_jobs
+fi
+
+done
+done
+
+echo Finished submitting jobs
diff --git a/_tsml_research_resources/soton/iridis/classification_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/classification_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/classification_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/classification_experiments.sh index 2d7c77b2..557fe6c1 100644 --- a/_tsml_research_resources/soton/iridis/classification_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/classification_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write
results) -# classifiers_to_run (list of classifiers to run) +# Check and edit all options before the first run! # While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=30 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=100 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/soton/iridis/clustering_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/clustering_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/clustering_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/clustering_experiments.sh index 387c7785..e2686061 100644 --- a/_tsml_research_resources/soton/iridis/clustering_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/clustering_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# clusterers_to_run (list of clusterers to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=30 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=100 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/soton/iridis/gpu_classification_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_classification_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/gpu_classification_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/gpu_classification_experiments.sh index 963cb9f1..41b90b47 100644 --- a/_tsml_research_resources/soton/iridis/gpu_classification_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_classification_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# classifiers_to_run (list of classifiers to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=5 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=12 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/soton/iridis/gpu_clustering_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_clustering_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/gpu_clustering_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/gpu_clustering_experiments.sh index 62a4c12e..72acb709 100644 --- a/_tsml_research_resources/soton/iridis/gpu_clustering_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_clustering_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# clusterers_to_run (list of clusterers to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=5 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=12 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/soton/iridis/gpu_regression_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_regression_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/gpu_regression_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/gpu_regression_experiments.sh index 9ccf952e..c7d1f0b3 100644 --- a/_tsml_research_resources/soton/iridis/gpu_regression_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/gpu_regression_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# regressors_to_run (list of regressors to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=30 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=100 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/soton/iridis/regression_experiments.sh b/_tsml_research_resources/soton/iridis/serial_scripts/regression_experiments.sh similarity index 96% rename from _tsml_research_resources/soton/iridis/regression_experiments.sh rename to _tsml_research_resources/soton/iridis/serial_scripts/regression_experiments.sh index 24aed39c..ca7db812 100644 --- a/_tsml_research_resources/soton/iridis/regression_experiments.sh +++ b/_tsml_research_resources/soton/iridis/serial_scripts/regression_experiments.sh @@ -1,15 +1,12 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# regressors_to_run (list of regressors to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples max_folds=30 start_fold=1 -# To avoid dumping 1000s of jobs in the queue we have a higher level queue +# To avoid hitting the cluster queue limit we have a higher level queue max_num_submitted=100 # Queue options are https://sotonac.sharepoint.com/teams/HPCCommunityWiki/SitePages/Iridis%205%20Job-submission-and-Limits-Quotas.aspx diff --git a/_tsml_research_resources/uea/ada/classification_experiments.sh b/_tsml_research_resources/uea/ada/classification_experiments.sh index 619eb2fd..d0b2609c 100644 --- a/_tsml_research_resources/uea/ada/classification_experiments.sh +++ b/_tsml_research_resources/uea/ada/classification_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# classifiers_to_run (list of classifiers to run) +# Check and edit all options before the first run! # While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples diff --git a/_tsml_research_resources/uea/ada/clustering_experiments.sh b/_tsml_research_resources/uea/ada/clustering_experiments.sh index e0aa28fd..f69a7417 100644 --- a/_tsml_research_resources/uea/ada/clustering_experiments.sh +++ b/_tsml_research_resources/uea/ada/clustering_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# clusterers_to_run (list of clusterers to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples diff --git a/_tsml_research_resources/uea/ada/gpu_classification_experiments.sh b/_tsml_research_resources/uea/ada/gpu_classification_experiments.sh index bf70d898..907744a9 100644 --- a/_tsml_research_resources/uea/ada/gpu_classification_experiments.sh +++ b/_tsml_research_resources/uea/ada/gpu_classification_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# classifiers_to_run (list of classifiers to run) +# Check and edit all options before the first run! # While reading is fine, please dont write anything to the default directories in this script # To use GPU resources you need to be given access (gpu qos), which involves emailing hpc.admin@uea.ac.uk diff --git a/_tsml_research_resources/uea/ada/gpu_clustering_experiments.sh b/_tsml_research_resources/uea/ada/gpu_clustering_experiments.sh index 4a9f3559..e9b0eb08 100644 --- a/_tsml_research_resources/uea/ada/gpu_clustering_experiments.sh +++ b/_tsml_research_resources/uea/ada/gpu_clustering_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# clusterers_to_run (list of clusterers to run) +# Check and edit all options before the first run! 
# While reading is fine, please dont write anything to the default directories in this script # To use GPU resources you need to be given access (gpu qos), which involves emailing hpc.admin@uea.ac.uk diff --git a/_tsml_research_resources/uea/ada/gpu_regression_experiments.sh b/_tsml_research_resources/uea/ada/gpu_regression_experiments.sh index 0dd2094f..2bb0d074 100644 --- a/_tsml_research_resources/uea/ada/gpu_regression_experiments.sh +++ b/_tsml_research_resources/uea/ada/gpu_regression_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# regressors_to_run (list of regressors to run) +# Check and edit all options before the first run! # While reading is fine, please dont write anything to the default directories in this script # To use GPU resources you need to be given access (gpu qos), which involves emailing hpc.admin@uea.ac.uk diff --git a/_tsml_research_resources/uea/ada/regression_experiments.sh b/_tsml_research_resources/uea/ada/regression_experiments.sh index 7b476acd..3525e7e2 100644 --- a/_tsml_research_resources/uea/ada/regression_experiments.sh +++ b/_tsml_research_resources/uea/ada/regression_experiments.sh @@ -1,8 +1,5 @@ #!/bin/bash -# CHECK before each new run: -# datasets (list of problems) -# results_dir (where to check/write results) -# regressors_to_run (list of regressors to run) +# Check and edit all options before the first run! # While reading is fine, please dont write anything to the default directories in this script # Start and end for resamples