#### Brain2cog all of Scott 10K

### Prelims: create error log files

In [None]:
%%bash

# The PBS scripts in this notebook pass this path (with a trailing slash) to
# -o / -e, i.e. they use it as a directory. `touch` would create an empty
# regular file instead, so create the directory (no-op if it already exists).
mkdir -p /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c

### Remove patients without mwc1t1 files, and make sure that only mwc1t1 files exist within patient directories before transferring from ephemeral (cleaning step)

In [1]:
import os
from tqdm import tqdm
import shutil

dir = '/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/'

# The single file every subject folder is allowed to keep.
wanted = 'mwc1t1_reoriented_fsl.nii.gz'

deleted_subjects = 0
deleted_items = 0
subdirs = os.listdir(dir)

for subdir in tqdm(subdirs, desc="Checking subdirectories", unit="dir"):
    path = os.path.join(dir, subdir)

    # Only directories are treated as subject folders; loose entries are left alone.
    if not os.path.isdir(path):
        continue

    try:
        files = os.listdir(path)

        # A subject folder without the mwc1t1 image is removed entirely.
        if wanted not in files:
            shutil.rmtree(path)
            deleted_subjects += 1
            continue

        # Otherwise remove everything in the folder except the mwc1t1 image.
        for entry in files:
            if entry == wanted:
                continue
            fpath = os.path.join(path, entry)
            if os.path.isfile(fpath):
                os.remove(fpath)
            elif os.path.isdir(fpath):
                shutil.rmtree(fpath)
            else:
                # Neither a regular file nor a directory: leave it and do not count it.
                continue
            deleted_items += 1

    except FileNotFoundError:
        # Something vanished between listing and deleting; report it and move on.
        print(f"Path not found: {path}")
        continue

print(f"Removed {deleted_subjects} subject directories")
print(f"Scrubbed {deleted_items} non-mwc1t1 files/folders")


Checking subdirectories: 100%|██████████| 8170/8170 [03:19<00:00, 40.95dir/s]

Removed 9 subject directories
Scrubbed 17042 non-mwc1t1 files/folders





### (Optional) Stripping the `b2c_` prefix from the names of the original b2c files and directories, and then rsyncing them

In [2]:
import os

dir = '/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/'

# Remove every occurrence of 'b2c_' from file and directory names under `dir`.
#
# The walk is bottom-up (topdown=False) on purpose: with a top-down walk,
# renaming a directory leaves its old name in `dirs`, os.walk then fails to
# descend into the (now missing) old path and silently skips its contents.
# Bottom-up, a directory's children are renamed before the directory itself.
for root, dirs, files in os.walk(dir, topdown=False):
    for name in dirs + files:
        if 'b2c_' not in name:
            continue

        old_path = os.path.join(root, name)
        new_path = os.path.join(root, name.replace('b2c_', ''))

        # os.rename can silently replace an existing file on POSIX, so never
        # rename onto something that already exists (this also covers a name
        # that would become empty after stripping).
        if os.path.lexists(new_path):
            print(f"Skipping {old_path}: {new_path} already exists")
            continue

        os.rename(old_path, new_path)


### 1. Create a list of all T1 scan paths from ADNI

In [2]:
%%bash 

t1_root=/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/
t1_list=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/scott_10k_t1_paths.txt

# Overwrite the list with the path of every .nii.gz file found under the ADNI NIFTI root.
find "$t1_root" -type f -name "*.nii.gz" > "$t1_list"

# Report how many paths were collected.
wc -l < "$t1_list"

15733


### 1.25 Create a list of mwc1t1 files in ephemeral, so that the T1 paths can be cross-referenced against the successful mwc1t1 paths to find the missing mwc1t1s

In [1]:
%%bash

b2c_root=/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/
mwc1t1_list=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/successful_scott_10k_mwc1t1_paths.txt

# Overwrite the list with the path of every file whose name starts with "mwc1t1".
find "$b2c_root" -type f -name "mwc1t1*" > "$mwc1t1_list"

# Report how many mwc1t1 files were found.
wc -l < "$mwc1t1_list"

9837


### 1.5 Cross reference completed b2c jobs with mwc1t1 files with T1 paths, to identify missing patient scans

In [4]:
from itertools import islice

output_file = '/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/scott_10k_t1_paths.txt'

# Map each scan name (file name with '.nii.gz' removed and whitespace stripped)
# to the path line exactly as read from the list, trailing newline included.
t1_codes = {}
with open(output_file) as t1_file:
    for line in t1_file:
        scan_name = line.split('/')[-1].replace('.nii.gz', '').strip()
        t1_codes[scan_name] = line

# Preview the first 100 entries.
d2 = dict(islice(t1_codes.items(), 100))
print(d2)

{'012_S_4849_ADNI-T1_2012-11-05_12_04_44.0': '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1/012_S_4849/2012-11-05_12_04_44.0/012_S_4849_ADNI-T1_2012-11-05_12_04_44.0.nii.gz\n', '012_S_4849_ADNI-T1_2012-07-24_09_50_58.0': '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1/012_S_4849/2012-07-24_09_50_58.0/012_S_4849_ADNI-T1_2012-07-24_09_50_58.0.nii.gz\n', '012_S_4849_ADNI-T1_2013-08-26_14_02_58.0': '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1/012_S_4849/2013-08-26_14_02_58.0/012_S_4849_ADNI-T1_2013-08-26_14_02_58.0.nii.gz\n', '012_S_4849_ADNI-T1_2013-02-25_13_24_41.0': '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1/012_S_4849/2013-02-25_13_24_41.0/012_S_4849_ADNI-T1_2013-02-25_13_24_41.0.nii.gz\n', '941_S_6471_ADNI-T1_2020-09-01_12_46_08.0': '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1/941_S_6471/2020-09-01_12_46_08.0/941_S_6471_ADNI-T1_2020-09-01_12_46_08.0.nii.gz\n', '941

In [5]:
output_file = '/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/successful_scott_10k_mwc1t1_paths.txt'

# The name of the directory containing each mwc1t1 file (second-to-last path
# component) identifies the scan that was processed successfully.
with open(output_file) as mwc1t1_file:
    successful_mwc1t1s = [line.split('/')[-2].strip() for line in mwc1t1_file]

print(len(successful_mwc1t1s))

8161


In [6]:
import os

missing_mwc1t1_list = '/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/missing_mwc1t1_list.txt'
# NOTE(review): the next two names are not used in this cell; kept in case other cells read them.
missing_mwc1t1_root = '/rds/general/project/scott_data_adni/live/ADNI/ADNI_NIFTI/ADNI_allT1'
counter = 0

verbosity = False

# A set gives constant-time membership checks; testing each T1 name against
# the list would scan the whole list once per T1 scan.
successful_lookup = set(successful_mwc1t1s)

# T1 paths (still carrying their trailing newline from the paths file) whose
# scan name has no mwc1t1 output yet.
path_list = [path for t1, path in t1_codes.items() if t1 not in successful_lookup]

# Open once in 'w' mode: this truncates any previous list and avoids
# reopening the file for every record.
with open(missing_mwc1t1_list, 'w') as outfile:
    for path in path_list:
        if verbosity:
            print(path.replace('\n', ''))
            print(os.path.exists(path.replace('\n', '')))
        outfile.write(path)

# One line is written per missing scan, so this equals the file's line count.
print(len(path_list))

7572


### 2. Create a file that submits brain2cog jobs for files without mwc1t1 images

In [4]:
%%file /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/b2c_scott_10k.sh
#!/bin/bash
#PBS -l walltime=01:45:00
#PBS -l select=1:ncpus=1:mem=20gb
#PBS -N scott_10k_b2c
#PBS -J 1-9999
#PBS -o /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/
#PBS -e /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/

# PBS array job: sub-job N runs brain2cog on line N of the T1 path list.
# The array starts at 1 because line numbers are 1-based; index 0 would
# select no line and launch brain2cog with an empty input path.
# NOTE(review): the path list had 15733 lines when it was generated, so
# lines above 9999 are not reached by this array range - confirm whether a
# second submission is needed.

paths_file=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/scott_10k_t1_paths.txt

# Refuse to run without an index: an empty index would make sed print every line.
: "${PBS_ARRAY_INDEX:?PBS_ARRAY_INDEX must be set}"

# sed prints exactly line N, and nothing when N is past the end of the list.
inpath=$(sed -n "${PBS_ARRAY_INDEX}p" "$paths_file")
if [ -z "$inpath" ]; then
    echo "No input path for array index ${PBS_ARRAY_INDEX}; nothing to do." >&2
    exit 0
fi
infile=$(basename "$inpath" .nii.gz)

echo "inpath: $inpath";
echo "infile: $infile";

outdir=/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25
mkdir -p "${outdir}"
# Stop if the output directory is unusable rather than working from the wrong place.
cd "${outdir}" || exit 1

echo "outdir: ${outdir}"

# Build the command as an array so paths are passed verbatim, without eval.
cmd=(/rds/general/project/scott_code/live/brain2cog/brain2cog.sh -i "$inpath" -o "$outdir/$infile")
echo "${cmd[*]}"
"${cmd[@]}"

# cd "$outdir/$infile" || exit 1

# find "$outdir/$infile/" -type f -not -name 'mwc1t1*.nii*' \
#     ! -path "*/task/*" \
#     ! -path "*/net/*" \
#     ! -path "*/fd/*" \
#     ! -path "*/fdinfo/*" \
#     ! -path "*/map_files/*" \
#     ! -path "*/ns/*" \
#     ! -path "*/sys/*" \
#     ! -path "*/proc/*" \
#     -delete

# Refresh the modification time of every file under the output directory.
# -c stops touch from re-creating a file that another sub-job has deleted
# between find listing it and touch running.
cd "${outdir}" || exit 1
find . -type f -exec touch -c {} +

# Clean up stray java / crash-dump files in the home directory; -f keeps rm
# quiet when there is nothing to remove.
rm -f ~/java*
rm -f ~/*crash_dump*

Overwriting /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/b2c_scott_10k.sh


In [5]:
%%bash

# Make the brain2cog job script executable and submit it to PBS.
job_script=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/b2c_scott_10k.sh

chmod -Rf 775 "$job_script"
#export PBS_ARRAY_INDEX=1; "$job_script"
qsub "$job_script"

1699091[].pbs-7


### Refresh timestamps script

In [9]:
%%file /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/refresh_b2c_timestamps.sh
#!/bin/bash
#PBS -l walltime=01:00:00
#PBS -l select=1:ncpus=1:mem=1gb
#PBS -N refresh_b2c
#PBS -o /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/
#PBS -e /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/

# Refresh the modification time of every file under the brain2cog output directory.

outdir=/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/

# Abort if the directory cannot be entered; otherwise `find .` would run in
# whatever directory the job started from and touch the files there instead.
cd "${outdir}" || exit 1

# -c stops touch from creating a file that disappeared after find listed it.
find . -type f -exec touch -c {} +

#rsync -av /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/ /rds/general/project/c3nl_scott_students/live/data/sankeith/scott_10k_b2c/

#rm /rds/general/user/sk4724/home/java*

Overwriting /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/refresh_b2c_timestamps.sh


In [10]:
%%bash

# Make the timestamp-refresh script executable and submit it to PBS.
job_script=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/refresh_b2c_timestamps.sh

chmod -Rf 775 "$job_script"
qsub "$job_script"

1563157.pbs-7


### Resample mwc1t1 images to 1mm^3 isotropic resolution with FLIRT, then register them to the MNI152 1mm brain template

In [2]:
%%file /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/flirt_1mm_scott_10k.sh
#!/bin/bash
#PBS -lwalltime=00:05:00
#PBS -lselect=1:ncpus=1:mem=32gb
#PBS -o /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/
#PBS -e /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_logs/b2c/
#PBS -J 1-9999
#PBS -N FLIRT_1mm_Scott_10k

# **NB** setting all the above correctly is important. the walltime has been reduced because from experience this runs super quick
# -------------------------------------------------------------------------------

# PBS array job: sub-job N resamples the mwc1t1 image on line N of the list
# to 1 mm isotropic voxels, then registers the result to the MNI152 1 mm template.

paths_file=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/successful_scott_10k_mwc1t1_paths.txt

# Refuse to run without an index: an empty index would make sed print every line.
: "${PBS_ARRAY_INDEX:?PBS_ARRAY_INDEX must be set}"

# sed prints exactly line N and nothing past the end of the list. The array
# range is larger than the list, and `head -n N | tail -n 1` would hand the
# LAST line to every surplus sub-job, so they would all rewrite the same output.
inpath=$(sed -n "${PBS_ARRAY_INDEX}p" "$paths_file")
if [ -z "$inpath" ]; then
    echo "No input path for array index ${PBS_ARRAY_INDEX}; nothing to do." >&2
    exit 0
fi

# Output name = <containing folder name>_<image file name without .nii.gz>.
folderbase=$(basename "$(dirname "$inpath")" .nii.gz)
filebase=$(basename "$inpath" .nii.gz)
infile="${folderbase}_${filebase}"
echo "inpath: $inpath";
echo "infile: $infile";

ref=/rds/general/project/c3nl_scott_students/live/sankeith/standards/MNI152_T1_1mm_brain.nii.gz
outdir=/rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25
outfile="${outdir}/1mm_${infile}.nii.gz"
mkdir -p "${outdir}"

echo "outdir: ${outdir}"

# Two module names are tried with output discarded.
# NOTE(review): presumably only one of them exists on a given system - confirm.
module load fsl > /dev/null 2>&1
module load FSL/6.0.5.1-foss-2021a > /dev/null 2>&1

# Step 1: resample to 1 mm isotropic voxels. Stop on failure, because step 2
# takes this output as its input.
if ! flirt -in "$inpath" -ref "$ref" -applyisoxfm 1.0 -nosearch -out "$outfile"; then
    echo "flirt resampling failed for $inpath" >&2
    exit 1
fi
echo "flirted!"

# Step 2: register the resampled image to the reference template.
flirt -in "$outfile" -ref "$ref" -out "${outdir}/F_1mm_${infile}.nii.gz"



Overwriting /rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/flirt_1mm_scott_10k.sh


In [4]:
%%bash

# Make the FLIRT job script executable, run it once here as a smoke test on
# the first list entry, then submit the full array to PBS.
job_script=/rds/general/project/c3nl_scott_students/live/sankeith/scott_10k_housekeeping/flirt_1mm_scott_10k.sh

chmod -Rf 775 "$job_script"
export PBS_ARRAY_INDEX=1
"$job_script"
qsub "$job_script"

inpath: /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25/094_S_0531_ADNI-T1_2007-06-25_09_39_07.0/mwc1t1_reoriented_fsl.nii.gz
07.0_mwc1t1_reoriented_fsl_2007-06-25_09_39_
outdir: /rds/general/project/c3nl_scott_students/ephemeral/sankeith/scott_10k_20aug25
flirted!

Final result: 
0.987304 -0.001423 0.001622 0.470130 
0.003100 0.990022 0.000370 0.879027 
41 1.303316 009349 0.9768
0.000000 0.000000 0.000000 1.000000 

1701033[].pbs-7
