In [1]:
# Notebook setup: select the interactive "notebook" matplotlib backend, load
# the autoreload extension (later cells invoke %autoreload explicitly), and
# display the working directory, which the relative paths used below
# ('../../param_search', '<expt_name>.jobs') are resolved against.
%matplotlib notebook
%load_ext autoreload
%pwd

'/ocean/projects/asc170022p/mtragoza/lung-project/notebooks'

In [2]:
import os
import sys

import pandas as pd

sys.path.append('../../param_search')
import param_search as ps

## Setup experiment

[[Setup](#Setup-experiment)] [[Submit](#Submit-jobs)] [[Monitor](#Monitor-jobs)] [[Analyze](#Analyze-results)]

In [8]:
# Define the SLURM batch-script template and the job-name format.
# Both are str.format-style templates: every {field} is filled from one
# parameter combination (the next cell previews name_format.format(**p)), and
# {job_name} is presumably the formatted name_format string substituted by
# ps.submit -- confirm against param_search.
# '\\' inside this non-raw string becomes a single backslash in the generated
# script, i.e. a bash line continuation; '%J' is not a str.format field and is
# passed through unchanged to SLURM.
template = '''\
#!/bin/bash
#SBATCH --job-name={job_name}
#SBATCH --account=bio170034p
#SBATCH --partition=BatComputer
#SBATCH --gres=gpu:rtx6000:1
#SBATCH --mem=64000M
#SBATCH -x v034
#SBATCH --time=48:00:00
#SBATCH -o %J.stdout
#SBATCH -e %J.stderr
#SBATCH --mail-type=all

hostname
pwd
module load anaconda3
conda activate /ocean/projects/asc170022p/mtragoza/mambaforge/envs/lung-project
nvidia-smi

python ../../../train.py \\
    --data_name emory \\
    --data_root /ocean/projects/asc170022p/shared/Data/4DLungCT/Emory/ \\
    --mesh_version {mesh_version} \\
    --test_case {test_case} \\
    --test_phase {test_phase} \\
    --rho_value {rho_value} \\
    --conv_channels {conv_channels} \\
    --output_func {output_func} \\
    --interp_size {interp_size} \\
    --save_prefix {job_name} 

echo Done
'''
# Job names encode only test_case, test_phase, rho_value and output_func.
# NOTE(review): mesh_version, conv_channels and interp_size are not part of the
# name, so job names (and therefore --save_prefix) stay unique only while each
# of those parameters takes a single value -- confirm before sweeping them.
name_format = 'train__emory__{test_case}__{test_phase}__{rho_value}__{output_func}'

In [9]:
# Parameter grid for this experiment. Each keyword matches a {field} of the
# job template; list entries are the values to sweep over.
grid = dict(
    mesh_version=11,
    # other test cases: ['Case1Pack', 'Case2Pack', 'Case3Pack', 'Case4Pack',
    #   'Case5Pack', 'Case6Pack', 'Case7Pack', 'Case8Deploy', 'Case9Pack']
    test_case=[None],
    # other phases: 10, 20, 30, 40, 50, 60, 70, 80, 90
    test_phase=[0],
    rho_value=[0, 1000, 'anat'],
    conv_channels=[32],
    interp_size=[5],
    output_func=['relu', 'softplus', 'exp'],
)
param_space = ps.ParamSpace(**grid)

# Preview the job name generated for every combination, then the total count.
for combo in param_space:
    job_label = name_format.format(**combo)
    print(job_label)

n_combos = len(param_space)
print(n_combos)

train__emory__None__0__0__relu
train__emory__None__0__0__softplus
train__emory__None__0__0__exp
train__emory__None__0__1000__relu
train__emory__None__0__1000__softplus
train__emory__None__0__1000__exp
train__emory__None__0__anat__relu
train__emory__None__0__anat__softplus
train__emory__None__0__anat__exp
9


## Submit jobs

[[Setup](#Setup-experiment)] [[Submit](#Submit-jobs)] [[Monitor](#Monitor-jobs)] [[Analyze](#Analyze-results)]

In [10]:
# Log of experiment names in date order; the most recent entry is the one this
# notebook currently operates on (work directory and '<expt_name>.jobs' file).
expt_history = (
    '2024-11-22__phantom__250',
    '2024-11-22__phantom__250__resub',
    '2024-11-30__emory__phase',
    '2024-12-02__emory__interp_size',
    '2024-12-03__emory__gpu_shared',
    '2024-12-07__emory__interface',
)
expt_name = expt_history[len(expt_history) - 1]
expt_name

'2024-12-07__emory__interface'

In [32]:
# Submit the jobs described by (template, name_format, param_space) and save
# the returned job table to '<expt_name>.jobs', which the monitoring cell
# reads back.
#
# The submission is guarded so that re-running the notebook (e.g. Restart &
# Run All) does not resubmit every job and overwrite the saved job table of
# jobs that are still queued or running.
do_submit = True        # master switch: set False to never submit from this cell
force_resubmit = False  # set True to resubmit even if the jobs file already exists
jobs_file = f'{expt_name}.jobs'

if do_submit and (force_resubmit or not os.path.exists(jobs_file)):
    jobs = ps.submit(template, name_format, param_space, work_dir=expt_name)
    jobs.to_csv(jobs_file)
elif do_submit:
    print(f'{jobs_file} already exists; skipping submission '
          '(set force_resubmit = True to submit again)')

100%|██████████| 9/9 [00:00<00:00, 105.67it/s]


  .replace('', float('nan')).map(pd.to_numeric)


## Monitor jobs

[[Setup](#Setup-experiment)] [[Submit](#Submit-jobs)] [[Monitor](#Monitor-jobs)] [[Analyze](#Analyze-results)]

In [34]:
# Reload the saved job table (first CSV column is the index) and query the
# current status of those jobs, including parsed stderr content.
jobs_csv = f'{expt_name}.jobs'
jobs = pd.read_csv(jobs_csv, index_col=0)
status = ps.status(jobs, parse_stderr=True)
status

  .replace('', float('nan')).map(pd.to_numeric)
 'PENDING']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  status.update(new_status)
 '(Priority)' '(Priority)' '(Priority)']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  status.update(new_status)
  status.update(new_status)


Unnamed: 0_level_0,index,test_case,test_phase,rho_value,conv_channels,interp_size,output_func,job_name,partition,job_state,node_id,runtime,work_dir,array_idx,stdout,stderr
job_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
27477882,0,,0,0,32,5,relu,train__emory__None__0__0__relu,BatComputer,,,,/ocean/projects/asc170022p/mtragoza/lung-proje...,,dv004.ib.bridges2.psc.edu\n/ocean/projects/asc...,AssertionError: u_pred_dofs contains nan (case...
27477883,1,,0,0,32,5,softplus,train__emory__None__0__0__softplus,BatComputer,RUNNING,dv004,7:31,/ocean/projects/asc170022p/mtragoza/lung-proje...,,dv004.ib.bridges2.psc.edu\n/ocean/projects/asc...,
27477884,2,,0,0,32,5,exp,train__emory__None__0__0__exp,BatComputer,PENDING,(Resources),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477885,3,,0,1000,32,5,relu,train__emory__None__0__1000__relu,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477886,4,,0,1000,32,5,softplus,train__emory__None__0__1000__softplus,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477887,5,,0,1000,32,5,exp,train__emory__None__0__1000__exp,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477888,6,,0,anat,32,5,relu,train__emory__None__0__anat__relu,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477889,7,,0,anat,32,5,softplus,train__emory__None__0__anat__softplus,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,
27477890,8,,0,anat,32,5,exp,train__emory__None__0__anat__exp,BatComputer,PENDING,(Priority),0:00,/ocean/projects/asc170022p/mtragoza/lung-proje...,,,


In [35]:
# Replace missing values with explicit labels before grouping: groupby drops
# NaN keys by default, so unlabeled rows would vanish from the summary.
# A missing job_state is labeled 'DONE' (presumably the job has left the
# queue -- confirm), and a missing stderr is labeled 'N/A'.
for column, placeholder in (('job_state', 'DONE'), ('stderr', 'N/A')):
    status[column] = status[column].fillna(placeholder)

# Number of jobs per (state, error message) pair.
status.groupby(['job_state', 'stderr'])[['job_name']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,job_name
job_state,stderr,Unnamed: 2_level_1
DONE,AssertionError: u_pred_dofs contains nan (case7_T00.nii),1
PENDING,,7
RUNNING,,1


In [37]:
# Show every field of the first row of the status table as a Series.
status.iloc[0]

index                                                            0
test_case                                                      NaN
test_phase                                                       0
rho_value                                                        0
conv_channels                                                   32
interp_size                                                      5
output_func                                                   relu
job_name                            train__emory__None__0__0__relu
partition                                              BatComputer
job_state                                                     DONE
node_id                                                        NaN
runtime                                                        NaN
work_dir         /ocean/projects/asc170022p/mtragoza/lung-proje...
array_idx                                                      NaN
stdout           dv004.ib.bridges2.psc.edu\n/ocean/projects/as

## Analyze results

[[Setup](#Setup-experiment)] [[Submit](#Submit-jobs)] [[Monitor](#Monitor-jobs)] [[Analyze](#Analyze-results)]

In [None]:
# Build the metrics table `m` for the jobs in `jobs`; the analysis cells below
# all read from it. NOTE(review): sep=',' is presumably the delimiter of the
# per-job metrics files -- confirm against param_search.
m = ps.metrics(jobs, sep=',')
m

In [None]:
# Highest epoch value recorded for each job.
epoch_by_job = m.groupby(['job_name'])[['epoch']]
epoch_by_job.max()

In [None]:
# Jobs whose highest recorded epoch is still below 100.
# The original cell indexed an undefined name `d` (NameError on a fresh
# kernel); the per-job maximum epoch is now computed here so the cell is
# self-contained. NOTE(review): this assumes `d` was the per-job max-epoch
# table from the previous cell -- confirm.
last_epoch = m.groupby(['job_name'])[['epoch']].max()
unfinished_jobs = last_epoch[last_epoch.epoch < 100]
unfinished_jobs

In [None]:
# Highest batch value recorded for each job.
batch_by_job = m.groupby(['job_name'])[['batch']]
batch_by_job.max()

In [None]:
# List the column labels of the metrics table.
m.columns

In [None]:
%autoreload
fig = ps.plot(
    m[(m.phase == 'train') & (m.epoch > 190) & (m.conv_channels > 8)],
    x=['dummy_targets', 'conv_channels', 'output_func'],
    y=['u_error', 'e_error', 'e_anat_corr', 'CTE'],
    hue=None,
    legend=True,
    legend_kws=dict(bbox_to_anchor=(0, -0.2)),
    tight=True,
    height=2.25, width=2.75
)

In [None]:
%autoreload
fig = ps.plot(
    m[(m.phase == 'train') & (m.epoch > 190) & (m.conv_channels > 8)],
    x=['dummy_targets', 'conv_channels', 'output_func'],
    y=['u_error', 'e_error', 'e_anat_corr', 'CTE'],
    hue=None,
    legend=True,
    legend_kws=dict(bbox_to_anchor=(0, -0.2)),
    tight=True,
    height=2.25, width=2.75
)

In [None]:
%autoreload
fig = ps.plot(
    m[(m.phase == 'test') & (m.epoch > 190) & (m.conv_channels > 8)],
    x=['dummy_targets', 'conv_channels', 'output_func'],
    y=['u_error', 'e_error', 'e_anat_corr', 'CTE'],
    hue=None,
    legend=True,
    legend_kws=dict(bbox_to_anchor=(0, -0.2)),
    tight=True,
    height=2.25, width=2.75
)

In [None]:
# Summary statistics of the test-phase metrics from late epochs (epoch > 190,
# conv_channels > 8), grouped by parameter setting.
test_rows = m[(m.phase == 'test') & (m.epoch > 190) & (m.conv_channels > 8)]
group_keys = ['dummy_targets', 'conv_channels', 'output_func', 'phase']
metric_cols = ['u_error', 'e_error', 'e_anat_corr', 'CTE']
test_rows.groupby(group_keys)[metric_cols].describe()