# Setup 

In [None]:
# Show the directory the kernel was started in. The explicit `%` form matches the
# other `%pwd` calls in this notebook and does not depend on automagic being on.
%pwd

In [None]:
from pathlib import Path
import pandas as pd
# Widen pandas' display limits so long file paths and wide tables are shown in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# Fully-qualified option name, consistent with the two calls above, instead of
# relying on set_option's matching of abbreviated names.
pd.set_option('display.max_colwidth', 500)

In [None]:
# Display the pandas version in use, for provenance.
pd.__version__

In [None]:
# Root directory of the project.
project_dir = Path('/data/Dnude/')
# Report the working directory, change into the project root, and report again.
# The relative paths defined later (e.g. scripts_dir) resolve against this directory.
%pwd
%cd {project_dir}
%pwd

In [None]:
# Tag that is embedded in the names of generated command files.
analysis_version = "2018_05_09"
# Locations (relative to the current working directory) of the defacing helper files.
scripts_dir  = Path('bids_work/mindcontrol_docs/defacing_files/')
sing_image = scripts_dir.joinpath('pydeface_v2-2018-01-29-e2252feba083.img')
swarm_dict_pkld = scripts_dir.joinpath('swarm_dict.pklz')

# swarm_dict is a small registry that is persisted between sessions:
#   * already defined in this session -> save the current state to disk
#   * otherwise, a saved copy exists   -> load it
#   * otherwise                        -> start with an empty registry
if 'swarm_dict' in locals():
    swarm_dict.to_pickle(swarm_dict_pkld)
elif swarm_dict_pkld.exists():
    # NOTE: unpickling can execute arbitrary code; only load a file this project wrote.
    swarm_dict = pd.read_pickle(swarm_dict_pkld)
else:
    # Explicit object dtype: the values stored here are Path objects, and an empty
    # Series without a dtype has a version-dependent default (with a warning on some
    # pandas releases).
    swarm_dict = pd.Series(dtype=object)



## Pydeface docker image

Using a docker image for pydeface for reproducibility. Building image with neurodocker.

The above makes the pydeface docker container that is used below.

## Setup to deface all brains (singularity image actually used though)

In [None]:
# Directory that will hold the original ("faced") scans once they are moved aside.
original_scans = scripts_dir.joinpath('faced_scans')
# exist_ok makes the cell safe to re-run without a separate existence check;
# parents=True also creates missing intermediate directories.
original_scans.mkdir(parents=True, exist_ok=True)
    

Using ipyparallel to speed things up a little:

In [None]:
from bids.grabbids import BIDSLayout
# Index the BIDS dataset (and its derivatives folder) so files can be queried as a table.
project_root = '/data/Dnude/bids_work/mindcontrol_docs/linked_bids/'
layout = BIDSLayout(project_root)
layout_der = BIDSLayout(Path(project_root,'derivatives'))
df_bids = layout.as_data_frame()
# Select the anatomical NIfTI files that have not been defaced yet, and record the
# real location of each one. Built as a single chain so df_scans is a new frame:
#   * regex=False matches 'nii.gz' literally ('.' would otherwise match any character)
#   * assign() adds orig_path without writing into a slice of df_bids, and resolves
#     only the selected scans rather than every file in the layout
df_scans = (
    df_bids
    .loc[df_bids.path.str.contains('nii.gz', regex=False), :]
    .query('type !="defaced"')
    .query('modality == "anat"')
    .assign(
        # Follow symlinks, then map the resolved prefix back onto the /data mount.
        orig_path=lambda df: df.path.apply(
            lambda x: Path(x).resolve().as_posix().replace('/gpfs/gsfs4/users','/data')
        )
    )
)

In [None]:

# Preview the first rows of the scan table.
df_scans.head()

## Defacing step: 

In [None]:
# Print how many scans are in the table, then preview its first rows.
print(len(df_scans))
df_scans.head()

In [None]:
## check the project directory below
def make_deface_cmd(nifti_path,datadir,container_image='pydeface_v2',dev=False,singularity=True,other_images=None,template=None):
    """
    Build the shell command that runs pydeface on one scan inside a container.

    ``datadir`` is bind-mounted at ``/mnt`` inside the container and the scan is
    addressed relative to that mount point.

    Parameters
    ----------
    nifti_path : str or Path
        Scan to deface. When it lies under ``datadir`` it is rewritten to
        ``/mnt/<relative path>``; otherwise it is joined onto ``/mnt`` as given
        (so an absolute path is passed through unchanged).
    datadir : str or Path
        Host directory to mount at ``/mnt``.
    container_image : str or Path
        Docker image name, or path to the singularity image.
    dev : bool
        Also mount the development checkout of pydeface over the installed package.
    singularity : bool
        Rewrite the docker invocation into a ``singularity exec`` invocation.
    other_images : iterable of str or Path, optional
        Extra images, passed verbatim after ``--deface_others_with_base``.
    template : str or Path, optional
        Template image, passed after ``--template``.

    Returns
    -------
    str
        The complete command line.
    """
    from pathlib import Path
    datadir = Path(datadir)
    try:
        nifti_path = Path(nifti_path).relative_to(datadir)
    except ValueError:
        # Not located under datadir: keep the path as it was given.
        nifti_path = Path(nifti_path)
    container_image = Path(container_image).as_posix()
    cmd = "docker run -v " +  datadir.absolute().as_posix() + ':/mnt'
    if dev:
        cmd += ' -v /data/rodgersleejg/pydeface/pydeface/:/opt/conda/envs/neuro/lib/python3.6/site-packages/pydeface/'

    cmd += ' --rm ' + container_image + \
    " bash -c 'source activate neuro; /neurodocker/startup.sh pydeface"

    if other_images:
        cmd += " --deface_others_with_base " + ' '.join(Path(p).as_posix() for p in other_images)

    if template:
        # The leading space keeps the flag from fusing onto the preceding token;
        # str() lets a Path be passed as well as a plain string.
        cmd += ' --template ' + str(template)

    cmd += " --force --verbose"  + \
    " " + Path('/mnt',nifti_path).as_posix() + "'"

    if singularity:
        # Translate the docker call: different launcher, no --rm, and -B instead of -v
        # for bind mounts.
        cmd = cmd.replace('docker run','module load singularity;singularity exec -H /home/rodgersleejg/temp_for_singularity').replace(' --rm','').replace(' -v ', ' -B ',)
    return cmd

def run_cmd(cmd):
    """
    Print a shell command, run it, and return the output it produced.

    Relies on IPython's ``!`` shell escape, so it only works inside an
    IPython/Jupyter session.
    """
    print(cmd)
    # `{cmd}` interpolates the Python variable into the shell line; assigning the
    # result of `!` captures the command's output instead of displaying it.
    output = !{cmd}
    return output

In [None]:
# Build one singularity deface command per scan, then show the first few of them.
df_scans['cmd'] = df_scans.apply(
    lambda scan: make_deface_cmd(
        scan.orig_path,
        project_dir,
        container_image=sing_image,
        dev=False,
        singularity=True,
    ),
    axis=1,
)
df_scans.head().cmd

In [None]:
# Register the path of the swarm command file for this analysis version.
swarm_dict['deface'] = scripts_dir.joinpath(f'deface{analysis_version}.cmd')
swarm_dict['deface']

In [None]:
# Write one deface command per line. Every scan is included (no slicing), because
# the backup and rename steps further down operate on all rows of df_scans.
swarm_dict['deface'].write_text('\n'.join(df_scans.cmd))

In [None]:
# Sanity check: show the first five lines of the command file that was just written.
swarm_dict['deface'].read_text().splitlines()[:5]

In [None]:
# Submit the generated command file with `swarm`; logs are written to `swarm_log`.
# NOTE(review): the resource/partition/time flags look site-specific — confirm them
# against the local swarm documentation before reusing this line.
!swarm -f {swarm_dict['deface']}  -g 4 --logdir swarm_log --partition quick,nimh -p 2 -b 4 --time 00:10:00

## Tidy up...

In [None]:
# Display the directory that the original scans are backed up into.
original_scans

In [None]:
def get_backup_dest(backup_dir,scan_path):
    """
    Return where a scan's backup lives inside ``backup_dir``.

    The backup file is named after the directory that contains the scan,
    with ``_T1.nii.gz`` appended.
    """
    parent_name = Path(scan_path).parent.name
    backup_name = f'{parent_name}_T1.nii.gz'
    return backup_dir.joinpath(backup_name)
    
# Pair every scan with the location its original will be backed up to.
df_paths = df_scans.assign(original_path=lambda df: df.orig_path)
df_paths = df_paths.assign(
    backup_path=lambda df: df.original_path.apply(
        lambda scan: get_backup_dest(backup_dir=original_scans, scan_path=scan)
    )
)

# Two scans sharing a backup path would overwrite each other, so require uniqueness.
assert df_paths.duplicated('backup_path').sum() == 0

In [None]:
# Preview the table of original and backup paths.
df_paths.head()

### Backup original scans

In [None]:
def move_scans(df_row,source=None,destination=None,overwrite=False):
    """
    Move one scan from the path in column ``source`` to the path in column ``destination``.

    Intended for ``DataFrame.apply(..., axis=1)``. Problems are reported with a
    printed message instead of an exception, so one bad row does not abort the
    whole apply.

    Parameters
    ----------
    df_row : mapping (e.g. a DataFrame row)
        Holds the two paths.
    source, destination : str
        Keys/column names of the path to move from and the path to move to.
    overwrite : bool
        When False (default) an existing destination is left untouched.

    Returns
    -------
    None
    """
    # Imported locally so the function does not depend on another cell's imports.
    import shutil

    source_file = Path(df_row[source])
    target_file = Path(df_row[destination])
    if not source_file.exists():
        print(f'{source_file} does not exist')
        return
    if target_file.exists() and not overwrite:
        print(f'{target_file} exists. Must set overwrite argument to True')
        return
    try:
        # shutil.move rather than a shell `mv`: the paths are passed as arguments,
        # so spaces or shell metacharacters in a path cannot split or alter the call.
        shutil.move(str(source_file), str(target_file))
    except OSError as err:
        print(f'Could not move {source_file} to {target_file}: {err}')


In [None]:
# Trial run: back up only the rows whose index label is at most 2.
(df_paths
 .loc[:2, :]
 .apply(move_scans, axis=1, source='original_path', destination='backup_path'));
# df_paths.apply(move_scans, source='backup_path', destination='original_path', axis=1);

In [None]:
# For the trial rows, check that each backup file now exists.
df_paths.loc[:2, :].backup_path.apply(lambda backup: Path(backup).exists())

In [None]:
# For the trial rows, check whether a file is still present at the original path.
df_paths.loc[:2, :].original_path.apply(lambda original: Path(original).exists())

In [None]:
# Back up the remaining scans (index labels 3 and above).
(df_paths
 .loc[3:, :]
 .apply(move_scans, axis=1, source='original_path', destination='backup_path'));
# df_paths.apply(move_scans, source='backup_path', destination='original_path', axis=1);
# df_paths.query('path.str.contains("derivatives")').apply(move_scans, source='backup_path', destination='original_path', axis=1);

In [None]:
# check scans are present:
# for f in df_paths.backup_path:
#     !ls {f}

### Rename defaced scans for BIDS compliance

In [None]:
# Path of the defaced file for each scan: same directory as the original, with the
# file name cut at its first '.' and '_defaced.nii.gz' appended.
# Only the file name is rewritten, so a '.' inside a directory name cannot truncate
# the path.
# NOTE(review): presumably this matches the output name pydeface writes — confirm.
df_paths['defaced_path'] = (df_paths.
                             original_path.
                             apply(
                                 lambda x:
                                 Path(x).with_name(Path(x).name.split('.')[0] + '_defaced.nii.gz'))
)



In [None]:
# df_paths.loc[df_paths.backup_path.apply(lambda x: not Path(x).exists()),:]
# List the scans whose defaced file is not on disk.
df_paths.loc[
    df_paths.defaced_path.apply(lambda defaced: not Path(defaced).exists()),
    :,
]

In [None]:
# Move every defaced scan to the path its original used to occupy.
df_paths.apply(
    move_scans,
    axis=1,
    source='defaced_path',
    destination='original_path',
);

In [None]:
# df_paths
# df_paths.loc[1:2,:].apply(move_scans, source='defaced_path', destination='original_path', axis=1);

In [None]:
# Display the full table of original, backup and defaced paths.
df_paths

In [None]:
# check scans are present:
# for f in df_paths.original_path:
#     !ls {f}