# Naming convention and contents of the created files

1. `faket/class_mask.mrc` = `class_mask.mrc` sliced to valid region.
1. `faket/occupancy_mask.mrc` = `occupancy_mask.mrc` sliced to valid region.
1. `faket/reconstruction_shrec.mrc` = `reconstruction.mrc` sliced to valid region for tomogram 9.
1. `faket/projections_noiseless.mrc` = `grandmodel_unbinned.mrc` measured with Radon transform.
1. `faket/projections_content.mrc` = `faket/projections_noiseless.mrc` + noise (std=0.1) shifted & scaled according to its style*.
1. `faket/projections_noisy.mrc` = `faket/projections_noiseless.mrc` + noise (std=0.4) shifted & scaled according to its style*.
1. `faket/projections_styled.mrc` = result of NST initialized with `faket/projections_noisy.mrc`, content `faket/projections_content.mrc`, and using its style*.
1. `faket/reconstruction_content.mrc` = reconstruction of `faket/projections_content.mrc`.
1. `faket/reconstruction_noisy.mrc` = reconstruction of `faket/projections_noisy.mrc`.
1. `faket/reconstruction_styled.mrc` = reconstruction of `faket/projections_styled.mrc`.
1. `faket/reconstruction_baseline.mrc` = reconstruction of `projections.mrc`.
1. `faket/reconstruction_noiseless.mrc` = reconstruction of `faket/projections_noiseless.mrc`.

\* Each time we mention style in the text above, it refers to the `projections.mrc` file of model_(N+1). In case N=8, the style is taken from N=0.

In [None]:
%load_ext autoreload
%autoreload 2

from os.path import join as pj
import multiprocessing
import gpuMultiprocessing
from faket.data import load_mrc, save_mrc, save_conf
from faket.data import slice_to_valid, vol_to_valid
from faket.data import downsample_sinogram_space
from faket.data import get_clim, get_theta
from faket.data import match_mean_std, normalize
from faket.transform import radon_3d, reconstruct_mrc
import matplotlib.pyplot as plt
# from tqdm.notebook import tqdm
import numpy as np

In [None]:
# Root folder of the dataset; the cells below resolve every model_N sub-folder relative to it.
data_folder = 'data/shrec2021_extended_dataset/'

In [None]:
# SHREC21 provides the data in square shape even
# though the data is stored only in the center.
# The following values specify where to slice
# (given as fractions of the Z extent, from 0 to 1).
z_valid = (0.32226, 0.67382)  # Valid range normalized

In [None]:
# Crop class_mask.mrc of every model to the valid Z range
# and store the result as faket/class_mask.mrc
for N in range(10):
    model_name = f'model_{N}'
    vol_to_valid(
        data_folder, model_name, 'class_mask', z_valid,
        out_fname='faket/class_mask.mrc',
    )

In [None]:
# Crop occupancy_mask.mrc of every model to the valid Z range
# and store the result as faket/occupancy_mask.mrc
for N in range(10):
    model_name = f'model_{N}'
    vol_to_valid(
        data_folder, model_name, 'occupancy_mask', z_valid,
        out_fname='faket/occupancy_mask.mrc',
    )

In [None]:
# Crop the SHREC-provided reconstruction.mrc of the test model_9 to the
# valid Z range and store it as faket/reconstruction_shrec.mrc
vol_to_valid(
    data_folder, 'model_9', 'reconstruction', z_valid,
    out_fname='faket/reconstruction_shrec.mrc',
)

## Creating projections

In [None]:
# create faket/projections_noiseless.mrc by measuring the grandmodel_unbinned.mrc with Radon transform

for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')

    radon_kwargs = {
        'theta': get_theta(data_folder, N),
        'dose': 0,
        'out_shape': 1024,
        'slice_axis': 1,
        # circle=False because we measure with the data outside the circle
        # but later we cut the measurements to desired shape
        # SHREC did it this way - confirmed from a personal communication
        'circle': False,
    }
    conf = {
        'input_mrc': pj(model_folder, 'grandmodel_unbinned.mrc'),
        'output_mrc': pj(model_folder, 'faket/projections_noiseless.mrc'),
        'radon_kwargs': radon_kwargs,
    }

    # Measure the volume, then store the configuration next to the result
    volume = load_mrc(conf['input_mrc'])
    sinogram = radon_3d(volume, **radon_kwargs)
    save_conf(conf['output_mrc'], conf)
    save_mrc(sinogram.astype(np.float32), conf['output_mrc'], overwrite=True)
print('Done')

In [None]:
# create faket/projections_content.mrc and faket/projections_noisy.mrc
# Only the training models 0..8 are processed: we do not need this
# modality for the test model_9.
for N in range(0, 9):
    print(f'Processing N: {N}')
    style_N = (N + 1) % 9  # For the last train model we take style stats from the first train model

    conf = {  # For noisy
        'input_mrc': pj(data_folder, f'model_{N}', 'faket/projections_noiseless.mrc'),
        'style_mrc': pj(data_folder, f'model_{style_N}', 'projections.mrc'),
        'output_mrc': pj(data_folder, f'model_{N}', 'faket/projections_noisy.mrc'),
        'mean': 0.0,
        'std': 0.4,
        'clip_outliers': (0.0001, 0.9999),
        'seed': N,
    }

    conf2 = {  # Changes for content
        'output_mrc': pj(data_folder, f'model_{N}', 'faket/projections_content.mrc'),
        'std': 0.1,
    }

    volume = load_mrc(conf['input_mrc'])
    style = load_mrc(conf['style_mrc'])

    # A single noise field, seeded per model, is drawn once and reused
    # (rescaled) for both the noisy and the content output.
    rng = np.random.default_rng(seed=conf['seed'])
    noise = rng.normal(loc=conf['mean'], scale=conf['std'],
                       size=volume.size).reshape(volume.shape)

    volume = match_mean_std(volume, style)  # Scaling per tilt (bigger the abs(angle), longer the trajectory)
    volume = normalize(volume)  # Scale between [0, 1]

    volume_noisy = volume + noise
    volume_noisy = np.clip(volume_noisy, *get_clim(volume_noisy, *conf['clip_outliers']))  # Remove outliers
    volume_noisy = match_mean_std(volume_noisy, style)  # Scale back to match style

    save_conf(conf['output_mrc'], conf)
    save_mrc(volume_noisy.astype(np.float32), conf['output_mrc'], overwrite=True)

    # The ratio must be computed before conf is overwritten with the
    # content settings, because it relies on the noisy std still being in conf.
    ratio = conf['std'] / conf2['std']
    conf.update(conf2)
    volume_content = volume + noise / ratio  # Same noise just a fraction of the std
    volume_content = np.clip(volume_content, *get_clim(volume_content, *conf['clip_outliers']))  # Remove outliers
    volume_content = match_mean_std(volume_content, style)  # Scale back to match style

    save_conf(conf['output_mrc'], conf)
    save_mrc(volume_content.astype(np.float32), conf['output_mrc'], overwrite=True)
print('Done')

### Neural Style Transfer

In [None]:
# Settings shared by all neural-style-transfer runs. get_command renders every
# key that starts with '--' as a "flag value" pair; 'content' and 'style' are
# passed positionally.
nstc = {  # NEURAL STYLE TRANSFER BASE CONFIG
    # The commented-out entries below are per-tomogram values that are
    # added to a copy of this dict right before the command is built.
    # 'content': 'example.mrc',
    # 'style': 'example.mrc',
    # '--init': 'example.mrc',
    # '--output': 'example.mrc', 
    # '--random-seed': None,
    '--style-weights': 1.0,
    '--content-weight': 1.0, 
    '--tv-weight': 0,
    '--min-scale': 1024,
    '--end-scale': 1024,
    # NOTE(review): a single iteration looks like a quick-test setting - confirm before a full run
    '--iterations': 1,
    '--initial-iterations': 1,
    '--save-every': 2,
    '--step-size': 0.15,
    '--avg-decay': 0.99,
    '--style-scale-fac': 1.0,
    '--pooling': 'max',
    '--devices': 'cuda:0',
    '--seq_start' : 0,
    '--seq_end' : 61,
}

def get_command(expname, nst_command, config):
    """Build the shell command line for one neural-style-transfer run.

    The result has the form
    ``EXPNAME=<expname> <nst_command> <content> <style> <flags>``
    where ``<flags>`` are all ``config`` entries whose key starts with
    ``--``, rendered as ``key value`` in dict order.

    Args:
        expname: Value assigned to the EXPNAME environment variable.
        nst_command: Command that launches the style transfer.
        config: Mapping holding the positional 'content' and 'style'
            entries plus any number of '--flag' entries.

    Returns:
        The assembled command as a single string.
    """
    positional = f"{config['content']} {config['style']}"
    flags = ' '.join(
        f'{key} {value}'
        for key, value in config.items()
        if key.startswith('--')
    )
    return f"EXPNAME={expname} {nst_command} {positional} {flags}"

In [None]:
# create faket/projections_styled.mrc
gpu_id_list = [0]
NST_command = 'python3 -m faket.style_transfer.cli'

command_queue = []
# Only the training models 0..8 are styled: we do not need this modality
# for the test model_9, and (N + 1) % 9 is only a valid "next model"
# wrap-around for N in 0..8.
for N in range(0, 9):
    style_N = (N + 1) % 9  # For the last train model we take style stats from the first train model

    EXPNAME = f'TOMOGRAM_{N}'  # Just for visualizing the progress
    tomo_folder = pj(data_folder, f'model_{N}', 'faket')

    # Start from the shared settings and add the per-tomogram entries
    conf = nstc.copy()
    conf.update({
        'content': pj(tomo_folder, 'projections_content.mrc'),
        'style': pj(data_folder, f'model_{style_N}', 'projections.mrc'),
        '--init': pj(tomo_folder, 'projections_noisy.mrc'),
        '--output': pj(tomo_folder, 'projections_styled.mrc'),
        '--random-seed': N,
    })

    command = get_command(EXPNAME, NST_command, conf)
    command_queue.append(command)

# Run all the commands (returns list of failed commands if any)
gpuMultiprocessing.queue_runner(command_queue, gpu_id_list,
                                env_gpu_name='CUDA_VISIBLE_DEVICES',
                                processes_per_gpu=6, allowed_restarts=1)

## Computing reconstructions

In [None]:
# Keyword arguments shared by all reconstruct_mrc calls; each reconstruction
# cell copies this dict and adds its own input_mrc, theta and output_mrc.
recc = {  # RECONSTRUCTION BASE CONFIG
    'downsample_angle' : 1,  # Sinogram downsampling in theta dimension (1 = no downsampling)
    'downsample_pre' : 2,  # Sinogram downsampling (1 = no downsampling)
    'order' : 3,  # Downsampling in space with spline interpolation of order (0 - 5)
    'filtering' : 'ramp2d',  # Filter used during reconstruction in FBP algorithm
    'filterkwargs' : {'crowtherFreq': 25, 'radiusCutoff': 230, 'angularCutoff': (0, 83)},
    'downsample_post' : 1,  # Reconstruction downsampling
    'ncpus': 61,  # Number of CPUs to use while reconstructing (hard-coded; alternative: multiprocessing.cpu_count())
    'z_valid': z_valid # 2-tuple range of valid pixels in Z dimension normalized from 0 to 1. (0., 1.) or None for all.
}

In [None]:
# reconstruct faket/projections_content.mrc to produce faket/reconstruction_content.mrc
# NOTE(review): this also visits the test model_9, while the cell creating
# projections_content.mrc is commented as skipping it - confirm the input exists.
for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')
    # Base reconstruction settings extended with the per-model paths
    conf = {
        **recc,
        'input_mrc': pj(model_folder, 'faket/projections_content.mrc'),
        'theta': pj(model_folder, 'alignment_simulated.txt'),
        'output_mrc': pj(model_folder, 'faket/reconstruction_content.mrc'),
    }
    reconstruct_mrc(**conf)

In [None]:
# reconstruct faket/projections_noisy.mrc to produce faket/reconstruction_noisy.mrc
# NOTE(review): this also visits the test model_9, while the cell creating
# projections_noisy.mrc is commented as skipping it - confirm the input exists.
for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')
    # Base reconstruction settings extended with the per-model paths
    conf = {
        **recc,
        'input_mrc': pj(model_folder, 'faket/projections_noisy.mrc'),
        'theta': pj(model_folder, 'alignment_simulated.txt'),
        'output_mrc': pj(model_folder, 'faket/reconstruction_noisy.mrc'),
    }
    reconstruct_mrc(**conf)

In [None]:
# reconstruct faket/projections_styled.mrc to produce faket/reconstruction_styled.mrc
# NOTE(review): this also visits the test model_9, while the cell creating
# projections_styled.mrc is commented as skipping it - confirm the input exists.
for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')
    # Base reconstruction settings extended with the per-model paths
    conf = {
        **recc,
        'input_mrc': pj(model_folder, 'faket/projections_styled.mrc'),
        'theta': pj(model_folder, 'alignment_simulated.txt'),
        'output_mrc': pj(model_folder, 'faket/reconstruction_styled.mrc'),
    }
    reconstruct_mrc(**conf)

In [None]:
# reconstruct projections.mrc to produce faket/reconstruction_baseline.mrc
for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')
    # Base reconstruction settings extended with the per-model paths
    conf = {
        **recc,
        'input_mrc': pj(model_folder, 'projections.mrc'),
        'theta': pj(model_folder, 'alignment_simulated.txt'),
        'output_mrc': pj(model_folder, 'faket/reconstruction_baseline.mrc'),
    }
    reconstruct_mrc(**conf)

In [None]:
# reconstruct faket/projections_noiseless.mrc to produce faket/reconstruction_noiseless.mrc
for N in range(10):
    print(f'Processing N: {N}')
    model_folder = pj(data_folder, f'model_{N}')
    # Base reconstruction settings extended with the per-model paths
    conf = {
        **recc,
        'input_mrc': pj(model_folder, 'faket/projections_noiseless.mrc'),
        'theta': pj(model_folder, 'alignment_simulated.txt'),
        'output_mrc': pj(model_folder, 'faket/reconstruction_noiseless.mrc'),
    }
    reconstruct_mrc(**conf)

# Deep Finder experiments

1. `DF('faket/reconstruction_baseline.mrc')` - train for 30 epochs on 9 tomograms, eval on test tomogram every 5 epochs
2. `DF('faket/reconstruction_content.mrc')` - train for 30 epochs on 9 tomograms, eval on test tomogram every 5 epochs
3. `DF('faket/reconstruction_noisy.mrc')` - train for 30 epochs on 9 tomograms, eval on test tomogram every 5 epochs
4. `DF('faket/reconstruction_styled.mrc')` - train for 30 epochs on 9 tomograms, eval on test tomogram every 5 epochs

In [None]:
import os
from itertools import product

In [None]:
def get_full_DF_training_command(tomo_path, DF_training_command, config):
    """Build the shell command line that launches one DeepFinder training.

    Args:
        tomo_path: Not used when building the command; kept in the
            signature for existing callers.
        DF_training_command: Command that starts the training script.
        config: Mapping with a 'training_tomograms' entry (a sequence of
            ``(tomogram_id, tomogram_name)`` string pairs) plus any number
            of '--flag' entries, which are rendered as ``key value``.

    Returns:
        The assembled command string; it ends with a single trailing space.
    """
    # Transpose the (id, name) pairs into one tuple of ids and one of names
    columns = list(zip(*config['training_tomograms']))
    tomogram_ids = ' '.join(columns[0])
    tomogram_names = ' '.join(columns[1])

    flags = ' '.join(
        f'{key} {value}'
        for key, value in config.items()
        if key.startswith('--')
    )
    pieces = [
        f"{DF_training_command} ",
        f"--training_tomogram_ids {tomogram_ids} ",
        f"--training_tomograms {tomogram_names} ",
        f"{flags} ",
    ]
    return ''.join(pieces)

In [None]:
gpu_id_list = [0]
tomo_path = 'data/shrec2021_extended_dataset/'
DF_training_command = 'PYTHONHASHSEED=0 python faket/deepfinder/launch_training.py'

# One training command is queued per seed index 1..num_seeds
experiment_name = "exp1"
num_seeds = 1

command_queue = []

for N in range(1, num_seeds + 1):
    training_conf = {
        "--training_tomo_path": tomo_path,
        "training_tomograms": [[str(i), "baseline"] for i in range(9)],
        "--num_epochs": 2,
        "--out_path": f"results/{experiment_name}/seed{N}/",
        "--save_every": 1,
        # Both seeds are strings derived from the seed index N
        "--seed1": f"1{N}2",
        "--seed2": f"1{N}23",
    }

    command = get_full_DF_training_command(tomo_path, DF_training_command, training_conf)
    command_queue.append(command)


# Run all the commands (returns list of failed commands if any)
gpuMultiprocessing.queue_runner(
    command_queue, gpu_id_list,
    env_gpu_name='CUDA_VISIBLE_DEVICES',
    processes_per_gpu=2, allowed_restarts=0,
)

In [None]:
def get_full_DF_analysis_command(DF_analysis_command, config):
    """Build the shell command line for a DeepFinder analysis step.

    Args:
        DF_analysis_command: Command that starts the analysis script.
        config: Mapping whose '--flag' entries are rendered as
            ``key value`` in dict order; other keys are ignored.

    Returns:
        ``"<DF_analysis_command> <flags> "`` - note the trailing space.
    """
    flag_parts = []
    for key, value in config.items():
        if key.startswith('--'):
            flag_parts.append(f'{key} {value}')
    flags = ' '.join(flag_parts)
    return f"{DF_analysis_command} {flags} "

In [None]:
gpu_id_list = [1]
tomo_path = 'data/shrec2021_extended_dataset/'
DF_segmentation_command ='python faket/deepfinder/launch_segmentation.py'

experiment_name = "exp1"
seed_ids = [1, 2]
num_epochs = [1,2]
test_tomograms = ["baseline"]
test_tomograms_idx = 9  # Index of the test tomogram passed to every segmentation run

command_queue = []

# One segmentation command per (seed, epoch checkpoint, test tomogram) combination
for N, num_epoch, test_tomogram in product(seed_ids, num_epochs, test_tomograms):
    analysis_conf ={
        "--test_tomo_path" : tomo_path,
        # Use the variable defined above instead of repeating the literal,
        # so the test index is configured in exactly one place.
        "--test_tomo_idx" : test_tomograms_idx,
        "--test_tomogram" : test_tomogram,
        "--num_epochs" : num_epoch,
        # Weights are read from and results written to the same per-seed folder
        "--DF_weights_path" : "results/" + experiment_name + "/seed" + str(N) + "/",
        "--out_path" : "results/" + experiment_name + "/seed" + str(N) + "/",
    }

    command = get_full_DF_analysis_command(DF_segmentation_command, analysis_conf)
    command_queue.append(command)


# Run all the commands (returns list of failed commands if any)
gpuMultiprocessing.queue_runner(command_queue, gpu_id_list,
                                env_gpu_name='CUDA_VISIBLE_DEVICES',
                                processes_per_gpu=2, allowed_restarts=0)

In [None]:
# Build the clustering and the evaluation command queues; nothing is run here
DF_clustering_command = 'python faket/deepfinder/launch_clustering.py'
DF_evaluation_command = 'python faket/deepfinder/launch_evaluation.py'
experiment_name = "exp1"
seed_ids = [1, 2]
num_epochs = [1, 2]
test_tomograms = ["baseline"]
num_processes = 2

command_queue_clustering = []
command_queue_evaluation = []

# Both steps receive the same configuration for a given combination
for N, num_epoch, test_tomogram in product(seed_ids, num_epochs, test_tomograms):
    seed_folder = f"results/{experiment_name}/seed{N}/"
    analysis_conf = {
        "--test_tomogram": test_tomogram,
        "--test_tomo_idx": 9,
        "--num_epochs": num_epoch,
        "--label_map_path": seed_folder,
        "--out_path": seed_folder,
    }

    command_clustering = get_full_DF_analysis_command(DF_clustering_command, analysis_conf)
    command_evaluation = get_full_DF_analysis_command(DF_evaluation_command, analysis_conf)

    command_queue_clustering.append(command_clustering)
    command_queue_evaluation.append(command_evaluation)


In [None]:
# Run the queued clustering commands; the runner is handed a list of
# CPU slot ids in the place where the other cells pass GPU ids
num_cpu = 4
cpu_id_list = [*range(num_cpu)]

gpuMultiprocessing.queue_runner(
    command_queue_clustering,
    cpu_id_list,
    env_gpu_name='CUDA_VISIBLE_DEVICES',
    processes_per_gpu=1,
    allowed_restarts=0,
)

In [None]:
# Run the queued evaluation commands; the runner is handed a list of
# CPU slot ids in the place where the other cells pass GPU ids
num_cpu = 4
cpu_id_list = [*range(num_cpu)]

gpuMultiprocessing.queue_runner(
    command_queue_evaluation,
    cpu_id_list,
    env_gpu_name='CUDA_VISIBLE_DEVICES',
    processes_per_gpu=2,
    allowed_restarts=0,
)