In [1]:
import re

import pandas as pd


def data_preparation(df: pd.DataFrame, keep_ratio: float = 0.8) -> pd.DataFrame:
    """Aggregate profiling samples per function and keep the dominant hotspots.

    Args:
        df: Profiling samples. Must contain the columns "Module", "Function",
            "CPU Time", "Memory Bound(%)" and "Vectorization:% of FP Ops(%)".
        keep_ratio: Fraction (0-1) of the summed mean CPU time to retain. Functions
            are kept, in decreasing order of mean CPU time, while their cumulative
            share stays strictly below ``keep_ratio * 100`` percent. The most
            expensive function is always kept, even when it exceeds the ratio
            on its own.

    Returns:
        One row per retained (Module, Function) pair, indexed from 1, sorted by
        the module's total mean CPU time and then by the function's mean CPU time
        (both descending). Besides the mean/std aggregates it carries
        "perc_makespan", "cumperc_makespan" (recomputed in the returned order),
        "module_cpu", "func_short", "module_short" and an "index" column holding
        the row position before the final renumbering.
    """
    stats = (
        df.groupby(["Module", "Function"])
        .agg(
            mean_cpu=pd.NamedAgg(column="CPU Time", aggfunc="mean"),
            std_cpu=pd.NamedAgg(column="CPU Time", aggfunc="std"),
            mean_mem_bound=pd.NamedAgg(column="Memory Bound(%)", aggfunc="mean"),
            std_mem_bound=pd.NamedAgg(column="Memory Bound(%)", aggfunc="std"),
            mean_fp=pd.NamedAgg(column="Vectorization:% of FP Ops(%)", aggfunc="mean"),
            std_fp=pd.NamedAgg(column="Vectorization:% of FP Ops(%)", aggfunc="std"),
        )
        .sort_values(by="mean_cpu", ascending=False)
    )
    stats["perc_makespan"] = stats["mean_cpu"] / stats["mean_cpu"].sum() * 100
    stats["cumperc_makespan"] = stats["perc_makespan"].cumsum()

    # Keep functions below the cumulative threshold. The top function is forced in
    # positionally (``iloc``), so a single dominant function is never filtered out
    # and no label-based integer lookup is attempted on the MultiIndex.
    keep = stats["cumperc_makespan"] < keep_ratio * 100
    if len(keep) > 0:
        keep.iloc[0] = True
    hotspots = stats[keep].reset_index()

    # Total (mean) CPU time per module, restricted to the retained functions.
    module_totals = hotspots.groupby("Module")["mean_cpu"].sum()
    hotspots["module_cpu"] = hotspots["Module"].map(module_totals)

    # Clean Function and Module names.
    def _shorten_module(module: str) -> str:
        # Keep only the leading "<name>.so" part when the module name starts with one.
        if m := re.match(r"^\w+\.so", module):
            return m.group(0)
        return module

    # Drop everything between the first "<" and the last ">" (template arguments).
    hotspots["func_short"] = hotspots["Function"].str.replace(
        r"<.*>", "", regex=True
    )
    hotspots["module_short"] = hotspots["Module"].apply(_shorten_module)

    # CPU Time sorted by "Module CPU Time" -> "CPU Time".
    hotspots = hotspots.sort_values(by=["module_cpu", "mean_cpu"], ascending=False)
    # Reorder the cumulative makespan for plotting.
    hotspots["cumperc_makespan"] = hotspots["perc_makespan"].cumsum()
    # The pre-sort position is intentionally kept as an "index" column so the
    # returned columns stay the same as before.
    hotspots = hotspots.reset_index()
    hotspots.index += 1
    return hotspots


In [2]:
import math

from bokeh.io import output_notebook
from bokeh.models import (
    ColorBar,
    ColumnDataSource,
    FactorRange,
    HoverTool,
    LabelSet,
    LinearAxis,
    LinearColorMapper,
    Range1d,
    Whisker,
)
from bokeh.plotting import figure, show
from bokeh.transform import transform


output_notebook()


def plot_hotspots(
    data: pd.DataFrame,
    *,
    y_limit: float | None = None,
    nthreads: int | None = None,
    colorbar_color: str = "Plasma256",
    colorbar_on: str,
    colorbar_title: str,
    pipeline: str,
):
    """Draw a bar chart of mean CPU time per hotspot function, grouped by module.

    Each bar is coloured by the ``colorbar_on`` column (mapped on a 0-100 scale),
    carries a mean ± std whisker, and is overlaid with the cumulative makespan
    percentage on a secondary right-hand axis.

    Args:
        data: Hotspot table with the columns "Module", "Function", "module_short",
            "func_short", "perc_makespan", "cumperc_makespan", "mean_cpu",
            "std_cpu", "mean_mem_bound", "std_mem_bound", "mean_fp" and "std_fp".
        y_limit: Upper bound of the CPU-time axis. When falsy (None or 0) it is
            derived from the data as 1.05 × max(mean + std), treating a missing
            std as 0.
        nthreads: Thread count mentioned in the title; omitted when falsy.
        colorbar_color: Bokeh palette name used for the colour mapper.
        colorbar_on: Name of the data-source column that drives the bar colour.
        colorbar_title: Title displayed on the colour bar.
        pipeline: Pipeline name shown in the figure title.

    Returns:
        The configured Bokeh figure (not yet shown).
    """
    # Nested categorical coordinate: (module, running bar number as a string).
    xs = [(module, str(i)) for i, module in enumerate(data["module_short"], start=1)]
    mean_cpu = data["mean_cpu"].values
    std_cpu = data["std_cpu"].values
    source = ColumnDataSource(
        data=dict(
            x=xs,
            module=data["Module"].values,
            func=data["Function"].values,
            module_short=data["module_short"].values,
            func_short=data["func_short"].values,
            func_id=data.index.values,
            perc_makespan=data["perc_makespan"].values,
            cumperc_makespan=data["cumperc_makespan"].values,
            mean_cpu=mean_cpu,
            std_cpu=std_cpu,
            std_lower_cpu=mean_cpu - std_cpu,
            std_upper_cpu=mean_cpu + std_cpu,
            mean_mem_bound=data["mean_mem_bound"].values,
            std_mem_bound=data["std_mem_bound"].values,
            mean_fp=data["mean_fp"].values,
            std_fp=data["std_fp"].values,
        )
    )

    p = figure(
        x_range=FactorRange(*xs),
        title=(
            f"Avg. CPU Time (seconds): {pipeline}"
            + (f" with {nthreads} threads" if nthreads else "")
        ),
        width=1500,
        toolbar_location=None,
    )

    # Tools
    hover = HoverTool()
    hover.tooltips = [
        ("Module", "@module"),
        ("Module (short)", "@module_short"),
        ("Function (short)", "@func_short"),
        ("Function ID", "@func_id"),
        ("Makespan contribution", "@perc_makespan{1.1}%"),
        ("Makespan contribution (cumulative)", "@cumperc_makespan{1.1}%"),
        ("CPU Time (mean ± std)", "@mean_cpu{1.11} ± @std_cpu{1.11}"),
        (
            "% of memory bound (mean ± std)",
            "@mean_mem_bound{1.11} ± @std_mem_bound{1.11}",
        ),
        ("% of FP ops (mean ± std)", "@mean_fp{1.11} ± @std_fp{1.11}"),
    ]
    p.tools.append(hover)

    # Mean
    color_mapper = LinearColorMapper(palette=colorbar_color, low=0, high=100)
    color_bar = ColorBar(
        color_mapper=color_mapper,
        label_standoff=14,
        location=(0, 0),
        title=colorbar_title,
        title_text_font_size="20pt",
    )
    p.add_layout(color_bar, "left")
    p.vbar(
        x="x",
        top="mean_cpu",
        width=0.9,
        source=source,
        color=transform(colorbar_on, color_mapper),
    )
    # Error
    error = Whisker(
        base="x",
        upper="std_upper_cpu",
        lower="std_lower_cpu",
        source=source,
        level="annotation",
        line_width=2,
    )
    error.upper_head.size = 20
    error.lower_head.size = 20
    p.add_layout(error)

    # Cumulative makespan
    p.extra_y_ranges = {"percentage": Range1d(start=0, end=100)}
    p.add_layout(
        LinearAxis(y_range_name="percentage", axis_label="Cumulative makespan (%)"),
        "right",
    )
    p.scatter(
        x="x",
        y="cumperc_makespan",
        source=source,
        y_range_name="percentage",
        color="lightgreen",
        size=10,
    )

    # Style
    if not y_limit:
        # A std of NaN (function observed in a single run) must not turn the whole
        # axis limit into NaN, so it is counted as 0 here.
        tallest = (data["mean_cpu"] + data["std_cpu"].fillna(0)).max()
        y_limit = 1.05 * tallest if pd.notna(tallest) else None
    if y_limit is not None:
        p.y_range.end = y_limit
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = math.pi / 2
    p.xaxis.group_label_orientation = math.pi / 8
    p.xaxis.separator_line_alpha = 0
    p.xgrid.grid_line_color = None
    p.yaxis.formatter.use_scientific = False

    ## Font size
    p.title.text_font_size = "25pt"
    p.xaxis.group_text_font_size = "12pt"
    p.yaxis.axis_label_text_font_size = "20pt"
    p.yaxis.major_label_text_font_size = "15pt"

    # Prevent overlap of x-axis labels: shrink the tick font as bars multiply.
    n_bars = len(data)
    if n_bars > 120:
        p.xaxis.major_label_text_font_size = "6pt"
    elif n_bars > 80:
        p.xaxis.major_label_text_font_size = "8pt"
    else:
        p.xaxis.major_label_text_font_size = "12pt"

    return p

In [11]:
from pathlib import Path

from bokeh.palettes import Colorblind8
from bokeh.transform import factor_cmap


def plot_makespan(path: Path, *, nthreads: int = 1):
    """Plot the average makespan of every pipeline found under ``path``.

    Every ``*.csv`` file below ``path`` (searched recursively, read as
    tab-separated) is treated as one run. A run's makespan is the sum of its
    "CPU Time" column divided by ``nthreads``; runs are grouped by pipeline, where
    the pipeline label is the first two path components of the file relative to
    ``path`` joined with "/". Runs of the same pipeline that share a file stem are
    summed together.

    Args:
        path: Root directory containing the profiling reports.
        nthreads: Divisor applied to each run's summed CPU time.

    Returns:
        A Bokeh figure with one bar (mean) and whisker (± std) per pipeline.

    Raises:
        ValueError: If no ``*.csv`` file exists under ``path``.
    """
    # Data preparation
    # Sorted so the concatenation order does not depend on the filesystem.
    csv_files = sorted(path.rglob("*.csv"))
    if not csv_files:
        raise ValueError(f"No '*.csv' profiling reports found under {path}")
    data = pd.concat(
        (
            pd.read_csv(filename, delimiter="\t").assign(
                filename=filename.stem,
                pipeline="/".join(filename.relative_to(path).parts[:2]),
            )
            for filename in csv_files
        ),
        ignore_index=True,
    )

    group = (
        data.groupby(["pipeline", "filename"])["CPU Time"]
        .sum()
        .divide(nthreads)
        .groupby("pipeline")
        .agg(["mean", "std"])
    )
    pipelines = group.index.tolist()

    source = ColumnDataSource(
        data=dict(
            pipeline=pipelines,
            mean=group["mean"].values,
            std=group["std"].values,
            std_lower=group["mean"].values - group["std"].values,
            std_upper=group["mean"].values + group["std"].values,
        )
    )

    # Plotting
    cmap = factor_cmap(
        "pipeline", palette=Colorblind8, factors=sorted(data["pipeline"].unique())
    )
    p = figure(
        x_range=pipelines,
        title="Average Makespan (seconds)",
        width=1500,
        toolbar_location=None,
        y_axis_label="Makespan (seconds)",
    )

    p.vbar(
        x="pipeline",
        top="mean",
        width=0.9,
        source=source,
        line_color=cmap,
        fill_color=cmap,
    )
    # Error
    error = Whisker(
        base="pipeline",
        upper="std_upper",
        lower="std_lower",
        source=source,
        level="annotation",
        line_width=2,
    )
    error.upper_head.size = 20
    error.lower_head.size = 20
    p.add_layout(error)

    # Tools
    hover = HoverTool()
    hover.tooltips = [
        ("Pipeline", "@pipeline"),
        ("Makespan (mean ± std)", "@mean{1.11} ± @std{1.11}"),
    ]
    p.tools.append(hover)

    # Appearance
    # A pipeline with a single run has std == NaN; count it as 0 so the axis limit
    # stays finite.
    tallest = (group["mean"] + group["std"].fillna(0)).max()
    if pd.notna(tallest):
        p.y_range.end = 1.05 * tallest
    p.y_range.start = 0
    p.xaxis.major_label_orientation = math.pi / 8
    p.xgrid.grid_line_color = None
    p.outline_line_color = None

    ## Font size
    p.title.text_font_size = "25pt"
    p.xaxis.major_label_text_font_size = "15pt"
    p.yaxis.axis_label_text_font_size = "15pt"
    p.yaxis.major_label_text_font_size = "15pt"

    return p

In [4]:
import socket
import warnings

from selenium import webdriver


class remote_driver:
    """Context manager yielding a headless remote Chrome webdriver, or None.

    On entry it tries to connect to a Selenium server on port 4444 of this host's
    resolved IP address. Any failure while resolving the host, connecting or
    maximizing the window is reported as a warning and ``None`` is yielded, so
    callers can skip work that needs a browser. On exit the driver, if one was
    created, is quit.
    """

    def __enter__(self):
        self.driver = None
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--window-size=1920,1080")
        options.add_argument("--ignore-ssl-errors=yes")
        options.add_argument("--ignore-certificate-errors")

        try:
            remote = socket.gethostbyname(socket.gethostname()) + ":4444"
            self.driver = webdriver.Remote(remote, options=options)
            self.driver.maximize_window()
        except Exception as exc:
            # Best effort: keep the notebook running without a browser, but do not
            # swallow KeyboardInterrupt/SystemExit and do report the cause.
            # A driver created before the failure is still released by __exit__.
            warnings.warn(
                f"Failed to initialize Selenium webdriver: {exc!r}", stacklevel=2
            )
            return None
        return self.driver

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Detach the driver before quitting so a repeated exit cannot quit twice,
        # and tolerate the case where no driver was ever created.
        driver = getattr(self, "driver", None)
        self.driver = None
        if driver is not None:
            driver.quit()


In [5]:
from typing import Optional


from bokeh.io import export_png


def show_and_export(
    p: figure,
    *,
    fout: Optional[Path] = None,
    driver: Optional[webdriver.Remote] = None,
    hide_title: bool = False,
):
    """Show a figure and, when possible, also export it as a PNG file.

    The export only happens when both ``fout`` and ``driver`` are provided;
    otherwise the figure is just shown.

    Args:
        p: Figure to display.
        fout: Destination PNG path; its parent directories are created if needed.
        driver: Webdriver handed to ``export_png`` for rendering.
        hide_title: If true, the title is blanked for the exported image only.

    Returns:
        Whatever ``show(p)`` returns.
    """
    if fout and driver:
        original_title = p.title
        if hide_title:
            p.title = ""
        try:
            fout.parent.mkdir(parents=True, exist_ok=True)
            export_png(p, filename=fout, webdriver=driver)
        finally:
            # Restore the title even if the export fails, so the figure is never
            # left title-less for later use.
            p.title = original_title
    return show(p)

In [6]:
def read_profiling_data(path: Path, *, delimiter: str = "\t") -> pd.DataFrame:
    """Load every ``*.csv`` report below ``path`` into a single DataFrame.

    Files are discovered recursively and concatenated in sorted path order, so the
    row order of the result does not depend on filesystem enumeration order.

    Args:
        path: Directory to search recursively.
        delimiter: Field separator of the reports (tab by default).

    Returns:
        The concatenation of all reports with a fresh 0..n-1 index.

    Raises:
        ValueError: If no ``*.csv`` file exists under ``path``.
    """
    csv_files = sorted(path.rglob("*.csv"))
    if not csv_files:
        raise ValueError(f"No '*.csv' profiling reports found under {path}")
    return pd.concat(
        (pd.read_csv(csv_file, delimiter=delimiter) for csv_file in csv_files),
        ignore_index=True,
    )


# Results

In [7]:
# Output locations for exported figures and CSV tables, created up front.
figures_dir, tables_dir = (Path("paper", leaf) for leaf in ("figures", "tables"))
for _output_dir in (figures_dir, tables_dir):
    _output_dir.mkdir(mode=0o755, parents=True, exist_ok=True)


## Single-threaded

In [12]:
# Root of the single-threaded VTune reports; also used by the hotspot cell below.
profiling_dir = Path(
    "/", "mnt", "lustre", "mathdugre", "mri-bottleneck", "vtune_output", "1-threads"
)
_makespan_png = figures_dir / "makespan-1thread.png"

# The driver may be None (no Selenium server); the figure is then only shown.
with remote_driver() as driver:
    _makespan_fig = plot_makespan(profiling_dir)
    show_and_export(_makespan_fig, fout=_makespan_png, driver=driver)


In [13]:
# Pipelines to plot per toolkit, as (pipeline name, y-axis upper limit) pairs.
# A limit of None is forwarded unchanged as the y_limit argument of plot_hotspots.
experiments: dict[str, tuple[tuple[str, float | None], ...]] = {
    "ants": (
        ("brainExtraction", 700),
        ("brainExtraction-fp", 700),
        ("registrationSyN", 2200),
        ("registrationSyN-fp", 2200),
    ),
    "fsl": (
        ("fast", None),
        ("mcflirt", None),
        ("flirt", None),
    ),
    "freesurfer": (("reconall", None),),
}

# One browser session is shared by all exports of this cell.
with remote_driver() as driver:
    for toolkit, pipelines in experiments.items():
        for pipeline, y_limit in pipelines:
            # Reports are laid out as <profiling_dir>/<toolkit>/<pipeline>/.
            profiling_data = read_profiling_data(profiling_dir / toolkit / pipeline)
            data = data_preparation(profiling_data, keep_ratio=0.8)
            p = plot_hotspots(
                data,
                pipeline=f"{toolkit}.{pipeline}",
                y_limit=y_limit,
                nthreads=1,
                colorbar_on="mean_mem_bound",
                colorbar_title="% of memory bound",
                colorbar_color="Plasma256",
            )

            # Shared stem for the exported figure and its companion table.
            _filename = f"hotspots-1thread-{toolkit}-{pipeline}"
            show_and_export(
                p,
                fout=figures_dir / f"{_filename}.png",
                driver=driver,
            )

            # The CSV holds every retained hotspot; only the top 10 are printed.
            out_data = data[
                ["module_short", "func_short", "mean_cpu", "std_cpu"]
            ]
            out_data.to_csv(tables_dir / f"{_filename}.csv")
            print(out_data.head(10).to_string())

           module_short                                                                                   func_short    mean_cpu    std_cpu
1   libantsUtilities.so                                       itk::LinearInterpolateImageFunction::EvaluateOptimized  544.093712  76.331690
2   libantsUtilities.so                         itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  371.688311   4.182069
3   libantsUtilities.so                                              itk::DisplacementFieldTransform::TransformPoint  126.218381   1.478981
4   libantsUtilities.so  itk::MattesMutualInformationImageToImageMetricv4GetValueAndDerivativeThreader::ProcessPoint  102.983413  14.454829
5   libantsUtilities.so                                                      itk::CompositeTransform::TransformPoint   99.512276   9.404046
6   libantsUtilities.so                                    itk::ImageToImageMetricv4::TransformAndEvaluateFixedPoint   81.160882   2.555888
7   libantsUtilities

           module_short                                                                                   func_short    mean_cpu    std_cpu
1   libantsUtilities.so                         itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  575.264534   4.329840
2   libantsUtilities.so                                       itk::LinearInterpolateImageFunction::EvaluateOptimized  476.890300  68.556801
3   libantsUtilities.so                                              itk::DisplacementFieldTransform::TransformPoint  264.323000   1.819090
4   libantsUtilities.so                                                                       itk::Matrix::operator*  101.334480  34.760053
5   libantsUtilities.so  itk::MattesMutualInformationImageToImageMetricv4GetValueAndDerivativeThreader::ProcessPoint   99.082268  15.859885
6   libantsUtilities.so                                               itk::MatrixOffsetTransformBase::TransformPoint   79.693317  10.104189
7   libantsUtilities

           module_short                                                                                                       func_short     mean_cpu     std_cpu
1   libantsUtilities.so                                             itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  1375.848259   19.561348
2   libantsUtilities.so                                                           itk::LinearInterpolateImageFunction::EvaluateOptimized   529.508021   19.100670
3   libantsUtilities.so                                                                  itk::DisplacementFieldTransform::TransformPoint   453.335583    5.803551
4   libantsUtilities.so                                                        itk::ImageToImageMetricv4::TransformAndEvaluateFixedPoint   217.932752    3.588040
5   libantsUtilities.so                                                                                                   itk::ImageBase   165.950744  193.831031
6   libantsUtilities.so  itk

           module_short                                                                                                       func_short     mean_cpu     std_cpu
1   libantsUtilities.so                                             itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  2142.724219   22.121105
2   libantsUtilities.so                                                                  itk::DisplacementFieldTransform::TransformPoint   956.173960    8.706631
3   libantsUtilities.so                                                           itk::LinearInterpolateImageFunction::EvaluateOptimized   571.696544   15.783523
4   libantsUtilities.so  itk::ANTSNeighborhoodCorrelationImageToImageMetricv4GetValueAndDerivativeThreader::UpdateQueuesToNextScanWindow   266.838272    4.984732
5   libantsUtilities.so                                                       itk::ImageToImageMetricv4::TransformAndEvaluateMovingPoint   175.231903    2.548815
6   libantsUtilities.so     

           module_short                            func_short   mean_cpu    std_cpu
1                  fast                    NEWIMAGE::convolve  68.025297   0.640115
2                  fast     ZMRISegmentation::MRFWeightsInner  18.593308   1.766365
3                  fast       ZMRISegmentation::UpdateMembers   3.887168   0.388937
4  [Outside any module]            [Outside any known module]  37.632594  56.062160
5          libm-2.31.so                                   exp   6.564961   1.137590
6          libm-2.31.so                          func@0x80fa4   6.300627   0.661480
7          libc-2.31.so                         func@0x18b644   5.698087   1.416873
8    libfsl-newimage.so  NEWIMAGE::maskedIterator::operator++   4.798749   0.072273


           module_short                     func_short  mean_cpu   std_cpu
1    libfsl-newimage.so  NEWIMAGE::q_tri_interpolation  6.969624  0.156024
2    libfsl-newimage.so  NEWIMAGE::p_normcorr_smoothed  4.689278  0.167209
3    libfsl-newimage.so           NEWIMAGE::findrangex  1.676788  0.096267
4  [Outside any module]     [Outside any known module]  5.392307  6.919619
5               libz.so                  longest_match  2.296634  0.114837
6          libc-2.31.so                    __libc_fork  1.776087  0.160683
7               vmlinux               perf_iterate_ctx  0.459667  0.055248
8               vmlinux                   zlib_inflate  0.453884  0.041229


           module_short                       func_short   mean_cpu    std_cpu
1    libfsl-newimage.so    NEWIMAGE::q_tri_interpolation  22.685123   3.973305
2    libfsl-newimage.so  NEWIMAGE::p_corr_ratio_smoothed   6.722008   0.826689
3  [Outside any module]       [Outside any known module]  10.021743  16.534466
4                 flirt               NEWIMAGE::convolve   5.283166   0.061738


       module_short                                                 func_short    mean_cpu     std_cpu
1   mri_ca_register       _Z27gcamComputeMetricPropertiesP9GCA_MORPH.extracted  806.806438   77.471677
2   mri_ca_register  _ZL24gcamSmoothnessEnergy_newPK9GCA_MORPHPK3MRI.extracted  718.968653   57.582192
3   mri_ca_register                                  different_neighbor_labels  561.477400  434.745530
4   mri_ca_register                  _Z13MRIconvolve1dP3MRIS0_Pfiiii.extracted  527.809413   75.943589
5   mri_ca_register                                     gcamJacobianTermAtNode  326.990279   33.976457
6   mri_ca_register                                              __libm_log_l9  189.935747   14.287451
7   mri_ca_register                                               MRIgetVoxVal  176.171857   48.313650
8   mri_ca_register                                          gcamApplyGradient  172.507140   15.013794
9   mri_ca_register                                MRIconvolve1d.extracte

## Multi-threaded (32 threads)

In [14]:
# Root of the 32-thread VTune reports; also used by the hotspot cell below.
profiling_dir = Path(
    "/", "mnt", "lustre", "mathdugre", "mri-bottleneck", "vtune_output", "32-threads"
)
_makespan_png = figures_dir / "makespan-32threads.png"

# The driver may be None (no Selenium server); the figure is then only shown.
with remote_driver() as driver:
    _makespan_fig = plot_makespan(profiling_dir, nthreads=32)
    show_and_export(_makespan_fig, fout=_makespan_png, driver=driver)

In [15]:
# Pipelines to plot per toolkit, as (pipeline name, y-axis upper limit) pairs.
# A limit of None is forwarded unchanged as the y_limit argument of plot_hotspots.
experiments: dict[str, tuple[tuple[str, float | None], ...]] = {
    "ants": (
        ("brainExtraction", 1200),
        ("brainExtraction-fp", 1200),
        ("registrationSyN", 3000),
        ("registrationSyN-fp", 3000),
    ),
    "fsl": (
        ("fast", 120),
        ("flirt", 120),
    ),
    "freesurfer": (("reconall", None),),
}

# One browser session is shared by all exports of this cell.
with remote_driver() as driver:
    for toolkit, pipelines in experiments.items():
        for pipeline, y_limit in pipelines:
            # Reports are laid out as <profiling_dir>/<toolkit>/<pipeline>/.
            profiling_data = read_profiling_data(profiling_dir / toolkit / pipeline)
            data = data_preparation(profiling_data, keep_ratio=0.8)
            p = plot_hotspots(
                data,
                pipeline=f"{toolkit}.{pipeline}",
                y_limit=y_limit,
                nthreads=32,
                colorbar_on="mean_mem_bound",
                colorbar_title="% of memory bound",
                colorbar_color="Plasma256",
            )

            # Shared stem for the exported figure and its companion table.
            _filename = f"hotspots-32threads-{toolkit}-{pipeline}"
            show_and_export(
                p,
                fout=figures_dir / f"{_filename}.png",
                driver=driver,
            )
            # The CSV holds every retained hotspot; only the top 10 are printed.
            out_data = data[
                ["module_short", "func_short", "mean_cpu", "std_cpu"]
            ]
            out_data.to_csv(tables_dir / f"{_filename}.csv")
            print(out_data.head(10).to_string())

           module_short                                                                                   func_short    mean_cpu     std_cpu
1   libantsUtilities.so                                       itk::LinearInterpolateImageFunction::EvaluateOptimized  897.455934  123.026552
2   libantsUtilities.so                         itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  521.304035    4.622968
3   libantsUtilities.so  itk::MattesMutualInformationImageToImageMetricv4GetValueAndDerivativeThreader::ProcessPoint  205.212182   30.978831
4   libantsUtilities.so                                              itk::DisplacementFieldTransform::TransformPoint  176.374455    2.368939
5   libantsUtilities.so                                                      itk::CompositeTransform::TransformPoint  138.288299   12.773644
6   libantsUtilities.so             itk::CompositeTransform::ComputeJacobianWithRespectToParametersCachedTemporaries  130.886573   17.544418
7   libantsUt

           module_short                                                                                   func_short    mean_cpu     std_cpu
1   libantsUtilities.so                         itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  771.606798    4.762566
2   libantsUtilities.so                                       itk::LinearInterpolateImageFunction::EvaluateOptimized  739.335143   86.873504
3   libantsUtilities.so                                              itk::DisplacementFieldTransform::TransformPoint  347.336350    2.919278
4   libantsUtilities.so  itk::MattesMutualInformationImageToImageMetricv4GetValueAndDerivativeThreader::ProcessPoint  214.077214   37.178156
5   libantsUtilities.so             itk::CompositeTransform::ComputeJacobianWithRespectToParametersCachedTemporaries  149.201633   23.138998
6   libantsUtilities.so                                                                       itk::Matrix::operator*  145.114224   35.316815
7   libantsUt

           module_short                                                                                                       func_short     mean_cpu     std_cpu
1   libantsUtilities.so                                             itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  1931.364105   18.623210
2   libantsUtilities.so                                                           itk::LinearInterpolateImageFunction::EvaluateOptimized   766.362248   17.096843
3   libantsUtilities.so                                                                  itk::DisplacementFieldTransform::TransformPoint   634.644100   12.655191
4   libantsUtilities.so                                                        itk::ImageToImageMetricv4::TransformAndEvaluateFixedPoint   306.018239    6.539041
5   libantsUtilities.so  itk::ANTSNeighborhoodCorrelationImageToImageMetricv4GetValueAndDerivativeThreader::UpdateQueuesToNextScanWindow   225.308791    6.021050
6   libantsUtilities.so     

           module_short                                                                                                       func_short     mean_cpu     std_cpu
1   libantsUtilities.so                                             itk::VectorLinearInterpolateImageFunction::EvaluateAtContinuousIndex  2896.482148   45.082220
2   libantsUtilities.so                                                                  itk::DisplacementFieldTransform::TransformPoint  1264.369795   19.459508
3   libantsUtilities.so                                                           itk::LinearInterpolateImageFunction::EvaluateOptimized   806.754183   18.366494
4   libantsUtilities.so  itk::ANTSNeighborhoodCorrelationImageToImageMetricv4GetValueAndDerivativeThreader::UpdateQueuesToNextScanWindow   363.774631   14.283476
5   libantsUtilities.so                                                       itk::ImageToImageMetricv4::TransformAndEvaluateMovingPoint   231.058520    4.343508
6   libantsUtilities.so     

           module_short                            func_short   mean_cpu    std_cpu
1                  fast                    NEWIMAGE::convolve  68.056310   0.705343
2                  fast     ZMRISegmentation::MRFWeightsInner  18.686061   1.977319
3  [Outside any module]            [Outside any known module]  58.217974  55.906508
4          libm-2.31.so                          func@0x80fa4   6.344587   0.603207
5          libm-2.31.so                                   exp   5.734872   2.024739
6          libc-2.31.so                         func@0x18b644   5.373986   1.723478
7    libfsl-newimage.so  NEWIMAGE::maskedIterator::operator++   4.790200   0.090066


           module_short                       func_short   mean_cpu    std_cpu
1    libfsl-newimage.so    NEWIMAGE::q_tri_interpolation  22.525643   3.815177
2    libfsl-newimage.so  NEWIMAGE::p_corr_ratio_smoothed   6.747439   0.805291
3  [Outside any module]       [Outside any known module]  11.614042  18.540967
4                 flirt               NEWIMAGE::convolve   5.287266   0.049313


      module_short                                            func_short       mean_cpu       std_cpu
1      libiomp5.so                                    __kmp_fork_barrier  138521.807319  16658.247635
2      libiomp5.so                                __kmp_api_omp_set_lock   31857.478404   4978.365933
3  mri_ca_register  _Z27gcamComputeMetricPropertiesP9GCA_MORPH.extracted    4487.771640    778.397906


# Test