# How to start

Before starting you must:
- Ensure that `scipp` and `mantid` are on your `PYTHONPATH`.
- Generate the `config.py` file using `make_config.py` (the notebook below imports the generated settings as `scippconfig`). Refer to the `README.md` or run `python make_config.py --help` for details.
- Install the image-handling dependencies: `conda install fabio tifffile`.

To install `scipp` and `mantid`, follow the instructions at: https://scipp.readthedocs.io/en/latest/getting-started/installation.html

Converted from [this script](https://git.esss.dk/testbeamline/gp2/blob/1c69213b1124982bbbe762da9c6c6457a49f2a92/reduce.py) into a scipp-based notebook by Dimitar Tasev on 2020-01-13.

In [None]:
from dataclasses import dataclass
# Fail fast, with a readable hint, if any package the notebook relies on
# cannot be imported. The original ImportError is re-raised after the hint.
try:
    import scipp
except ImportError as err:
    print("scipp is not available in the PYTHONPATH")
    raise err
else:
    print("scipp found")

try:
    import mantid
except ImportError as err:
    print("mantid is not available in the PYTHONPATH")
    raise err
else:
    print("mantid found")

# Generated configuration module; provides `script_root` used further down
try:
    import scippconfig
except ImportError as err:
    print("scippconfig is not available. Make sure you have generated it with `make_config.py`.")
    raise err
else:
    print("scippconfig found")

# Legacy Mantid implementation of the WFM stitching step
try:
    import wfm_stitching
except ImportError as err:
    print("wfm_stitching is required to benchmark Mantid")
    raise err
else:
    print("Legacy wfm stitching found")

In [None]:
import csv
import glob
import fabio
import os

import scipp as sc
import numpy as np
from scipp import Dim
from enum import Enum
from mantid.api import AlgorithmManager, AnalysisDataService
from mantid.simpleapi import (ConvertUnits, Divide, GroupDetectors,
                              ReplaceSpecialValues, SaveNexus, Scale, ScaleX,
                              SumSpectra, mtd, SaveNexusESS)

import timeit

# Name of the folder, inside the configured script root, that holds the data
DATA_DIR_NAME = "data_GP2"
experiment_dir = scippconfig.script_root
data_dir = os.path.join(experiment_dir, DATA_DIR_NAME)

# isdir (rather than exists) so that a regular file which happens to carry
# the expected name is rejected here instead of failing later during loading.
if not os.path.isdir(data_dir):
    raise FileNotFoundError("The following data directory does not exist,"
                            f" check your make_config.py:\n{data_dir}")

In [None]:
# Customisable Options:
class BenchSelection(Enum):
    """Selects which implementation(s) the bench_* helpers actually run."""
    # Run both the Mantid and the Scipp steps
    Both = 0
    # Run only the Mantid steps (Scipp steps are skipped)
    Mantid = 1
    # Run only the Scipp steps (Mantid steps are skipped)
    Scipp = 2
    
# The selection used by this run of the notebook
bench_selection = BenchSelection.Scipp

from itertools import accumulate

# Grouping of the 2D detector pixels: the detector is re-gridded into
# grouping_number x grouping_number groups.
grouping_number = 3
nx_target = grouping_number
ny_target = grouping_number

# Rebin regions, one entry per frame (six are defined here),
# in the format of (bin-start, bin-end, bin-width).
# Used to crop each frame out of the data before stitching them together.
frame_parameters = [(15167, 23563, 64),
                    (24393, 32758, 64),
                    (33365, 40708, 64),
                    (41410, 48019, 64),
                    (49041, 55311, 64),
                    (56077, 59872, 64)]

# Repack into (min, width, max) for Mantid
mantid_frame_parameters = [(start, width, stop) for start, stop, width in frame_parameters]

# Used to shift the cropped frames so that their bins overlap
# before summing them together into a single frame.
# The increments are relative to the previous frame, so the absolute shift
# of each frame is the running total of the increments.
frame_shift_increments = [-6630.0, -2420.0, -2253.0, -2095.0, -1946.0, -1810.0]
frame_shifts = list(accumulate(frame_shift_increments))

# Used to rebin the summed frame in order to
# cut off bins that contain no data: (bin-start, bin-end, bin-width)
rebin_parameters = (8500, 43000, 64)
# Mantid takes the same values as a "start,width,end" string; derive it so
# the two descriptions cannot drift apart.
mantid_rebin_parameters = "{0},{2},{1}".format(*rebin_parameters)

In [None]:
# SCIPP Helper functions
def read_x_values(tof_file):
    """
    Reads the TOF values from a tab-separated text file into a list.

    The first row is treated as a header and skipped. For every following
    row the second column is converted to float. Empty rows (for example a
    trailing blank line) are ignored.

    :param tof_file: path of the tab-separated file to read
    :return: list of floats, one per non-empty data row
    """
    # newline='' lets the csv module do its own line-ending handling
    with open(tof_file, newline='') as fh:
        csv_reader = csv.reader(fh, delimiter='\t')
        next(csv_reader, None)  # skip header
        # csv yields [] for blank lines; indexing those would raise
        return [float(row[1]) for row in csv_reader if row]


def load_tiffs(tiff_dir):
    """
    Loads every ``*.tiff`` image found directly in a directory.

    Files are read in sorted path order and each image is flipped vertically
    (``np.flipud``) before being appended to the stack. Progress is printed
    on a single, continuously overwritten line.

    :param tiff_dir: directory containing the tiff files
    :return: numpy array built from the list of flipped images
    :raises RuntimeError: if ``tiff_dir`` is not a directory
    """
    if not os.path.isdir(tiff_dir):
        raise RuntimeError(tiff_dir + " is not directory")

    # glob.escape keeps characters such as '[' in the directory name literal
    pattern = os.path.join(glob.escape(tiff_dir), "*.tiff")
    filenames = sorted(glob.glob(pattern))
    nfiles = len(filenames)
    print(f"Loading {nfiles} files from '{tiff_dir}'")

    stack = []
    for count, filename in enumerate(filenames, start=1):
        print('\r{0}: Image {1}, of {2}'.format(os.path.basename(filename), count, nfiles), end="")
        # glob already returns paths that include tiff_dir; joining them onto
        # tiff_dir again would duplicate the prefix for relative directories.
        img = fabio.open(filename)
        stack.append(np.flipud(img.data))

    return np.array(stack)

def tiffs_to_variable(tiff_dir):
    """
    Builds a scipp Variable from all tiff images in ``tiff_dir``.

    The image stack is converted to float64 and each image is flattened, so
    the result has dimensions (Tof, Spectrum). The same array is passed as
    both the values and the variances.
    """
    images = load_tiffs(tiff_dir)
    as_float = images.astype(np.float64)
    shape = images.shape
    # One row per image, one column per pixel
    flattened = as_float.reshape(shape[0], shape[1] * shape[2])
    return sc.Variable([Dim.Tof, Dim.Spectrum], values=flattened, variances=flattened)

def stitch(data_array, frame_parameters, rebin_parameters, shifts=None):
    """
    Stitches the frames contained in ``data_array`` into a single frame.

    Each frame is cropped out by rebinning onto its own bins, shifted along
    TOF so that all frames align, and rebinned onto the common bins. The
    aligned frames are then summed.

    :param data_array: data to stitch; must have a ``Dim.Tof`` coordinate
    :param frame_parameters: one (start, stop, width) tuple per frame
    :param rebin_parameters: (start, stop, width) of the common output bins
    :param shifts: TOF shift applied to each frame, in the same order as
        ``frame_parameters``. Defaults to the module-level ``frame_shifts``.
    :return: the sum of all aligned frames
    :raises ValueError: if the number of shifts and frames differ
    """
    if shifts is None:
        shifts = frame_shifts
    # zip() would silently drop frames if the two sequences differed in length
    if len(shifts) != len(frame_parameters):
        raise ValueError(f"Got {len(frame_parameters)} frame(s) but {len(shifts)} shift(s)")

    rebin_params = sc.Variable([Dim.Tof], values=np.arange(*rebin_parameters, dtype=np.float64))

    frames = []
    for slice_bins, shift_parameter in zip(frame_parameters, shifts):
        bins = sc.Variable([Dim.Tof], values=np.arange(*slice_bins, dtype=np.float64))
        # Rebins the whole data to crop it to frame bins
        rebinned = sc.rebin(data_array, Dim.Tof, bins)
        # Shift the frame backwards to make all frames overlap
        rebinned.coords[Dim.Tof] += shift_parameter
        # Rebin to overarching coordinates so that the frame coordinates align
        rebinned = sc.rebin(rebinned, Dim.Tof, rebin_params)

        frames.append(rebinned)

    # Accumulate everything into the first frame
    for f in frames[1:]:
        frames[0] += f

    return frames[0]

def make_detector_groups(nx_original, ny_original, nx_target, ny_target):
    """
    Builds the pixel-to-group mapping used to group detector pixels.

    The (nx_original, ny_original) pixel grid is divided into
    nx_target x ny_target equally sized rectangles. Every pixel in the
    rectangle with indices (i, j) gets the group number ``i + j * nx_target``.

    :param nx_original: number of pixels along the first grid axis
    :param ny_original: number of pixels along the second grid axis
    :param nx_target: number of groups along the first grid axis
    :param ny_target: number of groups along the second grid axis
    :return: scipp Variable over ``Dim.Spectrum`` holding the float64 group
        number of every pixel, flattened in row-major order
    :raises RuntimeError: if the grid cannot be divided evenly
    """
    # Without this, pixels left over at the edges would silently stay in
    # group 0. The errors mirror those of mantid_make_map_file.
    if nx_original % nx_target != 0:
        raise RuntimeError('Cannot have fractional re-gridding in x')
    if ny_original % ny_target != 0:
        raise RuntimeError('Cannot have fractional re-gridding in y')

    element_width_x = nx_original // nx_target
    element_width_y = ny_original // ny_target

    # Index of the group each pixel row / column falls into
    group_x = np.arange(nx_original) // element_width_x
    group_y = np.arange(ny_original) // element_width_y

    # Broadcast to the full grid: grid[x, y] = i + j * nx_target
    grid = (group_x[:, np.newaxis] + group_y[np.newaxis, :] * nx_target).astype(np.float64)

    return sc.Variable([Dim.Spectrum], values=grid.ravel())

def mantid_x_values(tof_file):
    """
    Reads the TOF values used as X values for the Mantid workspaces.

    The file is parsed as tab-separated text: the first row is skipped as a
    header and the second column of each remaining row is converted to
    float. Empty rows (such as a trailing blank line) are skipped.

    :param tof_file: path of the tab-separated file to read
    :return: list of floats, one per non-empty data row
    """
    tof_values = []
    # newline='' lets the csv module do its own line-ending handling
    with open(tof_file, newline='') as fh:
        csv_reader = csv.reader(fh, delimiter='\t')
        next(csv_reader, None)  # skip header
        for row in csv_reader:
            if not row:
                # blank line: nothing to index
                continue
            tof_values.append(float(row[1]))
    return tof_values


def mantid_mask_in_place(ws_name):
    """
    Masks spectra of the named workspace based on their Y position.

    The workspace is looked up in ``mtd`` and modified in place: every
    spectrum whose position has Y above 0.00406 or below -0.00126 is masked.
    """
    y_lower, y_upper = -0.00126, 0.00406
    spectrum_info = mtd[ws_name].spectrumInfo()
    for index, spectrum in enumerate(spectrum_info):
        y_pos = spectrum.position.Y()
        outside_window = y_pos > y_upper or y_pos < y_lower
        if outside_window:
            spectrum_info.setMasked(index, True)


def mantid_tiffs_to_workspace(tiff_dir, x_values, ws_name):
    """
    Loads all ``*.tiff`` images of a directory into a Mantid workspace.

    Images are read in sorted path order, flipped vertically and stacked
    along a third axis. The stack is handed to Mantid's ``CreateWorkspace``
    with one spectrum per pixel, ``x_values`` as X data and the square root
    of the counts as errors. The result is registered in ``mtd``.

    :param tiff_dir: directory containing the tiff files
    :param x_values: X (TOF) values passed as ``DataX``
    :param ws_name: name under which the workspace is stored in ``mtd``
    :return: the created workspace
    :raises RuntimeError: if ``tiff_dir`` is not a directory
    """
    if not os.path.isdir(tiff_dir):
        raise RuntimeError(tiff_dir + " is not directory")

    # glob.escape keeps characters such as '[' in the directory name literal
    pattern = os.path.join(glob.escape(tiff_dir), "*.tiff")
    filenames = sorted(glob.glob(pattern))
    nfiles = len(filenames)
    print(f"Loading {nfiles} files from '{tiff_dir}' for '{ws_name}'")

    stack = []
    for count, filename in enumerate(filenames, start=1):
        print('\r{0}: Image {1}, of {2}'.format(os.path.basename(filename), count, nfiles), end="")
        # glob already returns paths that include tiff_dir; joining them onto
        # tiff_dir again would duplicate the prefix for relative directories.
        img = fabio.open(filename)
        stack.append(np.flipud(img.data))

    # dstack puts the image index on the last axis: (rows, columns, images)
    data = np.dstack(stack)
    image_dims = data.shape

    # Use AlgorithmManager and create as child for speed. Avoids history creation
    alg = AlgorithmManager.create("CreateWorkspace")
    alg.setChild(True)
    alg.initialize()
    alg.setProperty('DataX', x_values)
    alg.setProperty('DataY', data)
    alg.setProperty('DataE', np.sqrt(data))
    alg.setProperty('NSpec', image_dims[0]*image_dims[1])
    alg.setProperty('OutputWorkspace', ws_name)
    alg.setProperty('UnitX', 'TOF')
    alg.execute()
    ws = alg.getProperty('OutputWorkspace').value
    # A child algorithm does not store its output, so register it explicitly
    mtd.addOrReplace(ws_name, ws)
    return ws


def mantid_make_map_file(destination, nx_original, ny_original, nx_target, ny_target, start_spectrum_id=1, grid=None):
    """
    Writes a map file for Mantid's GroupDetectors describing a re-gridding.

    The detector is divided into nx_target x ny_target equally sized
    rectangles and each rectangle becomes one group listing the spectrum ids
    it contains. Groups are written with x as the outer and y as the inner
    loop and are numbered from 1.

    :param destination: path of the file to write
    :param nx_original: number of detector pixels in x
    :param ny_original: number of detector pixels in y
    :param nx_target: number of groups in x
    :param ny_target: number of groups in y
    :param start_spectrum_id: first spectrum id of the default grid
    :param grid: optional 2D array (or nested sequence) of spectrum ids
        indexed as ``grid[y, x]``. Defaults to a contiguous, row-major range
        starting at ``start_spectrum_id``.
    :return: ``destination``
    :raises RuntimeError: if the detector cannot be divided evenly
    """
    if nx_original % nx_target != 0:
        raise RuntimeError('Cannot have fractional re-gridding in x')
    if ny_original % ny_target != 0:
        raise RuntimeError('Cannot have fractional re-gridding in y')

    # Make a grid that models the spectrum ids in our original 2d_detector.
    # Must be a contiguous range. We use this for slicing
    if grid is None:
        # Shaped (ny, nx) because it is sliced as grid[y, x] below; the
        # previous (nx, ny) shape dropped spectra for non-square detectors.
        grid = np.arange(start_spectrum_id, (nx_original*ny_original) +
                         start_spectrum_id).reshape((ny_original, nx_original))
    else:
        # `not grid` is ambiguous for numpy arrays, and nested lists cannot
        # be sliced in 2D, so normalise whatever was passed in
        grid = np.asarray(grid)
    element_width_x = nx_original // nx_target
    element_width_y = ny_original // ny_target

    groups = []  # To contain our new spectra mappings
    for i in range(nx_target):
        startx = i * element_width_x
        endx = startx + element_width_x
        for j in range(ny_target):
            starty = j * element_width_y
            endy = starty + element_width_y
            # Create slices for each block in our re-gridded region
            groups.append(grid[starty:endy, startx:endx].flatten())

    # See http://docs.mantidproject.org/nightly/algorithms/GroupDetectors-v2.html#usage
    # Layout: number of groups, then per group its id, its size and its members
    lines = ['{}\n'.format(len(groups))]
    for group_id, group in enumerate(groups, start=1):
        lines.append('{}\n'.format(group_id))
        lines.append('{}\n'.format(len(group)))
        lines.append('{}\n'.format(' '.join(map(str, group))))

    with open(destination, 'w') as fh:
        fh.writelines(lines)
    return destination

In [None]:
@dataclass
class TotalTime:
    """Running totals, in seconds, of the time spent in each implementation."""
    scipp_total_time: float = 0
    mantid_total_time: float = 0


# Floats are immutable, so the totals live on a shared mutable object that
# the benchmark helpers update in place.
totals = TotalTime()
    

def _time_f(func):
    # We have to wrap time to return anything and not monkey-patch timeit
    assert(callable(func))
    start = timeit.default_timer()
    result = func()
    stop = timeit.default_timer()

    elapsed = stop - start
    print(f"\n Time (s): {elapsed}")
    return elapsed, result

def bench_scipp_func(func):
    """
    Times ``func`` as a Scipp step and adds the time to the Scipp total.

    Does nothing and returns None when only Mantid is selected for
    benchmarking; otherwise returns whatever ``func`` returned.
    """
    scipp_enabled = bench_selection is not BenchSelection.Mantid
    if not scipp_enabled:
        return None

    print("\nScipp: ")
    elapsed, outcome = _time_f(func)
    totals.scipp_total_time = totals.scipp_total_time + elapsed
    return outcome

def bench_mantid_func(func):
    """
    Times ``func`` as a Mantid step and adds the time to the Mantid total.

    Does nothing and returns None when only Scipp is selected for
    benchmarking; otherwise returns whatever ``func`` returned.
    """
    mantid_enabled = bench_selection is not BenchSelection.Scipp
    if not mantid_enabled:
        return None

    print("\nMantid: ")
    elapsed, outcome = _time_f(func)
    totals.mantid_total_time = totals.mantid_total_time + elapsed
    return outcome

In [None]:
# Ensure Mantid isn't holding onto objects if the user re-runs the benchmark
AnalysisDataService.clear()

# Instrument definition, resolved against the experiment directory
instrument_file = os.path.join(experiment_dir, 'IDF', 'V20_Definition_GP2.xml')

# Inputs inside the data directory: TOF values, sample images, open-beam images
tofs_path = os.path.join(data_dir, 'metadata', 'GP2_BCC_time_values.txt')
sample_path = os.path.join(data_dir, 'Timeslices WFM BBC Steel')
ob_path = os.path.join(data_dir, 'Timeslices WFM Open Beam')

# Scipp loader: TOF coordinate plus the sample and open-beam image stacks
def load_scipp_samples():
    """
    Loads the benchmark input into a scipp Dataset.

    The TOF coordinate is read from ``tofs_path``, created with unit ``us``
    and then multiplied by 1e3. The images under ``sample_path`` and
    ``ob_path`` are stored as the "sample" and "reference" items.
    """
    tof_values = read_x_values(tofs_path)

    dataset = sc.Dataset()
    dataset.coords[Dim.Tof] = sc.Variable([Dim.Tof], unit=sc.units.us, values=tof_values)
    dataset.coords[Dim.Tof] *= 1e3

    for item_name, image_dir in (("sample", sample_path), ("reference", ob_path)):
        dataset[item_name] = tiffs_to_variable(image_dir)
    return dataset

def load_mantid_samples():
    """
    Loads the benchmark input into two Mantid workspaces.

    The TOF values are read from ``tofs_path`` and used as X values for the
    images under ``sample_path`` and ``ob_path``. The X values of both
    workspaces are then multiplied by 1e3.

    :return: tuple ``(sample workspace, reference workspace)``
    """
    tofs = mantid_x_values(tofs_path)
    mantid_sample = mantid_tiffs_to_workspace(sample_path, tofs, "sample")
    mantid_reference = mantid_tiffs_to_workspace(ob_path, tofs, "reference")

    # NOTE(review): mantid.simpleapi is understood to name the output
    # workspace after the variable being assigned - confirm before renaming.
    mantid_sample = ScaleX(mantid_sample, 1e3, Operation='Multiply')
    mantid_reference = ScaleX(mantid_reference, 1e3, Operation='Multiply')

    # Note: we're missing a Scale(pulse_number) where pulse_number is 1/nPulses
    return mantid_sample, mantid_reference

# Prime IO cache - This is intentionally untimed, we use scipp to avoid having to clear the ADS in Mantid
print("Priming IO cache")
load_scipp_samples()
print("\n---- IO Primed, lets benchmark -----")

# Timed loads. Each bench_* helper returns None when its side is deselected.
ds = bench_scipp_func(load_scipp_samples)
result = bench_mantid_func(load_mantid_samples)
if result is not None:
    # Only unpack when the Mantid load actually ran
    mantid_sample, mantid_reference = result

In [None]:
def stitch_scipp():
    """
    Stitches the "sample" and "reference" items of ``ds`` with scipp.

    Adds a Spectrum coordinate (0..n-1) to ``ds`` and returns a new Dataset,
    with the common TOF bins as coordinate, holding both stitched items.
    """
    n_spectra = ds["sample"].shape[1]
    ds.coords[Dim.Spectrum] = sc.Variable([Dim.Spectrum], values=np.arange(n_spectra))

    common_bins = sc.Variable([Dim.Tof], values=np.arange(*rebin_parameters, dtype=np.float64))
    output = sc.Dataset(coords={Dim.Tof: common_bins})

    for item_name in ("sample", "reference"):
        output[item_name] = stitch(ds[item_name], frame_parameters, rebin_parameters)
    return output

def stitch_mantid():
    """
    Stitches the Mantid sample and reference workspaces using the legacy
    ``wfm_stitching.WFMProcessor``.

    :return: tuple ``(stitched sample, stitched reference)``
    """
    processor = wfm_stitching.WFMProcessor(mantid_frame_parameters, frame_shifts)
    # Sample first, then reference, with identical settings
    return tuple(
        processor.process(workspace, instrument_file, mantid_rebin_parameters,
                          scale=1, delete_temporary_workspaces=True)
        for workspace in (mantid_sample, mantid_reference)
    )

# Timed stitching. Each bench_* helper returns None when its side is deselected.
stitched = bench_scipp_func(stitch_scipp)
result = bench_mantid_func(stitch_mantid)
if result is not None:
    # Only unpack when the Mantid stitching actually ran
    mantid_stitched_sample, mantid_stitched_reference = result

In [None]:
def normalize_scipp():
    """
    Adds a "normalized" item (sample divided by reference) to ``stitched``.

    NaN and +/- infinity entries produced by the division are replaced, in
    place, by 0 with variance 0. Returns the modified ``stitched`` dataset.
    """
    stitched["normalized"] = stitched["sample"] / stitched["reference"]

    zero = sc.Variable(value=0.0, variance=0.0)
    normalized = stitched["normalized"]
    # Write the cleaned values straight back into the dataset item
    sc.nan_to_num(normalized.data, out=normalized.data,
                  nan=zero, posinf=zero, neginf=zero)
    return stitched

def normalize_mantid():
    """
    Divides the stitched Mantid sample by the stitched reference and
    replaces NaN and infinity values in the result with 0.

    :return: the normalized workspace
    """
    # NOTE(review): mantid.simpleapi is understood to name the output
    # workspace after the variable being assigned - confirm before renaming.
    normalized_sample = Divide(mantid_stitched_sample, mantid_stitched_reference)
    normalized_sample = ReplaceSpecialValues(normalized_sample, NaNValue=0, InfinityValue=0)
    return normalized_sample

# Timed normalization. Each bench_* helper returns None when its side is
# deselected, so `stitched` / `normalized_mantid` may be None afterwards.
stitched = bench_scipp_func(normalize_scipp)
normalized_mantid = bench_mantid_func(normalize_mantid)


In [None]:
def scipp_group_detectors():
    """
    Groups the normalized scipp data by detector group.

    Stores the pixel-to-group mapping for a 324 x 324 detector on
    ``stitched`` as the "detector_mapping" coordinate and returns the
    (not yet reduced) groupby object built from it.
    """
    mapping = make_detector_groups(324, 324, nx_target, ny_target)
    stitched.coords["detector_mapping"] = mapping
    return sc.groupby(stitched["normalized"], "detector_mapping")

def mantid_group_detectors():
    """
    Groups the normalized Mantid workspace by detector group.

    Writes the map file for a 324 x 324 detector into the data directory
    and passes it to GroupDetectors.

    :return: the grouped workspace
    """
    mapping_file = os.path.join(data_dir, "mantid_mapping_file.txt")
    # mantid_make_map_file returns the path it wrote to
    mapping_file = mantid_make_map_file(destination=mapping_file, nx_original=324,
                                        ny_original=324, nx_target=nx_target, ny_target=ny_target, start_spectrum_id=1)
    grouped = GroupDetectors(normalized_mantid, MapFile=mapping_file)
    return grouped

# Timed grouping. Each bench_* helper returns None when its side is deselected.
grouped_unsummed = bench_scipp_func(scipp_group_detectors)
grouped_mantid = bench_mantid_func(mantid_group_detectors)


In [None]:
def scipp_sum_spectra():
    """
    Sums each detector group over the Spectrum dimension.

    Returns a new Dataset holding the result as "normalized_grpd".
    """
    summed_groups = grouped_unsummed.sum(Dim.Spectrum)
    output = sc.Dataset()
    output["normalized_grpd"] = summed_groups
    return output

def mantid_sum_spectra():
    """Runs Mantid's SumSpectra on the grouped workspace and returns the result."""
    # NOTE(review): mantid.simpleapi is understood to name the output
    # workspace after the variable being assigned - confirm before renaming.
    summed = SumSpectra(grouped_mantid)
    return summed

# Timed summation. Each bench_* helper returns None when its side is deselected.
summed = bench_scipp_func(scipp_sum_spectra)
summed_mantid = bench_mantid_func(mantid_sum_spectra)

In [None]:
def scipp_convert_units():
    """
    Converts the grouped scipp data from time-of-flight to wavelength.

    Loads the component info from the instrument file onto ``stitched``,
    copies source, sample and per-group mean detector positions onto
    ``summed`` and stores the converted data there as "normalized_wl".
    Returns the modified ``summed`` dataset.
    """
    # Component info is needed for the unit conversion
    sc.compat.mantid.load_component_info(stitched, instrument_file)
    for coord_name in ("source_position", "sample_position"):
        summed.coords[coord_name] = stitched.coords[coord_name]

    # groupby won't take the coordinate directly, so expose the positions as
    # a data item first and then average them within each detector group
    stitched["position"] = stitched.coords["position"]
    mean_position = sc.groupby(stitched["position"], "detector_mapping").mean(Dim.Spectrum)

    # The averaged positions come back as a DataArray, which cannot be stored
    # as a coordinate as-is. Copy them into a plain array and re-wrap them
    # as a Variable with an explicit unit.
    xyz = np.zeros((mean_position.shape[0], 3))
    for row, vector in enumerate(mean_position.values):
        xyz[row, :] = vector

    summed.coords["position"] = sc.Variable(mean_position.dims, xyz, unit=sc.units.m, dtype=mean_position.dtype)
    summed["normalized_wl"] = sc.neutron.convert(summed["normalized_grpd"], Dim.Tof, Dim.Wavelength)
    return summed

def mantid_convert_units():
   """Converts the summed Mantid workspace to wavelength and returns the result."""
   # NOTE(review): mantid.simpleapi is understood to name the output
   # workspace after the variable being assigned - confirm before renaming.
   wav_ws = ConvertUnits(summed_mantid, Target='Wavelength')
   return wav_ws

# Timed unit conversion. Each bench_* helper returns None when its side is deselected.
wavelength = bench_scipp_func(scipp_convert_units)
wavelength_mantid = bench_mantid_func(mantid_convert_units)

In [None]:
# Report the accumulated time of every timed step. A side that was
# deselected keeps its initial total of 0.
print("Benchmark completed")
print(f"Scipp total time: {totals.scipp_total_time}")
print(f"Mantid total time: {totals.mantid_total_time}")

# The below is not benchmarked as it uses Mantid to do the processing anyway

# x_coords = wavelength["normalized_wl"].coords["wavelength"]
# x_dim = wavelength["normalized_wl"].dims[0]
# x = x_coords.values
#
# # Mantid expects the data in a different shape
# # which is spectrum as outer-most dimension.
# y = np.transpose(wavelength["normalized_wl"].values)
# e = np.transpose(wavelength["normalized_wl"].variances)
#
# ws = sc.compat.mantid.to_workspace_2d(x, y, e, str(x_dim), instrument_file)

In [None]:
# If you want to save the workspace uncomment this line
# SaveNexusESS(ws, "scipp_normalized_wl.nxs")

In [None]:
# fit_ds = sc.compat.mantid.fit(ws,
#                     function='name=LinearBackground,A0=5000,A1=0;name=UserFunction,Formula=h*erfc(a*(x-x0)),h=5000,a=-0.5,x0=4',
#                     workspace_index=2, start_x=3.6, end_x=4.4)