# SHETRAN Model Merger

*October 2025*

This code will take existing SHETRAN setups and merge them. This is designed to work with nested catchments with a single outflow. Models should all work individually and must each have the following files:
- Library xml
- Subsurface map
- Land cover map
- Lake map

The model library paths should be given in the order that they are to be merged, starting with the largest and working towards the smallest. Map files should be findable using the paths in the library file (normally in the same folder as the library file).

The script works by loading the largest catchment, making this the base for additional catchments. A nested catchment is then loaded, and its soil and vegetation detail numbers are increased so that they do not overlap with the existing values. They are then added to the library file, and the nested catchment's soil, land cover, and lake values overwrite the corresponding cells of the base maps. This is then repeated for each additional catchment. Once complete, the library file and soil and land cover maps are simplified so that there is no duplication.

**Assumptions**

- All maps are aligned to the same grid.
- All maps have the same *nodata* values.
- Maps within the same simulation share the same extent (masks from subsurface are used for land cover and lakes). If this is not the case then you can make multiple masks.

**Troubleshooting**

You could get the error *IndexError: boolean index did not match indexed array along dimension 0; dimension is 1 but corresponding boolean dimension is 29* if the files are not the same shape. This is possible if they have been built differently or have a column/row of NA around the edges. Check this manually (or improve the code).

In [25]:
# --- Preamble ---
import SHETRAN_GB_Master_Setup_Functions as smf
import os
import pandas as pd

# --- Paths to Library Files ---
# Root folder that all of the model folders below are joined onto.
root = 'C:/Users/nbs65/Newcastle University/Anna Murgatroyd - Shetran-Anglian/05 - SHETRAN Modelling/'

# Library files of the models to merge. List these in order of decreasing size:
# the first entry is used as the base model that the later ones are written into.
model_paths = [
    os.path.join(root, '03 Uncalibrated APM Models with Superficials', 'Heigham', 'Heigham_LibraryFile.xml'),
    os.path.join(root, '04 Optimised Models with Superficials', '34004', '34004_LibraryFile.xml'),
    os.path.join(root, '04 Optimised Models with Superficials', '34005', '34005_LibraryFile.xml'),
]

# Write the output path of the library file:
output_Library_path = os.path.join(root, '05 Heigham/Heigham - PycalSF 34004 34005/Heigham_LibraryFile.xml')


# --- Functions ---
def extract_xml_parameter(xml_line):
    """
    Return the value held between the opening and closing tag of one library file line.
     '<PrecipitationTimeSeriesData>43018_Precip.csv</PrecipitationTimeSeriesData>' --> '43018_Precip.csv'

    :param: xml_line: string. A full line from the library file.
    :return: string. The text following the first '>' up to the next '<' (or the next '>').
    :raises: IndexError if the line contains no '>' character.
    """
    # The second '>'-separated piece is whatever follows the opening tag.
    after_opening_tag = xml_line.split('>')[1]

    # Keep only the text in front of the closing tag's '<'.
    value, _, _ = after_opening_tag.partition('<')
    return value


def remove_library_lines(library_lines, line_name):
    """
    Drop every line of a library file that begins with a given XML tag, such as
    VegetationDetail, SoilProperty or SoilDetail. The input list is left untouched.
    :param: library_lines: list of strings. The lines of the library file to clean
    :param: line_name: string. The name of the XML tag to remove, e.g. 'VegetationDetail'
    :return: list of strings. The lines that do not start with '<line_name>', in their original order.
    """
    opening_tag = f'<{line_name}>'

    # Keep everything that does not open with the unwanted tag.
    return [entry for entry in library_lines if not entry.startswith(opening_tag)]


def add_library_lines(lines_to_recieve, lines_to_add, block_name, header=None):
    """
    Insert lines into a library file directly after the opening tag of an XML block, such as
    VegetationDetails, SoilProperties or SoilDetails. The input lists are left untouched.
    :param: lines_to_recieve: list of strings. The lines of the library file to add to
    :param: lines_to_add: list of strings. The lines to add to the library file. End lines with '\\n'.
    :param: block_name: string. The name of the XML block to add to, e.g. 'VegetationDetails'
    :param: header: string. An optional header line to add before the lines_to_add. End header with '\\n'.
    :return: list of strings. A new list holding the original lines with the header (if given) and
        lines_to_add placed after every line that starts with '<block_name>'. If no line starts
        with that tag, the result is a plain copy of lines_to_recieve.
    """
    opening_tag = f'<{block_name}>'

    lines_updated = []
    for line in lines_to_recieve:
        # Every original line is kept; the new lines only ever follow the block's opening tag.
        lines_updated.append(line)

        if line.startswith(opening_tag):
            if header is not None:
                lines_updated.append(header)
            lines_updated.extend(lines_to_add)

    return lines_updated

In [26]:
# 1. Iterate through each model path. The first (largest) model becomes the master; every
#    later model is written into the master rasters and appended to the master detail tables.
for i, library_path in enumerate(model_paths):
    print(i, library_path)

    # Load the model files:
    with open(library_path, 'r') as file:
        lines = file.readlines()

    # Get file paths from the library file:
    subsurface_path = [extract_xml_parameter(l) for l in lines if l.startswith('<SoilMap>')][0]
    landcover_path = [extract_xml_parameter(l) for l in lines if l.startswith('<VegMap>')][0]
    lake_path = [extract_xml_parameter(l) for l in lines if l.startswith('<LakeMap>')][0]

    # Add in the library folder path:
    base_path = os.path.dirname(library_path)
    subsurface_path = os.path.join(base_path, subsurface_path)
    landcover_path = os.path.join(base_path, landcover_path)
    lake_path = os.path.join(base_path, lake_path)

    # Collate this model's detail lines. The first matching line of each kind is the
    # column header, so it is dropped with [1:].
    temp_cover_lines = [l for l in lines if l.startswith('<VegetationDetail>')][1:]
    temp_soilprop_lines = [l for l in lines if l.startswith('<SoilProperty>')][1:]
    temp_soildetail_lines = [l for l in lines if l.startswith('<SoilDetail>')][1:]

    if i == 0:
        # Load in the raster files so that they can be merged with another on the next loop:
        subsurface_master, ncols_l, nrows_l, xll_l, yll_l, cellsize_l, nodata_l, _, _ = smf.read_ascii_raster(subsurface_path)
        landcover_master = smf.read_ascii_raster(landcover_path, return_metadata=False)
        lake_master = smf.read_ascii_raster(lake_path, return_metadata=False)

        # Make the first library file the master Library File:
        lines_master = lines

        # The first model's details seed the master detail tables:
        cover_lines = list(temp_cover_lines)
        soilprop_lines = list(temp_soilprop_lines)
        soildetail_lines = list(temp_soildetail_lines)

    else:
        subsurface_new, ncols_s, nrows_s, xll_s, yll_s, cellsize_s, nodata, _, _ = smf.read_ascii_raster(subsurface_path)
        landcover_new = smf.read_ascii_raster(landcover_path, return_metadata=False)
        lake_new = smf.read_ascii_raster(lake_path, return_metadata=False)

        # 2. Calculate the starting row/col in the large raster where the small raster should be placed.
        #    NumPy row 0 is the top of the map, so the row offset is measured down from the top edge
        #    of the master raster to the top edge of the nested raster.
        row_start = int((yll_l + nrows_l * cellsize_l - yll_s - nrows_s * cellsize_s) / cellsize_l)
        col_start = int((xll_s - xll_l) / cellsize_l)

        # 3. Work out the ID offsets. The SAME offset must be applied to a raster and to the library
        #    rows that describe it, otherwise the merged map cells point at the wrong rows. Each
        #    offset is therefore the larger of the highest ID in the master library and the highest
        #    value in the master raster, which also guarantees that new IDs cannot overlap either.
        cover_max = max(
            max(int(extract_xml_parameter(x).split(',')[0]) for x in cover_lines),
            int(landcover_master.max()),
        )
        soilprop_max = max(int(extract_xml_parameter(x).split(',')[0]) for x in soilprop_lines)
        soildetail_max = max(
            max(float(extract_xml_parameter(x).split(',')[0]) for x in soildetail_lines),
            float(subsurface_master.max()),
        )

        # 4. Write the nested rasters into the master rasters. The subsurface mask is reused for
        #    the land cover and lake maps (maps within a model are assumed to share an extent).
        mask = (subsurface_new != nodata)
        rows = slice(row_start, row_start + nrows_s)
        cols = slice(col_start, col_start + ncols_s)

        # Slicing gives a view of the master array, so assigning through [mask] edits it in place.
        subsurface_master[rows, cols][mask] = subsurface_new[mask] + soildetail_max
        landcover_master[rows, cols][mask] = landcover_new[mask] + cover_max
        lake_master[rows, cols][mask] = lake_new[mask]  # Don't add, this is binary.

        # 5. Add the new library file details to the master detail tables.
        # Change the Veg Type in the temp_cover_lines so that they don't overlap with the master cover_lines:
        for line in temp_cover_lines:
            line_split = extract_xml_parameter(line).split(',')
            line_split[0] = str(int(line_split[0]) + cover_max)
            cover_lines.append('<VegetationDetail>' + ','.join(line_split) + '</VegetationDetail>\n')

        # Change the Soil Number in the temp_soilprop_lines so that they don't overlap with the master soilprop_lines:
        for line in temp_soilprop_lines:
            line_split = extract_xml_parameter(line).split(',')
            line_split[0] = str(int(line_split[0]) + soilprop_max)
            soilprop_lines.append('<SoilProperty>' + ','.join(line_split) + '</SoilProperty>\n')

        # Change the Soil Category (matches the subsurface raster) and the Soil Type (matches the
        # Soil Number of the Soil Properties) in the SoilDetail lines:
        for line in temp_soildetail_lines:
            line_split = extract_xml_parameter(line).split(',')
            line_split[0] = str(float(line_split[0]) + soildetail_max)
            line_split[2] = str(float(line_split[2]) + soilprop_max)
            soildetail_lines.append('<SoilDetail>' + ','.join(line_split) + '</SoilDetail>\n')

0 C:/Users/nbs65/Newcastle University/Anna Murgatroyd - Shetran-Anglian/05 - SHETRAN Modelling/03 Uncalibrated APM Models with Superficials\Heigham\Heigham_LibraryFile.xml
1 C:/Users/nbs65/Newcastle University/Anna Murgatroyd - Shetran-Anglian/05 - SHETRAN Modelling/04 Optimised Models with Superficials\34004\34004_LibraryFile.xml
2 C:/Users/nbs65/Newcastle University/Anna Murgatroyd - Shetran-Anglian/05 - SHETRAN Modelling/04 Optimised Models with Superficials\34005\34005_LibraryFile.xml


## Rewrite the new Library File and model files:

Slip the combined vegetations and soils into the original library file.

Then save them to a new folder.

Then go back through and remove duplicates.

In [27]:
# Strip the existing detail rows (including their header rows) out of the master library lines:
lines_cleaned = lines_master
for detail_tag in ('VegetationDetail', 'SoilProperty', 'SoilDetail'):
    lines_cleaned = remove_library_lines(lines_cleaned, detail_tag)

# Header rows that are written back in front of each merged table:
vegetation_header = '<VegetationDetail>Veg Type #, Vegetation Type, Canopy storage capacity (mm), Leaf area index, Maximum rooting depth(m), AE/PE at field capacity,Strickler overland flow coefficient</VegetationDetail>\n'
soilprop_header = '<SoilProperty>Soil Number,Soil Type, Saturated Water Content, Residual Water Content, Saturated Conductivity (m/day), vanGenuchten- alpha (cm-1), vanGenuchten-n</SoilProperty> Avoid spaces in the Soil type names\n'
soildetail_header = '<SoilDetail>Soil Category, Soil Layer, Soil Type, Depth at base of layer (m)</SoilDetail>\n'

# Insert each merged table after the opening tag of its block, one block at a time:
lines_updated = lines_cleaned
for block_tag, block_rows, block_header in (
    ('VegetationDetails', cover_lines, vegetation_header),
    ('SoilProperties', soilprop_lines, soilprop_header),
    ('SoilDetails', soildetail_lines, soildetail_header),
):
    lines_updated = add_library_lines(lines_updated, block_rows, block_tag, header=block_header)

## Write the files to a new folder:

In [28]:
output_folder = os.path.dirname(output_Library_path)

# exist_ok=True makes this a no-op when the folder is already there, without a separate check.
os.makedirs(output_folder, exist_ok=True)

# Write the updated library file:
with open(output_Library_path, 'w') as file:
    file.writelines(lines_updated)

# Get the map file names from the library file (these are joined onto the output folder below):
subsurface_path = [extract_xml_parameter(l) for l in lines_updated if l.startswith('<SoilMap>')][0]
landcover_path = [extract_xml_parameter(l) for l in lines_updated if l.startswith('<VegMap>')][0]
lake_path = [extract_xml_parameter(l) for l in lines_updated if l.startswith('<LakeMap>')][0]

# Write the updated raster files. All three share the grid definition of the master (largest) model:
for raster, raster_name in (
    (subsurface_master, subsurface_path),
    (landcover_master, landcover_path),
    (lake_master, lake_path),
):
    smf.write_ascii(raster, os.path.join(output_folder, raster_name),
                    xllcorner=xll_l, yllcorner=yll_l, cellsize=cellsize_l,
                    ncols=ncols_l, nrows=nrows_l)

# Copy the remaining (unmerged) input files of the master model to the new folder:
smf.copy_library_inputs(
        library_filepath=model_paths[0], 
        new_folder=output_folder,
        required_files=["<DEMMeanFileName>", "<DEMminFileName>", "<MaskFileName>", 
                        "<PrecipMap>", "<PeMap>", 
                        "<PrecipitationTimeSeriesData>", 
                        "<EvaporationTimeSeriesData>", 
                        "<MaxTempTimeSeriesData>"
                        ],
                        copy_library=False)  # copy_library must be False, else you'll get the original library file.


## Simplify the Files

The library files and rasters are now probably very full of duplicated values. You may want to leave these so that you can distinguish between the soils in the different catchments (i.e. if you want to calibrate them individually).

If you want to simplify the catchment however, then the code below will remove duplicates from the soil and vegetation rasters and library tables.

In [None]:
library_path_to_simplify = "S:/11 - Anglian Water Catchments (HEIF Grant 2025)/New setup with superficials/34004 Merged/34004_LibraryFile.xml"

# Load the model files:
with open(library_path_to_simplify, 'r') as file:
    lines = file.readlines()

# Get file paths from the library file:
subsurface_path = [extract_xml_parameter(l) for l in lines if l.startswith('<SoilMap>')][0]
landcover_path = [extract_xml_parameter(l) for l in lines if l.startswith('<VegMap>')][0]
lake_path = [extract_xml_parameter(l) for l in lines if l.startswith('<LakeMap>')][0]

# Add in the library folder path:
base_path = os.path.dirname(library_path_to_simplify)
subsurface_path = os.path.join(base_path, subsurface_path)
landcover_path = os.path.join(base_path, landcover_path)
lake_path = os.path.join(base_path, lake_path)

# Load in the raster files. With metadata requested, read_ascii_raster returns the array together
# with the grid metadata, so unpack first and only then convert the array itself to int.
subsurface_master, ncols_l, nrows_l, xll_l, yll_l, cellsize_l, nodata_l, _, _ = smf.read_ascii_raster(subsurface_path)
subsurface_master = subsurface_master.astype(int)
landcover_master = smf.read_ascii_raster(landcover_path, return_metadata=False).astype(int)
lake_master = smf.read_ascii_raster(lake_path, return_metadata=False).astype(int)

# Collate the master details (the soil lists are gathered for the soil sections still to be written):
cover_lines, soilprop_lines, soildetail_lines = [], [], []
for line in lines:
    if line.startswith('<VegetationDetail>'):
        cover_lines.append(line)
    elif line.startswith('<SoilProperty>'):
        soilprop_lines.append(line)
    elif line.startswith('<SoilDetail>'):
        soildetail_lines.append(line)

# map, mc, mr, mx, my, mcs, mnd, _, _ = read_ascii_raster(map_path, data_type=int, replace_NA=False)
veg_cols = ['Veg Type', 'Vegetation Type', 'Canopy storage capacity (mm)', 'Leaf area index', 'Maximum rooting depth(m)', 'AE/PE at field capacity', 'Strickler overland flow coefficient']
soil_prop_cols = ['Soil Number', 'Soil Type', 'Saturated Water Content', 'Residual Water Content', 'Saturated Conductivity (m/day)', 'vanGenuchten- alpha (cm-1)', 'vanGenuchten-n']
soil_detail_cols = ['Soil Category', 'Soil Layer', 'Soil Type', 'Depth at base of layer (m)']

# --- Remove duplicates from the Land Cover table:
# Two rows are duplicates when everything except the ID column matches.
group_cols = [v for v in veg_cols if v != 'Veg Type']

# Convert to a dataframe (cover_lines[0] is the header row, so it is skipped):
table = [extract_xml_parameter(line).split(',') for line in cover_lines[1:]]
table = pd.DataFrame(table, columns=veg_cols)

# The IDs are parsed as strings. Convert them to int and sort numerically so that
#   (a) '10' does not sort in front of '2', and
#   (b) the first row of every duplicate group is the one carrying the lowest ID.
table['Veg Type'] = table['Veg Type'].astype(int)
table = table.sort_values('Veg Type').reset_index(drop=True)

# Group the rows in table using all columns except 'Veg Type':
groups = table.groupby(group_cols)['Veg Type'].apply(list)

# Run through the groups:
for group in groups:
    # Every ID in a group collapses onto the lowest ID of that group:
    new_ID = min(group)

    # Change the duplicated IDs to the new ID in the map:
    for old_ID in group:
        landcover_master[landcover_master == old_ID] = new_ID

# The table is sorted by ID, so keep='first' keeps exactly the ID that the map was remapped to.
table.drop_duplicates(subset=group_cols, keep='first', inplace=True)

# Reset the indexes so that they run consecutively. Working through the IDs in ascending numeric
# order means each new ID is <= the ID it replaces, so a renumbered value can never be mistaken
# for an ID that has yet to be processed.
counter = 1
for old_ID in sorted(table['Veg Type']):
    print(old_ID, counter)
    # Table:
    table.loc[table['Veg Type'] == old_ID, 'Veg Type'] = counter
    # Map
    landcover_master[landcover_master == old_ID] = counter
    counter += 1

# Convert the table back to lines:
cover_lines_updated = ['<VegetationDetail>' + ','.join([str(v) for v in row]) + '</VegetationDetail>\n' for i, row in table.iterrows()]

# Write out the simplified land cover map:
smf.write_ascii(array=landcover_master, ascii_ouput_path=landcover_path.replace('.asc', '_simple.asc'),
                xllcorner=xll_l, yllcorner=yll_l, cellsize=cellsize_l, ncols=ncols_l, nrows=nrows_l, 
                NODATA_value=nodata_l, data_format= '%1.0f')

# --- Remove duplicates from the Soil Number table:

# --- Now change edit the soil details to match the de-duplicated Soil numbers produced above.

# --- Now remove duplicates in the Soil Details Table and the Subsurface Map.


In [None]:
# USE THIS SECTION FOR MERGING THE LINES BACK INTO THE LIBRARY FILE:
# Swap the existing <VegetationDetail> rows for the de-duplicated, renumbered rows held in
# cover_lines_updated (built when the land cover table was simplified).
lines_cleaned = remove_library_lines(lines, 'VegetationDetail')
lines_updated = add_library_lines(
    lines_cleaned, cover_lines_updated, 'VegetationDetails',
    header='<VegetationDetail>Veg Type #, Vegetation Type, Canopy storage capacity (mm), Leaf area index, Maximum rooting depth(m), AE/PE at field capacity,Strickler overland flow coefficient</VegetationDetail>\n')
lines_updated

# # Remove the duplicated rows from the table and write it to csv:
# table.to_csv(table_path.replace('.csv', f'{output_suffix}.csv'), index=False)

## Notes

In [None]:
# Recode cells equal to -9999 (presumably the nodata value) to 0 before plotting.
# This edits landcover_master in place.
nodata_cells = landcover_master == -9999
landcover_master[nodata_cells] = 0

# NOTE(review): `plt` is not imported in the cells shown here - presumably
# `import matplotlib.pyplot as plt` was run elsewhere; confirm before re-running.
plt.imshow(landcover_master, cmap='viridis')

In [184]:
# List the distinct values present in the land cover map, in order of first appearance.
pd.unique(landcover_master.ravel())

array([    1, -9999,     2,     4,     5,     3])