https://github.com/ukcp-data/ukcp-spatial-files/tree/master/spatial-files/ukcp18-uk-land-5km
https://catalogue.ceda.ac.uk/uuid/e304987739e04cdc960598fa5e4439d0/

In [None]:
import xarray as xr
import os
import glob
import pandas as pd
import geopandas as gpd
from rasterio import features
import rasterio
from shapely.geometry import shape
import os
from shapely.geometry import shape
from shapely.geometry.polygon import orient
import json
import xclim.indicators.atmos as xci
import sys
from collections import defaultdict

# def change_from_baseline(data, output, name, period=15):
#     first_period = data.isel(year=slice(0, period))
#     baseline_mean = first_period.mean(dim='year')

#     change_from_baseline = data - baseline_mean

#     trimmed_change = change_from_baseline.isel(year=slice(15, None))

#     change_path = os.path.join(output, '01_rolling', name)
#     trimmed_change.to_netcdf(change_path)
# change_from_baseline(merged_ds, output_folder, "rolling_change_from_baseline.nc", period=15)

In [3]:
# Ensemble statistics; used as the <stat> part of PR_<stat>_<metric> file names.
stats = ["max", "min", "med"]

# Width (in years) of the centred rolling-mean window.
rolling_period = 30

# Root folders for the raw model NetCDF files and for everything produced here.
input_folder = "./raw/pr"
output_folder = "./processed/pr"

# Two-digit model identifiers; each one names a sub-folder of input_folder.
models = [
    f"{member:02d}"
    for member in (1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 23, 25, 27, 29)
]

In [None]:
def combine_and_extract(model_number):
    """Derive yearly precipitation indicators for one model and save them.

    Every ``*.nc`` file in ``<input_folder>/<model_number>`` is opened as one
    dataset. The result is written to
    ``<output_folder>/01_rolling/rolling_average_<model_number>.nc`` and holds,
    along a ``year`` dimension:

    * ``rolling_pr``           - rolling mean of the yearly maximum of ``pr``
    * ``rolling_sum``          - rolling mean of the yearly total of ``pr``
    * ``rolling_max_3day_sum`` - rolling mean of the largest 3-step total
      found inside each year
    * ``rolling_spi``          - rolling mean of the SPI-based dry-month count
    * ``days_under_1mm``       - yearly count of steps with ``pr < 0.2``
      (stored as-is, no rolling mean is applied to it)

    All rolling means are centred, ``rolling_period`` years wide, and use
    ``min_periods=1`` so the ends of the series are averaged over fewer years.

    Args:
        model_number: Name of the model sub-folder inside ``input_folder``
            (e.g. ``"01"``); also used in the output file name.

    Raises:
        FileNotFoundError: If the model folder contains no ``*.nc`` file.
    """
    nc_file_list = sorted(glob.glob(os.path.join(input_folder, model_number, "*.nc")))
    if not nc_file_list:
        raise FileNotFoundError(
            f"No .nc files found in {os.path.join(input_folder, model_number)!r}"
        )

    # Create the target folder up front so a long computation cannot fail at
    # the very last step just because the directory is missing.
    rolling_dir = os.path.join(output_folder, '01_rolling')
    os.makedirs(rolling_dir, exist_ok=True)
    out_rolling_path = os.path.join(rolling_dir, f"rolling_average_{model_number}.nc")

    # The context manager guarantees the source files are released even when
    # one of the computations below raises.
    with xr.open_mfdataset(nc_file_list, combine='by_coords') as combined_ds:
        # Calculate SPI (3-month window, monthly frequency).
        monthly_spi = xci.standardized_precipitation_index(
            combined_ds.pr,
            window=3,
            freq='MS',
            dist="gamma",
            method="APP",
            cal_end="2010-12-30",
            fitkwargs={"floc": 0},
        )

        # NOTE(review): the original variable was called "months_below_minus1"
        # but the threshold actually applied is -2; the threshold is kept
        # unchanged here - confirm which value is intended.
        dry_months = monthly_spi < -2
        # Fraction of flagged months in each year, scaled to months per year.
        dry_months_per_year = (
            dry_months.groupby('time.year').mean(dim='time', keep_attrs=True) * 12
        )
        rolling_spi = dry_months_per_year.rolling(
            year=rolling_period, center=True, min_periods=1
        ).mean()

        yearly_groups = []
        for _, yearly_ds in combined_ds.groupby('time.year'):
            pr_in_year = yearly_ds['pr']

            # Highest value in the year (applied to every variable in the dataset).
            yearly_summary = yearly_ds.max(dim='time', keep_attrs=True)

            # Total rainfall in the year.
            yearly_summary['sum'] = pr_in_year.sum(dim='time', keep_attrs=True)

            # Wettest 3-step period. The window is evaluated inside the year
            # only, so periods straddling two years are not considered.
            three_step_totals = pr_in_year.rolling(time=3, min_periods=3).sum()
            yearly_summary['max_3day_sum'] = three_step_totals.max(dim='time', keep_attrs=True)

            # Number of steps below 0.2.
            # NOTE(review): the variable name says "1mm" but the threshold is
            # 0.2; the name is kept because it is part of the output file.
            yearly_summary['days_under_1mm'] = (pr_in_year < 0.2).sum(dim='time', keep_attrs=True)

            yearly_groups.append(yearly_summary)

        # Concatenate all years along a new 'year' dimension.
        merged_ds = xr.concat(yearly_groups, dim='year')

        for source_var in ('sum', 'max_3day_sum', 'pr'):
            merged_ds[f'rolling_{source_var}'] = merged_ds[source_var].rolling(
                year=rolling_period, center=True, min_periods=1
            ).mean()
        merged_ds['rolling_spi'] = rolling_spi

        # Only the smoothed series are kept for these three indicators.
        merged_ds = merged_ds.drop_vars(['sum', 'max_3day_sum', 'pr'])

        merged_ds.to_netcdf(out_rolling_path)

In [None]:
# Models to process in this run (a subset of the identifiers in `models`).
select_models = ["15", "23", "25", "27", "29"]

# Each call writes one rolling_average_<model>.nc file.
for model in select_models:
    print(f"Processing model {model}")
    combine_and_extract(model)


### Characterise models

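The previous step produced one rolling-average file per downloaded model. Here we compute the median, minimum and maximum across those model files and write three new files (one per statistic) that are used for the rest of the processing.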

In [3]:
def characterise_rolling_averages(rolling_dir="./processed/pr/01_rolling",
                                  stats_dir="./processed/pr/01-1_stats"):
    """Reduce the per-model rolling-average files to median/min/max files.

    Every ``*.nc`` file in ``rolling_dir`` is opened (without decoding times),
    the datasets are concatenated along ``ensemble_member`` and the median,
    minimum and maximum over that dimension are written to
    ``precipitation_median.nc``, ``precipitation_min.nc`` and
    ``precipitation_max.nc`` inside ``stats_dir``.

    Args:
        rolling_dir: Folder holding the per-model NetCDF files.
        stats_dir: Folder receiving the three statistics files; created if
            it does not exist.

    Raises:
        FileNotFoundError: If ``rolling_dir`` contains no ``*.nc`` file.
    """
    # Sorted so the concatenation order does not depend on the file system.
    file_paths = sorted(glob.glob(os.path.join(rolling_dir, "*.nc")))
    if not file_paths:
        raise FileNotFoundError(f"No .nc files found in {rolling_dir!r}")

    os.makedirs(stats_dir, exist_ok=True)

    datasets = []
    try:
        for fp in file_paths:
            datasets.append(xr.open_dataset(fp, decode_times=False))
        combined = xr.concat(datasets, dim='ensemble_member')

        # Output label -> reduction over the ensemble dimension.
        reducers = {
            "median": combined.median,
            "min": combined.min,
            "max": combined.max,
        }
        for label, reduce_over_ensemble in reducers.items():
            reduced = reduce_over_ensemble(dim='ensemble_member', keep_attrs=True)
            reduced.to_netcdf(os.path.join(stats_dir, f"precipitation_{label}.nc"))
    finally:
        # Release the input files whether or not the reductions succeeded.
        for ds in datasets:
            ds.close()

In [4]:
# Collapse the per-model rolling-average files into median/min/max files.
characterise_rolling_averages()

### Manual processing

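Before running the next steps, load the `.nc` files created above into QGIS and use the batch processing tool to clip the layers; do not reproject them manually. Save the clipped rasters as GeoTIFFs in `./processed/pr/02_clipped`, named `PR_<stat>_<metric>.tif` (for example `PR_max_3day.tif`), because the cells below look for exactly those names.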

In [14]:
def tif_to_geojson(tif_folder, geojson_folder, years=(50, 60, 70, 80, 90, 100)):
    """Polygonise selected bands of every GeoTIFF in a folder to GeoJSON.

    For each ``*.tif`` in ``tif_folder`` the bands whose label contains one of
    ``years`` as a whole number are vectorised, reprojected to EPSG:4326 and
    written to ``<geojson_folder>/<tif name>_<band label>.geojson``. Every
    feature carries the pixel ``value`` and the band label as ``timeband``.

    Band labels come from the band descriptions (falling back to the 1-based
    band number) for multi-band files, and from the ``timeband`` tag (falling
    back to ``"unknown"``) for single-band files.

    Args:
        tif_folder: Folder containing the input GeoTIFF files.
        geojson_folder: Output folder; created if it does not exist.
        years: Numbers identifying the bands to export. A band is selected
            when any run of digits in its label equals one of these numbers,
            so ``50`` selects ``"50"`` but not ``"150"`` or ``"2050"``.
    """
    import re

    import numpy as np

    wanted_years = {int(y) for y in years}
    os.makedirs(geojson_folder, exist_ok=True)

    for tif_file in sorted(glob.glob(os.path.join(tif_folder, "*.tif"))):
        tif_stem = os.path.splitext(os.path.basename(tif_file))[0]

        with rasterio.open(tif_file) as src:
            # Work out a label for every band.
            if src.count > 1:
                timebands = [
                    desc if desc else f"{i + 1}"
                    for i, desc in enumerate(src.descriptions)
                ]
            else:
                timebands = [src.tags().get('timeband') or "unknown"]

            nodata = src.nodata

            for band_idx, timeband in enumerate(timebands):
                # Compare whole numbers rather than substrings, otherwise
                # "50" would also pick up bands such as "150".
                numbers_in_label = {int(tok) for tok in re.findall(r"\d+", str(timeband))}
                if wanted_years.isdisjoint(numbers_in_label):
                    continue

                image = src.read(band_idx + 1)
                if image.dtype != 'float32':
                    image = image.astype('float32')

                # NaN never compares equal to anything, so NaN pixels (and a
                # NaN nodata value) must be masked with isnan; a file without
                # a nodata value has only its NaN pixels excluded.
                valid = ~np.isnan(image)
                if nodata is not None and not np.isnan(nodata):
                    valid &= image != nodata

                # Masked pixels are excluded by features.shapes, so no further
                # nodata filtering is needed on the generated shapes.
                geoms = [
                    {
                        'geometry': orient(shape(geom), sign=1.0),
                        'properties': {'value': value, 'timeband': timeband},
                    }
                    for geom, value in features.shapes(
                        image, mask=valid, transform=src.transform
                    )
                ]
                if not geoms:
                    # Nothing valid in this band; an empty frame has no
                    # geometry column and cannot be reprojected.
                    continue

                gdf = gpd.GeoDataFrame.from_features(geoms, crs=src.crs)
                gdf = gdf.to_crs("EPSG:4326")
                geojson_path = os.path.join(geojson_folder, f"{tif_stem}_{timeband}.geojson")
                gdf.to_file(geojson_path, driver="GeoJSON")


In [15]:
# Turn the clipped GeoTIFFs into GeoJSON files, one file per selected time
# step of each raster.
clipped_tif_dir = os.path.join(output_folder, "02_clipped")
geojson_dir = os.path.join(output_folder, "03_geojson")
tif_to_geojson(clipped_tif_dir, geojson_dir)

### Invert GeoJSONs
Restructures the data so that each polygon's coordinate (lon/lat) becomes the key, referencing that location's values across all time steps.

In [17]:
def inverseGeojson(filename):
    """Regroup the per-time-step GeoJSON files of one layer by coordinate.

    Reads ``<output_folder>/03_geojson/<filename>_<band>.geojson`` for the
    bands 50, 60, 70, 80, 90 and 100 and writes
    ``<output_folder>/04_inverse/<filename>_inverse.json``: a list of records
    with a ``Coordinate`` entry plus one entry per label ``"2030"`` ...
    ``"2080"`` holding the ``value`` property found for that band.

    A polygon is identified by the first vertex of its outer ring, formatted
    to 10 decimal places. Only ``Polygon`` features are used. A coordinate
    that does not occur in a given band keeps ``None`` for that label.

    Args:
        filename: Layer name without band suffix, e.g. ``"PR_max_3day"``.
    """
    # Band suffix of the input file -> label used in the output records.
    # Keeping both in one mapping prevents the two lists drifting apart.
    band_labels = {
        "50": "2030",
        "60": "2040",
        "70": "2050",
        "80": "2060",
        "90": "2070",
        "100": "2080",
    }
    file_labels = list(band_labels.values())
    geojson_dir = os.path.join(output_folder, "03_geojson")

    # { coordinate: [value for band 1, value for band 2, ...] }
    coordinate_data = defaultdict(lambda: [None] * len(band_labels))

    for idx, band in enumerate(band_labels):
        file_path = os.path.join(geojson_dir, f"{filename}_{band}.geojson")
        with open(file_path, 'r', encoding="utf-8") as f:
            geojson = json.load(f)

        for feature in geojson.get("features", []):
            # GeoJSON allows "geometry": null / "properties": null.
            geometry = feature.get("geometry") or {}
            if geometry.get("type") != "Polygon":
                continue

            rings = geometry.get("coordinates", [])
            if not (rings and rings[0]):  # no outer ring
                continue

            # First coordinate of the outer ring, as fixed-width strings so
            # the same point yields the same key in every file.
            coord_key = tuple(f"{c:.10f}" for c in rings[0][0])
            properties = feature.get("properties") or {}
            coordinate_data[coord_key][idx] = properties.get("value")

    df = pd.DataFrame([
        {"Coordinate": coord, **dict(zip(file_labels, values))}
        for coord, values in coordinate_data.items()
    ])

    inverse_dir = os.path.join(output_folder, "04_inverse")
    os.makedirs(inverse_dir, exist_ok=True)
    df.to_json(
        os.path.join(inverse_dir, f"{filename}_inverse.json"),
        orient='records',
        indent=4,
    )

In [None]:

# Invert every metric layer of every ensemble statistic.
for stat in stats:
    for metric in ("3day", "dry", "maxPR", "spi", "yearlySum"):
        inverseGeojson(f"PR_{stat}_{metric}")


### Merging JSONs for graph

In [6]:
def graphData(stat):
    """Merge the inverted JSON files of one statistic into a single file.

    For the metrics ``3day``, ``maxPR``, ``yearlySum``, ``dry`` and ``spi``
    the file ``PR_<stat>_<metric>_inverse.json`` is read and its records are
    regrouped per coordinate. The result maps the string form of each
    coordinate tuple to ``{"PR_<metric>": [values for 2030, 2040, ... 2080]}``
    and is written to
    ``<output_folder>/04_inverse/<stat>/graph_data_<stat>.json``.

    Input files are looked up in ``<output_folder>/04_inverse/<stat>/`` first
    and, if missing there, directly in ``<output_folder>/04_inverse/``, which
    is where ``inverseGeojson`` writes them.

    Args:
        stat: Ensemble statistic in the file names, e.g. ``"max"``.
    """
    inverse_root = os.path.join(output_folder, "04_inverse")
    stat_dir = os.path.join(inverse_root, stat)
    metrics = ("3day", "maxPR", "yearlySum", "dry", "spi")
    # Labels 2030 to 2080 in 10-year steps.
    year_labels = [str(year) for year in range(2030, 2090, 10)]

    data_by_metric = {}

    for metric in metrics:
        json_name = f"PR_{stat}_{metric}_inverse.json"
        path = os.path.join(stat_dir, json_name)
        if not os.path.exists(path):
            # Fall back to the flat layout so the notebook runs end to end
            # without moving files by hand.
            path = os.path.join(inverse_root, json_name)

        with open(path, 'r', encoding="utf-8") as f:
            records = json.load(f)

        for entry in records:
            # JSON lists are unhashable; a tuple can be used as a dict key.
            coord_key = tuple(entry["Coordinate"])
            per_coord = data_by_metric.setdefault(coord_key, {})
            per_coord[f"PR_{metric}"] = [entry.get(label) for label in year_labels]

    # JSON object keys must be strings.
    final_output = {str(k): v for k, v in data_by_metric.items()}

    os.makedirs(stat_dir, exist_ok=True)
    output_file = os.path.join(stat_dir, f"graph_data_{stat}.json")
    with open(output_file, 'w', encoding="utf-8") as out_file:
        json.dump(final_output, out_file, indent=2)

In [7]:
# Build one merged graph_data_<stat>.json file per ensemble statistic.
for stat in stats:
    graphData(stat)