In [1]:
from utilsml import train_model
from utilsdf import list_files_by_tilenames
from uvars import tilenames_lidar,RES_DPATH
from utilsdf import assign_nulls, fillna,dropnulls_bycol,check_fillnulls, list_files_by_tilenames
from uvars import s1_fnames, s2_fnames,aux_names
import pandas as pd 
from sklearn.model_selection import train_test_split
import os 
from catboost import CatBoostRegressor
import rasterio
import numpy as np

In [2]:
def load_and_preprocess_pred_df(fparquet,s1_fnames,s2_fnames):
    """
    Reads one parquet table and runs the project's null-handling helpers on it.

    Parameters:
    - fparquet (str): Path to the parquet file to read.
    - s1_fnames: Column names handed to `fillna` on the first pass
      (presumably the Sentinel-1 features -- confirm against uvars).
    - s2_fnames: Column names handed to `fillna` on the second pass
      (presumably the Sentinel-2 features -- confirm against uvars).

    Returns:
    - Whatever `check_fillnulls` returns after `assign_nulls` and both
      `fillna` passes have been applied, in that order.
    """
    frame = assign_nulls(pd.read_parquet(fparquet))

    # Same helper for each feature group; order matters (S1 first, then S2).
    for feature_group in (s1_fnames, s2_fnames):
        frame = fillna(frame, feature_group)

    return check_fillnulls(frame)


def write_predictions_to_raster(predictions, tile_file, output_raster_path):
    """
    Writes predictions to a new raster file using metadata from an existing tile file.

    The output is a single-band, float32, LZW-compressed raster; every other
    metadata entry is copied unchanged from `tile_file`.

    Parameters:
    - predictions (array-like): Predicted values whose total element count equals
      height * width of the tile raster (typically a flattened 1D array).
    - tile_file (str): Path to the raster file from which metadata will be read.
    - output_raster_path (str): Path where the new raster file will be saved.

    Returns:
    - None

    Raises:
    - ValueError: If the predictions cannot be reshaped to the tile's (height, width).
    """
    # Read metadata from the tile file; only the profile and the shape are needed.
    with rasterio.open(tile_file) as src:
        meta = src.meta.copy()
        raster_shape = (src.height, src.width)

    # Convert once so the size reported below is the true element count, even for
    # N-D inputs or inputs that do not implement len().
    values = np.asarray(predictions)

    # Reshape predictions to match the raster's dimensions
    try:
        predictions_reshaped = values.reshape(raster_shape)
    except ValueError as err:
        # Chain the original error so numpy's own diagnostic is not lost.
        raise ValueError(
            f"Predictions array size {values.size} does not match raster dimensions {raster_shape}."
        ) from err

    # Update metadata for writing a new raster
    meta.update({
        "dtype": rasterio.float32,  # Ensure predictions are stored as float32
        "count": 1,  # Single band
        "compress": "lzw"  # Optional: Add compression
    })

    # Write the new raster
    with rasterio.open(output_raster_path, "w", **meta) as dst:
        dst.write(predictions_reshaped.astype(rasterio.float32), 1)  # Write to the first band

    print(f"New raster saved to {output_raster_path}")

In [3]:
# Tiles to process.
tilenames = tilenames_lidar

# Target to predict. Run for all 3 targets, and also decide whether to use
# zdiff or not if going to downscale.
target_col = 'edem'
tcol = target_col  # short alias for the same target name

# Model inputs: auxiliary names first, then the S1 and S2 feature names.
features_col = aux_names + s1_fnames +s2_fnames

##### TRAIN 90 AND PREDICT 90

In [4]:
# Directory holding the trained baseline models.
# NOTE(review): absolute, machine-specific path -- consider moving it to uvars/config.
modeldpath = "/home/ljp238/Documents/UoE/libe_tabml/output/wb_baseline_loop/"

# os.path.join avoids the doubled "/" that string interpolation produced,
# since modeldpath already ends with a separator.
modelpath = os.path.join(modeldpath, "edem_90000_catboost_model.txt")

# Load the previously trained CatBoost regressor from disk.
model = CatBoostRegressor()
model.load_model(modelpath)

<catboost.core.CatBoostRegressor at 0x7eae9a759950>

In [5]:

# X is forwarded to list_files_by_tilenames; presumably the tile resolution
# (outputs land under TILES90) -- confirm against utilsdf.
X=90
fparquet_list,tile_files_list = list_files_by_tilenames(RES_DPATH, X, tilenames)

# The two lists are consumed pairwise, so a length mismatch would pair a
# parquet table with the wrong tile (or fail mid-run). Fail early instead.
if len(fparquet_list) != len(tile_files_list):
    raise ValueError(
        f"Got {len(fparquet_list)} parquet files but {len(tile_files_list)} tile file lists."
    )

L = len(fparquet_list)
print(L)
for fparquet, tile_files in zip(fparquet_list, tile_files_list):
    df = load_and_preprocess_pred_df(fparquet,s1_fnames,s2_fnames)
    pred = model.predict(df[features_col])

    # The first matching raster supplies the grid/metadata for the output.
    tile_ifile = next((path for path in tile_files if 'edem_W84_90.tif' in path), None)
    if tile_ifile is None:
        # Previously an opaque IndexError from `[...][0]`; name the offending tile.
        raise FileNotFoundError(f"No 'edem_W84_90.tif' raster listed for {fparquet}")

    tile_ofile = tile_ifile.replace('.tif', '__ML90M.tif')
    write_predictions_to_raster(pred, tile_ifile, tile_ofile)

6
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N09E105/N09E105_edem_W84_90__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N09E106/N09E106_edem_W84_90__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N10E104/N10E104_edem_W84_90__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N10E105/N10E105_edem_W84_90__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N10E106/N10E106_edem_W84_90__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES90/N13E103/N13E103_edem_W84_90__ML90M.tif


##### TRAIN 90 AND PREDICT 30

In [20]:
# X=30
# fparquet_list,tile_files_list = list_files_by_tilenames(RES_DPATH, X, tilenames)
# L = len(fparquet_list)
# print(L)
# for idx in range(L):
#     fparquet,tile_files = fparquet_list[idx],tile_files_list[idx]
#     df = load_and_preprocess_pred_df(fparquet,s1_fnames,s2_fnames)
#     pred = model.predict(df[features_col])

#     tile_ifile = [i for i in tile_files if 'edem_W84' in i][0]
#     tile_ofile = tile_ifile.replace('.tif', '__ML90M.tif')
#     write_predictions_to_raster(pred, tile_ifile, tile_ofile) # find the code that writes this in steps /blocks 

# 61 mins -- too expensive; make it cheaper
# did not work well: poor transferability across scales

6
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N09E105/N09E105_edem_W84_30__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N09E106/N09E106_edem_W84_30__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N10E104/N10E104_edem_W84_30__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N10E105/N10E105_edem_W84_30__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N10E106/N10E106_edem_W84_30__ML90M.tif
New raster saved to /media/ljp238/12TBWolf/RSPROX/TILES30/N13E103/N13E103_edem_W84_30__ML90M.tif
