In [None]:
%%capture
!pip install numpy matplotlib scikit-image plotly tensorflow torch pandas xgboost scikit-learn

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from os import listdir
import plotly.express as px
import skimage
from skimage.measure import block_reduce
import pandas as pd
from xgboost import XGBRegressor
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error


# Model Training Data Prep

## Load and extract data

In [None]:
#Get Train Images
!wget -O images_train.zip "https://uni-muenster.sciebo.de/s/Ye3GpOyPj0rCeTh/download?path=%2F&files=images_train.zip"
!unzip -q images_train.zip
!wget -O masks_train.zip "https://uni-muenster.sciebo.de/s/Ye3GpOyPj0rCeTh/download?path=%2F&files=masks_train.zip"
!unzip -q masks_train.zip

--2023-07-04 20:46:17--  https://uni-muenster.sciebo.de/s/Ye3GpOyPj0rCeTh/download?path=%2F&files=images_train.zip
Resolving uni-muenster.sciebo.de (uni-muenster.sciebo.de)... 128.176.1.2
Connecting to uni-muenster.sciebo.de (uni-muenster.sciebo.de)|128.176.1.2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 213887376 (204M) [application/zip]
Saving to: ‘images_train.zip’


2023-07-04 20:46:26 (25.4 MB/s) - ‘images_train.zip’ saved [213887376/213887376]

--2023-07-04 20:46:33--  https://uni-muenster.sciebo.de/s/Ye3GpOyPj0rCeTh/download?path=%2F&files=masks_train.zip
Resolving uni-muenster.sciebo.de (uni-muenster.sciebo.de)... 128.176.1.2
Connecting to uni-muenster.sciebo.de (uni-muenster.sciebo.de)|128.176.1.2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 365001 (356K) [application/zip]
Saving to: ‘masks_train.zip’


2023-07-04 20:46:34 (690 KB/s) - ‘masks_train.zip’ saved [365001/365001]



In [None]:
# Read every training image and mask from disk, stack them into one array each
# and reorder the axes so that the second axis of the stack becomes the last one.
from pathlib import Path

NUM_IMAGES = 20             # number of image/mask pairs read from disk
AXIS_ORDER = (0, 2, 3, 1)   # new axis order applied to both stacks

images = np.array(
    [np.load(Path(f'images/image_{n:03}.npy')) for n in range(NUM_IMAGES)]
).transpose(AXIS_ORDER)
masks = np.array(
    [np.load(Path(f'masks/mask_{n:03}.npy')) for n in range(NUM_IMAGES)]
).transpose(AXIS_ORDER)
# Disabled: loading of the public test image
#public_test = np.array([np.load(Path(f'public_test_image.npy'))]).transpose((3,2,1,0))

In [None]:
# Unpack the four dimensions of the stacked image array
# (presumably image count, two spatial sizes and channel count).
N, X, Y, C = np.shape(images)

(32239, 10) (32239, 1) 20 1024 1024 10 (20, 1024, 1024)


## Cropped Images Preprocessing

In [None]:
# Number of pixels taken on each side of a keypoint when cropping; the crop
# slices below therefore span 2 * CROPPING_OFFSET + 1 pixels per spatial axis.
CROPPING_OFFSET = 2 # Defines the pixel offset in every direction

In [None]:
# Zero-pad the two middle (spatial) axes by CROPPING_OFFSET on both sides so
# that crops around edge pixels stay inside the array; the first and last axes
# are left unpadded.
padding_tuple = ((0, 0),) + ((CROPPING_OFFSET, CROPPING_OFFSET),) * 2 + ((0, 0),)
images_pad = np.pad(images, padding_tuple, mode='constant', constant_values=0)

In [None]:
# Coordinates of all non-zero mask entries (one index array per mask axis);
# these are the positions where a height value was measured.
keypoints = np.nonzero(masks)
# The measured height value stored at each of those positions
keypoints_val = masks[keypoints]

In [None]:
# Cut one square window out of the padded images around every keypoint and
# collect picture index, padded coordinates, height value and window per row.
crop_rows = []
window = 2 * CROPPING_OFFSET + 1  # side length of each crop window

for p, row, col, v in zip(keypoints[0], keypoints[1], keypoints[2], keypoints_val):
  # p: picture index, v: height value, (row, col): keypoint in the unpadded image
  x = row + CROPPING_OFFSET  # X index of the keypoint in the padded image
  y = col + CROPPING_OFFSET  # Y index of the keypoint in the padded image
  # The unpadded coordinate equals the window's start index in the padded image
  image_crop = images_pad[p, row:row + window, col:col + window, :]
  crop_rows.append([p, x, y, v, image_crop])

images_crop = pd.DataFrame(crop_rows, columns=["p","x","y","v","image"])

In [None]:
# Prepare the train/test split as two disjoint sets of row indices into
# images_crop (first 80 % of a shuffled index order for training, rest for test).
# The shuffle uses a seeded generator so that the split, and every metric
# computed from it, is reproducible after a kernel restart.
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8

n_data = len(images_crop)
indices = np.random.default_rng(SPLIT_SEED).permutation(n_data)  # shuffled 0 .. n_data-1
n_train = int(n_data * TRAIN_FRACTION)
train_indices, test_indices = indices[:n_train], indices[n_train:]

In [None]:
# Select the rows of each split, then turn the crop windows into the feature
# arrays and the height values into the target arrays.
train_rows = images_crop.loc[train_indices]
test_rows = images_crop.loc[test_indices]

crop_train_x = np.array(train_rows["image"].tolist())
crop_train_y = np.array(train_rows["v"])
crop_test_x = np.array(test_rows["image"].tolist())
crop_test_y = np.array(test_rows["v"])

In [None]:
# Flatten every crop window into a single feature vector for the XGB model.
# Note: P, X, Y and C are overwritten here and end up holding the dimensions
# of the test crops.
P, X, Y, C = np.shape(crop_train_x)
crop_train_x = np.reshape(crop_train_x, (P, -1))

P, X, Y, C = np.shape(crop_test_x)
crop_test_x = np.reshape(crop_test_x, (P, -1))


## XGB Model Training

In [None]:
# Hyper-parameters passed on to the XGBRegressor constructor
params = dict(
    booster='gbtree',               # author's note: the default, outperforms gblinear
    objective='reg:absoluteerror',  # previously reg:squarederror
    learning_rate=0.3,
    n_estimators=500,
    eval_metric='mae',              # previously rmse
)

In [None]:
# Build the regressor for the crop features: shared params plus a tree depth of 10
xgb_model_crop = XGBRegressor(max_depth=10, **params)

In [None]:
# Train on the flattened crop windows (features) and their height values (targets)
xgb_model_crop.fit(X=crop_train_x, y=crop_train_y)

In [None]:
# Predict the held-out crops and print the mean absolute error followed by
# the mean squared error.
crop_pred_y = xgb_model_crop.predict(crop_test_x)

crop_mae = mean_absolute_error(crop_test_y, crop_pred_y)
crop_mse = mean_squared_error(crop_test_y, crop_pred_y)
print(crop_mae, crop_mse)

4.170457353415638 46.51940552120927


In [None]:
# Persist the trained model to a JSON file in the working directory
MODEL_FILE = "model_xgb_crop_v1.json"
xgb_model_crop.save_model(MODEL_FILE)