In [None]:
"""
Classification of Planet SuperDove imagery using the trained XGBoost model
Author: maxwell.cook@colorado.edu
"""

import os, time, sys
import xgboost as xgb

# Functions script import
sys.path.append(os.path.join(os.getcwd(),'code/'))
from __functions import *

print(os.getcwd())

# Project root directory: the data, model and results paths used by this notebook
# are all built from it. Set the OPP_MAINDIR environment variable to point the
# notebook at another machine's copy of the project (e.g. CyVerse) without editing
# the code. When the variable is unset or empty, the original local path is used,
# so existing runs behave exactly as before.
maindir = os.environ.get('OPP_MAINDIR') or (
    '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/earth-lab/opp-rooftop-mapping'
)
# homedir = '/home/jovyan/data-store/data/iplant/home/maco4303/data/' # cyverse

print("Imports successful!")

In [None]:
# Output directory for the classification products written by this notebook
results_dir = os.path.join(maindir, 'results/xgboost-cv/classification/')
# results_dir = os.path.join(homedir, 'OPP/results/xgboost-cv/classification/') # cyverse
# exist_ok=True creates the directory (and any missing parents) only when needed,
# without a separate exists() check that could race with another process; it also
# fails loudly if the path exists but is not a directory.
os.makedirs(results_dir, exist_ok=True)
print(f"Saving results to {results_dir}")

In [None]:
# Load the reference footprint data

In [None]:
# Location of the reference footprint GeoPackage under the project root
ref_fp = os.path.join(
    maindir,
    'data/spatial/mod/dc_data/training/dc_data_reference_footprints.gpkg'
)
ref = gpd.read_file(ref_fp)
# Count of distinct class_code values (missing values, if any, count as one);
# used as num_class when the classifier is constructed
n_classes = ref['class_code'].nunique(dropna=False)
# Preview the first rows as the cell output
ref.head()

In [None]:
# Encode every distinct class_code as an integer and store it in a new 'code' column
# (pd.factorize is called without sort, so codes follow order of first appearance)
ref['code'], _ = pd.factorize(ref['class_code'])
# Lookup tables keyed by class_code: one to the integer code, one to the description
code_mapping = {cls: code for cls, code in zip(ref['class_code'], ref['code'])}
desc_mapping = {cls: desc for cls, desc in zip(ref['class_code'], ref['description'])}
print(f'Code map: \n{code_mapping}\nDescription map: \n{desc_mapping}')

In [None]:
# Load the best XGBoost model from cross-validation based on holdout accuracy

In [None]:
# Cross-validation fold whose saved model is loaded for classification
best_fold = 4
# Hyperparameter values passed to the classifier constructor when that model is loaded
best_params = dict(
    learning_rate=0.002185884764883822,
    max_depth=12,
    n_estimators=837,
    min_child_weight=2,
    subsample=0.5014281326674929,
    colsample_bytree=0.9226235922462854,
    gamma=0.8462080004717595,
)
print(best_params)

In [None]:
# Path of the saved model for the selected cross-validation fold
model_fp = os.path.join(maindir, f'results/xgboost-cv/dc-xgb_fold{best_fold}.model')
print(f"Loading model from path: {model_fp}")

# Build the classifier with the same settings listed in best_params, then replace
# its state with the saved model read from disk.
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=n_classes,
    eval_metric=['mlogloss', 'auc', 'merror'], # track multiple evaluation metrics
    early_stopping_rounds=100, # stop if no improvement
    random_state=44,
    # Forward exactly these seven hyperparameters from best_params, by name
    **{
        name: best_params[name]
        for name in (
            'learning_rate',
            'max_depth',
            'n_estimators',
            'min_child_weight',
            'subsample',
            'colsample_bytree',
            'gamma',
        )
    },
)

xgb_model.load_model(model_fp)
print("Model loaded !")

In [None]:
# Load the Planet imagery

In [None]:
# Planet scene raster to classify (the file name indicates the normalized
# 8-band PSScene stack)
stack_da_fp = os.path.join(
    maindir,
    'data/spatial/mod/dc_data/planet-data/dc_0623_psscene8b_final_norm.tif'
)
# stack_da_fp = os.path.join(homedir,'OPP/planet/dc/dc_0623_psscene8b_final_norm.tif') # cyverse
# Open with masking enabled and caching disabled, then drop any length-1 dimensions
stack_da = rxr.open_rasterio(stack_da_fp, masked=True, cache=False)
stack_da = stack_da.squeeze()
# Print a summary of the raster to check its format
print_raster(stack_da, open_file=False)
# Band labels are taken from the raster's long_name entry
band_names = [*stack_da.long_name]
print(f"Band names: {band_names}")

In [None]:
# Flatten the image for classification
# NOTE(review): flatten_array is not defined in this notebook (presumably provided by
# `from __functions import *`); it is assumed to reshape the band stack into the
# 2-D table that xgb_model.predict receives later — confirm in the helper module.
stack_da_flat = flatten_array(stack_da)
# Show the shape of the flattened array as the cell output
stack_da_flat.shape

In [None]:
# Make predictions on the Planet imagery

In [None]:
# Wall-clock start for the elapsed-time report printed at the end of the cell
t0 = time.time()

# Output GeoTIFF path for the classified map
out_fp = os.path.join(results_dir, 'dc-xgboost-best_roof_materials_pred.tif')

# Make predictions on the flattened Planet array
# NOTE(review): the stack was opened with masked=True, so nodata pixels may reach
# the model as NaN and still be assigned a class — confirm that flatten_array /
# array_toxrda account for this.
preds_ = xgb_model.predict(stack_da_flat)

# Create a new DataArray from the predictions, matched to the original Planet
# imagery; export=True together with out_fp presumably writes the GeoTIFF
# (behavior lives in the array_toxrda helper — confirm there).
preds_da = array_toxrda(
    preds_,
    stack_da,
    dtype="uint8",
    clip=False,
    export=True,
    out_fp=out_fp,
)

print("\n~~~~~~~~~~~~~~~~~~~\n")
# Elapsed time converted from seconds to minutes
t1 = (time.time() - t0) / 60
print(f"Total elapsed time: {t1:.2f} minutes.")