# Cyprus Terrace Detection - Model Inference
Apply the trained models to predict terraces across the territory of Cyprus.

In [None]:
import os
import geopandas as gpd
import pandas as pd
import numpy as np
from autogluon.tabular import TabularPredictor
import glob
import re
import multiprocessing
from joblib import Parallel, delayed

# Restore the two fitted AutoGluon predictors from their saved directories.
# predictor_terrace is used for the binary terrace stage and predictor_ancient
# for the multiclass stage of apply_models_to_gpkg.
terrace_model_dir = 'C:/Workdir/test_autogluon/model_CyprusSENtrain_rs22matchBEST'
ancient_model_dir = 'C:/Workdir/test_autogluon/model_CyprusMultinewBEST'

predictor_terrace = TabularPredictor.load(terrace_model_dir)
predictor_ancient = TabularPredictor.load(ancient_model_dir)

## 1. Prepare Input Files

In [None]:
# Collect every input GeoPackage found directly inside the grid folder.
folder_files = "E:/Cyprus_paper_data/grid_all_polysSen"

# glob returns matches in arbitrary order; sorting keeps the file list (and the
# file picked as all_files_paths[0] for the single-file test) identical on
# every run.
all_files_paths = sorted(glob.glob(folder_files + "/*.gpkg"))

print(f"Total files found: {len(all_files_paths)}")

# Optional: restrict the run to files whose name contains "_<N>_stats" with
# N between 1 and 56.
# pattern = re.compile(r"_([1-9]|[1-4][0-9]|5[0-6])_stats")
# selected_files = [file for file in all_files_paths if pattern.search(file)]

## 2. Two-Stage Prediction Pipeline

In [None]:
def apply_models_to_gpkg(gpkg_path, output_dir):
    """
    Run the two-stage terrace prediction on one GeoPackage and save the result.

    Stage 1 scores every row with the binary terrace model and derives two
    flags from the positive-class probability: ``terrace_binary_0.5``
    (probability >= 0.5) and ``terrace_binary_0.67`` (probability >= 0.6707;
    the column name uses the rounded value).

    Stage 2 runs the multiclass model on the rows flagged as terraces, once
    per flag, and stores the result in ``ancient_pred_067`` and
    ``ancient_pred_05``. Rows that were not flagged get a missing value.

    If the input has ``terrace`` / ``ancient`` label columns, those labels
    override conflicting predictions and are also copied to
    ``terrace_labeltrue`` / ``ancient_labeltrue`` in the output.

    Uses the module-level ``predictor_terrace`` and ``predictor_ancient``.

    Args:
        gpkg_path: Path of the input GeoPackage.
        output_dir: Directory in which ``<input stem>_pred.gpkg`` is written.

    Returns:
        None. The predictions are written to disk and a progress line is
        printed.
    """
    # Load GeoPackage
    gdf = gpd.read_file(gpkg_path)

    # Store original columns so they can be re-attached after prediction
    geometry_col = gdf['geometry']
    terrace_col = gdf['terrace'] if 'terrace' in gdf.columns else None
    ancient_col = gdf['ancient'] if 'ancient' in gdf.columns else None

    # Drop non-feature columns (labels, geometry and bookkeeping fields)
    gdf = gdf.drop(
        columns=['terrace', 'ancient', 'geometry', 'name', 'gridnumber', 'PXLVAL', 'Selected'],
        errors='ignore',
    )

    # Stage 1: Binary terrace detection
    # NOTE(review): column 1 of predict_proba is taken as the terrace
    # (positive) class - confirm against the trained model's class order.
    predictions_terrace = predictor_terrace.predict_proba(gdf)
    gdf['terrace_prob'] = predictions_terrace.iloc[:, 1]
    gdf['terrace_binary_0.5'] = (gdf['terrace_prob'] >= 0.5).astype(int)
    gdf['terrace_binary_0.67'] = (gdf['terrace_prob'] >= 0.6707).astype(int)

    # Override predictions with ground truth if available (for training data)
    flag_cols = ['terrace_binary_0.67', 'terrace_binary_0.5']
    if terrace_col is not None:
        # Labelled terraces that miss the stricter cut-off are forced positive.
        mask_terrace = (terrace_col == 1) & (gdf['terrace_prob'] < 0.6707)
        gdf.loc[mask_terrace, flag_cols] = 1
        gdf.loc[mask_terrace, 'terrace_prob'] = 0.8

        # Labelled non-terraces with any positive flag are forced negative.
        # ">=" mirrors the definition of terrace_binary_0.5 so that a row at
        # exactly 0.5 is not left flagged.
        mask_non_terrace = (terrace_col == 0) & (gdf['terrace_prob'] >= 0.5)
        gdf.loc[mask_non_terrace, flag_cols] = 0
        gdf.loc[mask_non_terrace, 'terrace_prob'] = 0.49

    # Stage 2: Multiclass terrace type (only for predicted terraces).
    # The flag column names are spelled out explicitly: building them from the
    # numeric threshold would yield "terrace_binary_0.6707", which does not
    # exist.
    stage2_columns = [
        ('terrace_binary_0.67', 'ancient_pred_067'),
        ('terrace_binary_0.5', 'ancient_pred_05'),
    ]
    for flag_col, col_name in stage2_columns:
        gdf_terrace = gdf.loc[gdf[flag_col] == 1].copy()

        if len(gdf_terrace) > 0:
            predictions_ancient = predictor_ancient.predict(gdf_terrace)
            gdf_terrace[col_name] = predictions_ancient

            # Override with ground truth if available
            if ancient_col is not None:
                truth = ancient_col.loc[gdf_terrace.index]
                has_truth = truth.notna()
                gdf_terrace.loc[has_truth, col_name] = truth[has_truth]

            # Index-aligned assignment: rows outside the subset become NaN.
            gdf[col_name] = gdf_terrace[col_name]
        else:
            # Keep the output schema identical even when nothing was flagged.
            gdf[col_name] = np.nan

    # Restore original columns
    gdf['geometry'] = geometry_col
    if terrace_col is not None:
        gdf['terrace_labeltrue'] = terrace_col
    if ancient_col is not None:
        gdf['ancient_labeltrue'] = ancient_col

    # Save predictions as "<input stem>_pred.gpkg" in the output directory
    gdf = gpd.GeoDataFrame(gdf)
    input_stem = os.path.splitext(os.path.basename(gpkg_path))[0]
    output_path = os.path.join(output_dir, input_stem + "_pred.gpkg")
    gdf.to_file(output_path, driver='GPKG')

    print(f"✓ Processed: {os.path.basename(gpkg_path)}")

## 3. Batch Processing

In [None]:
# Setup output directory
output_folder = "E:/Cyprus_paper_data/predictionsSENnew"
os.makedirs(output_folder, exist_ok=True)

# Fail with a clear message instead of an IndexError when the input folder
# yielded no GeoPackages.
if not all_files_paths:
    raise FileNotFoundError("No input .gpkg files found; check the input folder before running predictions.")

# Single file test: run the full pipeline on the first input file before
# launching the batch.
print("Testing on single file...")
apply_models_to_gpkg(all_files_paths[0], output_folder)

In [None]:
# Parallel processing for all files
# Leave 8 cores free, but never go below one worker: joblib rejects n_jobs=0
# and interprets negative values relative to the CPU count, so the raw
# difference misbehaves on machines with 8 or fewer cores.
num_cores = max(1, multiprocessing.cpu_count() - 8)


def process_polygon(polygon_file):
    """Run the two-stage prediction for one input file, writing to output_folder."""
    apply_models_to_gpkg(polygon_file, output_folder)


print(f"Processing {len(all_files_paths)} files using {num_cores} cores...")
Parallel(n_jobs=num_cores)(delayed(process_polygon)(file) for file in all_files_paths)

print(f"\n✓ All predictions saved to: {output_folder}")