In [1]:
# library
from shapely.geometry import Point
import geopandas as gpd
from shapely import wkt
import geemap
import ee  
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from IPython.display import display 

In [2]:
# ------------------------
# 1. Initialize Earth Engine
# ------------------------
try:
    # Fast path: succeeds when this environment can already initialize
    # Earth Engine without user interaction.
    ee.Initialize()
except Exception:
    # Initialization failed (presumably because no credentials are stored yet
    # -- TODO confirm): run the interactive authentication flow once, then
    # retry. A failure of the retry is allowed to propagate to the reader.
    ee.Authenticate()
    ee.Initialize()

In [3]:
# ------------------------
# 2. Data Preprocessing Functions
# ------------------------
def convert_geometry_column(df):
    """Return a new frame whose 'geometry' column holds parsed geometries.

    String entries of the 'geometry' column are parsed with ``wkt.loads``;
    every non-string entry is passed through unchanged, so running the
    function on its own output does not re-parse anything.

    The input frame is not modified. Assigning into ``df`` directly (as the
    previous version did) rewrote the caller's frame behind its back and can
    trigger pandas' SettingWithCopyWarning when ``df`` is a slice of another
    frame (e.g. the result of a filter).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'geometry' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the converted 'geometry' column.

    Raises
    ------
    KeyError
        If ``df`` has no 'geometry' column.
    """
    def _parse(value):
        # Only text needs parsing; anything else is kept as it is.
        return wkt.loads(value) if isinstance(value, str) else value

    # assign() builds a new frame instead of writing into the caller's one.
    return df.assign(geometry=df['geometry'].apply(_parse))

def filter_valid_geometries(gdf):
    """Keep only the rows that have a geometry and pass the validity check.

    Rows whose 'geometry' entry is null are dropped first; the ``is_valid``
    flags of the remaining rows then decide which rows are returned.
    """
    has_geometry = gdf['geometry'].notnull()
    with_geometry = gdf[has_geometry]
    # is_valid is read from the already-filtered frame, as in the two-step
    # filter this replaces.
    return with_geometry[with_geometry.is_valid]

In [4]:
# ------------------------
# 3. Model Training Function
# ------------------------
def train_model(X_train, y_train, task_type):
    """Fit a random forest for the requested task and return it.

    Parameters
    ----------
    X_train
        Training features, handed to the estimator's ``fit``.
    y_train
        Training target, handed to the estimator's ``fit``.
    task_type : str
        Exactly "classification" or "regression".

    Returns
    -------
    The fitted ``RandomForestClassifier`` or ``RandomForestRegressor``
    (100 trees, ``random_state=42``).

    Raises
    ------
    ValueError
        If ``task_type`` is neither "classification" nor "regression".
        Previously any other string (a typo, different capitalisation, stray
        whitespace) silently trained a regressor.
    """
    if task_type == "classification":
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    elif task_type == "regression":
        model = RandomForestRegressor(n_estimators=100, random_state=42)
    else:
        raise ValueError(
            f"Unknown task_type {task_type!r}; expected 'classification' or 'regression'."
        )

    model.fit(X_train, y_train)
    return model

In [5]:
# ------------------------
# 4. Model Evaluation Function
# ------------------------
def evaluate_model(model, X_test, y_test, task_type):
    """Score ``model`` on the test split, print the score and return it.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X_test
        Test features, handed to ``model.predict``.
    y_test
        True target values for ``X_test``.
    task_type : str
        Exactly "classification" (accuracy is reported) or "regression"
        (mean squared error is reported).

    Returns
    -------
    The accuracy or the mean squared error, depending on ``task_type``.

    Raises
    ------
    ValueError
        If ``task_type`` is neither "classification" nor "regression".
        Previously any other string was silently scored as a regression.
    """
    # Validate first so a mistyped task never reaches the metric functions.
    if task_type not in ("classification", "regression"):
        raise ValueError(
            f"Unknown task_type {task_type!r}; expected 'classification' or 'regression'."
        )

    y_pred = model.predict(X_test)
    if task_type == "classification":
        score = accuracy_score(y_test, y_pred)
        print(f"Classification Accuracy: {score:.4f}")
    else:
        score = mean_squared_error(y_test, y_pred)
        print(f"Regression MSE: {score:.4f}")
    return score


In [6]:
# ------------------------
# 5. Feature Importance Function
# ------------------------
def get_feature_importance(model, selected_features):
    """Print the model's feature importances, largest first.

    Each name in ``selected_features`` is paired positionally with the
    matching entry of ``model.feature_importances_``; the pairs are printed
    one per line with four decimals. Nothing is returned.
    """
    ranked = list(zip(selected_features, model.feature_importances_))
    # Descending by importance; ties keep their original relative order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    print("Feature Importances:")
    for name, weight in ranked:
        print(f"{name}: {weight:.4f}")

In [7]:
# ------------------------
# 6. Apply Model to Map
# ------------------------
def apply_model_to_map(model, df, selected_features, sample_size=250, random_state=42):
    """Predict for every row of ``df`` and show a sample of the rows on a map.

    Steps: convert the 'geometry' column with ``convert_geometry_column``,
    add the model's predictions as a 'prediction' column, wrap the result in
    a GeoDataFrame (CRS 'EPSG:4326'), draw a random sample of rows, convert
    the sample with ``geemap.geopandas_to_ee`` and display it as the
    'Predictions' layer of a new ``geemap.Map``.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    df : pandas.DataFrame
        Frame with a 'geometry' column and every column named in
        ``selected_features``.
    selected_features : list
        Column names handed to ``model.predict``.
    sample_size : int, default 250
        Maximum number of rows sent to the map (presumably capped to keep the
        Earth Engine upload small -- TODO confirm). Frames with fewer rows
        are shown in full; previously they raised ``ValueError`` inside
        ``sample``.
    random_state : int, default 42
        Seed for the row sample.

    Returns
    -------
    None
        The map is displayed from inside this function. ``None`` is also
        returned when the conversion to an Earth Engine FeatureCollection
        fails; in that case an error message is printed and no map is shown.
    """
    df = convert_geometry_column(df)
    # assign() builds a new frame, so the caller's DataFrame does not
    # silently gain a 'prediction' column.
    scored = df.assign(prediction=model.predict(df[selected_features]))

    gdf = gpd.GeoDataFrame(scored, geometry=scored['geometry'], crs='EPSG:4326')
    # sample() refuses n larger than the number of rows, so clamp it.
    gdf = gdf.sample(n=min(sample_size, len(gdf)), random_state=random_state)

    try:
        gee_features = geemap.geopandas_to_ee(gdf)
    except Exception as e:
        print("Error converting GeoDataFrame to EE FeatureCollection:", e)
        return None
    if gee_features is None:
        print("Error: geemap.geopandas_to_ee() returned None.")
        return None

    Map = geemap.Map()
    Map.addLayer(gee_features, {}, 'Predictions')
    display(Map)

In [8]:
# ------------------------
# 7. Main Execution
# ------------------------
# load dataset
df = pd.read_csv('../notebook_outline_version/data/Costa_Classification_Data_Cleaned2.csv')


# drop duplicate plotIDs, keeping the first occurrence
df = df.drop_duplicates(subset=['plotid'], keep='first')

df = convert_geometry_column(df)

# user selects task type; normalise whitespace/case and reject anything else,
# otherwise a typo would silently fall through to the regression branch below
task_type = input("Choose model type (classification/regression): ").strip().lower()
if task_type not in ("classification", "regression"):
    raise ValueError(
        f"Unknown model type {task_type!r}; expected 'classification' or 'regression'."
    )

# define features and target
selected_features = ['BLUE', 'GREEN', 'NIR', 'RED', 'SWIR1', 'SWIR2', 'elevation', 'ndvi']
target = 'Vegetation' if task_type == "classification" else 'Forest_Percentage'

# split data (80/20, fixed seed so the split is reproducible)
X = df[selected_features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# train model
model = train_model(X_train, y_train, task_type)

# evaluate model
evaluate_model(model, X_test, y_test, task_type)

# get feature importance
get_feature_importance(model, selected_features)

# apply model to dataset and map results
# apply_model_to_map displays the map itself and returns None, so this guard
# only fires if that function is later changed to return the map object.
map_result = apply_model_to_map(model, df, selected_features)
if map_result is not None:
    display(map_result)

Choose model type (classification/regression):  classification


Classification Accuracy: 0.6969
Feature Importances:
RED: 0.1394
SWIR2: 0.1376
GREEN: 0.1348
ndvi: 0.1295
SWIR1: 0.1233
elevation: 0.1231
NIR: 0.1064
BLUE: 0.1058


Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [13]:
# list the distinct labels of the classification target column
# NOTE(review): the output shows both 'Herbaceas' and 'Herbaceous' -- possibly
# the same class under two spellings; confirm before trusting per-class metrics
df['Vegetation'].unique()

array(['Tree', 'Not_Applicable', 'Palms', 'Herbaceas', 'Bush',
       'Another Vegetation', 'Herbaceous', 'Plastico'], dtype=object)

In [11]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

def evaluate_model(model, X_test, y_test, task_type="classification"):
    """Print evaluation metrics for ``model`` on the test split.

    This definition replaces the earlier four-argument ``evaluate_model`` in
    the kernel. ``task_type`` is therefore accepted as an optional fourth
    argument, so the earlier call ``evaluate_model(model, X_test, y_test,
    task_type)`` keeps working when its cell is re-run after this one
    (it previously failed with ``TypeError``).

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X_test
        Test features, handed to ``model.predict``.
    y_test
        True target values for ``X_test``.
    task_type : str, default "classification"
        "classification" prints accuracy, confusion matrix and classification
        report; "regression" prints the mean squared error.

    Returns
    -------
    None
        Results are only printed, so calling this as the last line of a cell
        does not echo an extra value.

    Raises
    ------
    ValueError
        If ``task_type`` is neither "classification" nor "regression".
    """
    if task_type not in ("classification", "regression"):
        raise ValueError(
            f"Unknown task_type {task_type!r}; expected 'classification' or 'regression'."
        )

    # generate predictions
    y_pred = model.predict(X_test)

    if task_type == "regression":
        # the classification metrics below are not defined for a continuous target
        mse = mean_squared_error(y_test, y_pred)
        print(f"Regression MSE: {mse:.4f}")
        return

    # overall accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy: {accuracy:.4f}")

    # confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", cm)

    # per-class precision / recall / f1
    cr = classification_report(y_test, y_pred)
    print("\nClassification Report:\n", cr)

# run model evaluation on the held-out split
# relies on `model`, `X_test` and `y_test` created by the main execution cell,
# so that cell must have been run in this kernel first
evaluate_model(model, X_test, y_test)

Model Accuracy: 0.6969

Confusion Matrix:
 [[   3    0   24   12   12    2   33]
 [   0    2   19    0    3    0   36]
 [  10    1  300    3   20    3  167]
 [   4    0   23   14    7    0   27]
 [   1    1   62    3   66    0   42]
 [   3    0    3    2    1    8   15]
 [   1    6  110    1   12   12 1173]]

Classification Report:
                     precision    recall  f1-score   support

Another Vegetation       0.14      0.03      0.06        86
              Bush       0.20      0.03      0.06        60
         Herbaceas       0.55      0.60      0.57       504
        Herbaceous       0.40      0.19      0.25        75
    Not_Applicable       0.55      0.38      0.45       175
             Palms       0.32      0.25      0.28        32
              Tree       0.79      0.89      0.84      1315

          accuracy                           0.70      2247
         macro avg       0.42      0.34      0.36      2247
      weighted avg       0.66      0.70      0.67      2247



In [None]:
#'Tree', 'Not_Applicable', 'Palms', 'Herbaceas', 'Bush',
#       'Another Vegetation', 'Herbaceous', 'Plastico'