This notebook explores the use of raw data and the LightGBM algorithm for forest stand classification.

In [1]:
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from sklearn import metrics
import lightgbm as lgb

In [2]:
name_map = {'Larix decidua': 0,
 'Pinus sylvestris': 1,
 'Broadleaved trees': 2,
 'Picea abies': 3,
 'Pinus mugo': 4,
 'Pinus cembra': 5,
 'Abies alba': 6}
 # reverse the dictionary
value_map = {v: k for k, v in name_map.items()}

In [3]:
# Notebook configuration: which yearly files to read and which bands to keep.
# Years are kept as strings because they are concatenated into file names.
years = list(map(str, range(2018, 2025)))
# Band identifiers used to select rows from each yearly table.
# NOTE(review): these look like Sentinel-2 band names plus an NDVI index — confirm.
bands = [
    'B01', 'B02', 'B03', 'B04',
    'B05', 'B06', 'B07', 'B08',
    'B8A', 'B09', 'B11', 'B12',
    'ndvi',
]
# Directory prefix (with trailing slash) prepended to each "<year>_raw.csv" file name.
DATA_PATH = "data/"

Model training and prediction using K-fold cross-validation

In [4]:
# For every year: load the raw table, run 5-fold cross-validation with a
# LightGBM multiclass classifier, and record the mean weighted F1 score and
# overall accuracy (both in percent). The per-year results are written to
# 'results/lightGBM_raw.csv' and their average over all years is printed.
all_results = {}
for year in years:
    # Load raw data; the first CSV column becomes the row index.
    raw_df = pd.read_csv(DATA_PATH + year + "_raw.csv", index_col=0, low_memory=False)
    # The first two columns are named "band" and "date"; every remaining
    # column label is cast to int (one column per sample).
    raw_df.columns = ["band", "date", *[int(x) for x in raw_df.columns[2:]]]
    # The "class" row carries the labels; its first two entries belong to the
    # "band"/"date" columns and are skipped.
    y = raw_df.T.pop("class")[2:].astype(int)
    raw_df = raw_df.drop("class")
    raw_df.date = pd.to_datetime(raw_df.date)
    # Average rows that share the same (band, date), then keep only the
    # configured bands.
    raw_df = raw_df.groupby(["band", "date"]).mean()
    raw_df = raw_df.loc[bands]

    # Samples as rows, (band, date) features as columns. Transposed once here
    # instead of on every fold; rows are in the same order as `y`.
    X = raw_df.T

    # Initialize K-fold
    kf = KFold(n_splits=5, shuffle=True, random_state=42)

    # Store metrics for each fold
    lgbm_acc = []
    lgbm_f1 = []

    # Perform k-fold cross validation
    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        # KFold.split yields POSITIONAL indices, so both features and labels
        # must be selected with .iloc. `y` is indexed by the integer column
        # labels, and plain `y[train_idx]` would look those positions up as
        # labels — correct only if the labels happen to be exactly 0..n-1.
        X_train = X.iloc[train_idx].values
        X_val = X.iloc[val_idx].values
        y_train = y.iloc[train_idx]
        y_val = y.iloc[val_idx]

        # Set up the LightGBM model
        model_lgbm = lgb.LGBMClassifier(
            objective='multiclass',
            num_class=len(np.unique(y)),
            metric='multi_logloss',
            verbose=-1
        )

        # Train the model
        model_lgbm.fit(X_train, y_train)

        # Make predictions
        y_pred = model_lgbm.predict(X_val)

        # Calculate metrics (scaled to percent)
        acc = 100 * metrics.accuracy_score(y_val, y_pred)
        f1 = 100 * metrics.f1_score(y_val, y_pred, average='weighted')

        lgbm_acc.append(acc)
        lgbm_f1.append(f1)

    # Store the fold-averaged results for this year
    all_results[year] = {'F1': np.mean(lgbm_f1), 'OA': np.mean(lgbm_acc)}

# Create DataFrame with F1 and OA for all years (one row per year)
df_metrics = pd.DataFrame(all_results).T
df_metrics.columns = ['F1', 'OA']
df_metrics.index.name = 'Year'
df_metrics.to_csv('results/lightGBM_raw.csv')
# Average of each metric across all years
print(df_metrics.mean())



F1    90.613855
OA    90.667402
dtype: float64


