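This notebook explores classifying tree classes from monthly NDVI composites of yearly time series, comparing a Random Forest and a LightGBM classifier.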

In [1]:
# (Re)load the autoreload extension; unlike %load_ext this is safe to re-run in a live kernel.
%reload_ext autoreload
# Mode 2: re-import modules before each cell executes, so edits to local .py files are picked up.
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from sklearn import metrics
import lightgbm as lgb

Variables

In [3]:
# Class name -> integer label; a name's position in the tuple is its label.
name_map = {
    species: label
    for label, species in enumerate((
        'Larix decidua',
        'Pinus sylvestris',
        'Broadleaved trees',
        'Picea abies',
        'Pinus mugo',
        'Pinus cembra',
        'Abies alba',
    ))
}
# Inverse lookup: integer label -> class name.
value_map = dict(zip(name_map.values(), name_map.keys()))

In [4]:
# Feature configuration: years to evaluate, spectral bands to keep, and input files.
years = [str(season) for season in range(2018, 2025)]
bands = ['ndvi']
DATA_PATH = "data/"
files = ["_raw.csv", "_syn_raw.csv"]
# Dataset name = file suffix minus its first character, cut at the first dot.
data_names = [suffix[1:].partition(".")[0] for suffix in files]
# Dataset name -> position in `files`.
names_id = {label: position for position, label in enumerate(data_names)}

# Classification

Aggregate each year's time series into monthly mean composites, then classify the composites with 5-fold cross-validation.

In [5]:
# Random Forest on monthly composites: one 5-fold cross-validation per year.
# `results` maps year -> [mean weighted F1, mean overall accuracy], both in percent.
results = {}

# KFold with an integer random_state re-seeds on every split() call, so a single
# splitter shared by all years yields the same folds as building one per year.
# NOTE(review): folds are not stratified by class; consider StratifiedKFold if
# classes are imbalanced (this would change the reported scores).
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for year in years:
    # Load the raw table for this year.
    df = pd.read_csv(DATA_PATH + year + "_raw.csv", index_col=0, low_memory=False)

    # The "class" row holds one label per sample column; the first two columns
    # (renamed to band/date below) are skipped.
    y = df.loc["class"].iloc[2:].astype(int)
    df = df.drop("class")
    df.columns = ["band", "date", *[int(x) for x in df.columns[2:]]]

    # Monthly composite: mean of every observation per (band, month).
    df["month"] = pd.to_datetime(df["date"]).dt.month
    df = df.drop("date", axis=1)
    df = df.groupby(["band", "month"]).mean()
    df = df.loc[bands]
    df = df.fillna(0)  # Replace NaN with 0

    # Samples become rows, (band, month) composites become feature columns.
    X = df.T.to_numpy()
    y = y.to_numpy()
    assert X.shape[0] == len(y), "features and labels must have one row per sample"

    fold_metrics = []
    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit a fresh model on the training fold.
        model_rf = RandomForestClassifier(n_estimators=1000, random_state=42)
        model_rf.fit(X_train, y_train)

        # Score the held-out fold.
        y_pred_rf = model_rf.predict(X_val)
        acc_rf = 100 * metrics.accuracy_score(y_val, y_pred_rf)
        f1_w_rf = 100 * metrics.f1_score(y_val, y_pred_rf, average='weighted')

        fold_metrics.append([f1_w_rf, acc_rf])

    # Average metrics across folds
    results[year] = np.mean(fold_metrics, axis=0).tolist()

# One row per year with columns F1 and OA (also valid when `results` is empty).
df_metrics = pd.DataFrame.from_dict(results, orient="index", columns=['F1', 'OA'])
df_metrics.index.name = 'Year'

# Save to CSV
df_metrics.to_csv('RF_monthly_metrics.csv')
df_metrics.mean()

F1    73.090153
OA    74.054054
dtype: float64

In [6]:
# LightGBM on monthly composites: one 5-fold cross-validation per year.
# `results` maps year -> [mean weighted F1, mean overall accuracy], both in percent.
results = {}

# KFold with an integer random_state re-seeds on every split() call, so a single
# splitter shared by all years yields the same folds as building one per year.
# NOTE(review): folds are not stratified by class; consider StratifiedKFold if
# classes are imbalanced (this would change the reported scores).
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for year in years:
    # Load the raw table for this year.
    df = pd.read_csv(DATA_PATH + year + "_raw.csv", index_col=0, low_memory=False)

    # The "class" row holds one label per sample column; the first two columns
    # (renamed to band/date below) are skipped.
    y = df.loc["class"].iloc[2:].astype(int)
    df = df.drop("class")
    df.columns = ["band", "date", *[int(x) for x in df.columns[2:]]]

    # Monthly composite: mean of every observation per (band, month).
    df["month"] = pd.to_datetime(df["date"]).dt.month
    df = df.drop("date", axis=1)
    df = df.groupby(["band", "month"]).mean()
    df = df.loc[bands]
    df = df.fillna(0)  # Replace NaN with 0

    # Samples become rows, (band, month) composites become feature columns.
    X = df.T.to_numpy()
    y = y.to_numpy()
    assert X.shape[0] == len(y), "features and labels must have one row per sample"

    # Number of distinct labels in this year's data; constant across folds.
    n_classes = len(np.unique(y))

    fold_metrics = []
    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit a fresh model on the training fold.
        model_lgb = lgb.LGBMClassifier(
            n_estimators=1000,
            random_state=42,
            objective='multiclass',
            num_class=n_classes,
            metric='multi_logloss',
            verbose=-1
        )
        model_lgb.fit(X_train, y_train)

        # Score the held-out fold.
        y_pred_lgb = model_lgb.predict(X_val)
        acc_lgb = 100 * metrics.accuracy_score(y_val, y_pred_lgb)
        f1_w_lgb = 100 * metrics.f1_score(y_val, y_pred_lgb, average='weighted')

        fold_metrics.append([f1_w_lgb, acc_lgb])

    # Average metrics across folds
    results[year] = np.mean(fold_metrics, axis=0).tolist()

# One row per year with columns F1 and OA (also valid when `results` is empty).
df_metrics = pd.DataFrame.from_dict(results, orient="index", columns=['F1', 'OA'])
df_metrics.index.name = 'Year'

# Save to CSV
df_metrics.to_csv('lightGBM_monthly_metrics.csv')
df_metrics.mean()

F1    72.512747
OA    72.972973
dtype: float64