In [6]:
import pandas as pd
import numpy as np

In [7]:

train_df = pd.read_csv('../data/train.csv')
train_df.head()

Unnamed: 0,year,is_disturbance,forest_id,blue,green,red,near_infrared,shortwave_infrared_1,shortwave_infrared_2
0,1985,0,87,349.0,714.0,480.0,4015.0,2014.0,652.0
1,1986,0,87,310.0,507.0,384.0,3916.0,1890.0,606.0
2,1987,0,87,329.0,606.0,457.0,4043.0,2225.0,856.0
3,1988,0,87,230.0,500.0,286.0,3815.0,1952.0,646.0
4,1989,0,87,178.0,491.0,390.0,3839.0,2103.0,755.0


Features: NDVI & NBR

In [8]:
# NDVI: Index für "Greeness" zzwischen -1 und 1
# > 0.3 bis +1: Gesunde Vegetation (je höher, desto grüner/dichter). 
train_df['NDVI'] = (train_df['near_infrared'] - train_df['red']) / (train_df['near_infrared'] + train_df['red'])

# NBR: Index für Trockenheit/Brandflächen zwischen -1 und +1
# -0.1 bis +0.1: Trockene/Brandflächen (SWIR dominiert).
#-1 bis -0.2: Wasser/Schatten (niedrige Reflexion in NIR und SWIR)
train_df['NBR'] = (train_df['near_infrared'] - train_df['shortwave_infrared_2']) / (train_df['near_infrared'] + train_df['shortwave_infrared_2'])

Feature: dNBR 


(check: https://un-spider.org/advisory-support/recommended-practices/recommended-practice-burn-severity/in-detail/normalized-burn-ratio)

In [None]:
train_df["dNBR"] = train_df.groupby("forest_id")["NBR"].diff(-1) 

bins = [-float("inf"), -0.251, -0.101, 0.099, 0.269, 0.439, 0.659, float("inf")]
labels = [
    "Enhanced Regrowth, high (post-fire)",
    "Enhanced Regrowth, low (post-fire)",
    "Unburned",
    "Low Severity",
    "Moderate-low Severity",
    "Moderate-high Severity",
    "High Severity"
]

# Create category column
train_df["Severity_Level"] = pd.cut(
    train_df["dNBR"],
    bins=bins,
    labels=labels,
    right=False  
)

from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
train_df["SL_encoded"] = encoder.fit_transform(train_df["Severity_Level"])
print(train_df)

        year  is_disturbance  forest_id   blue       green         red  \
0       1985               0         87  349.0  714.000000  480.000000   
1       1986               0         87  310.0  507.000000  384.000000   
2       1987               0         87  329.0  606.000000  457.000000   
3       1988               0         87  230.0  500.000000  286.000000   
4       1989               0         87  178.0  491.000000  390.000000   
...      ...             ...        ...    ...         ...         ...   
272205  2014               0      19818  205.0  434.000000  215.000000   
272206  2015               0      19818  179.0  397.000000  249.000000   
272207  2016               0      19818  176.0  402.333333  245.333333   
272208  2017               0      19818  173.0  407.666667  241.666667   
272209  2018               0      19818  170.0  413.000000  238.000000   

        near_infrared  shortwave_infrared_1  shortwave_infrared_2      NDVI  \
0         4015.000000           

Feature: TCB (Tassel Cup Brightness)

Reducing noise from aged vegetation and soil characteristics 

Quelle: https://www.sciencedirect.com/science/article/pii/S2666017223000214
Quelle: https://www.researchgate.net/figure/Landsat-5-TM-band-specific-weights-for-TCB-TCG-and-TCW_tbl1_273440224

In [17]:
# Define the TCB coefficients for Landsat 9
TCB_Landsat9 = {
    'blue': 0.3037,
    'green': 0.2793,
    'red': 0.4343,
    'near_infrared': 0.5585,
    'shortwave_infrared_1': 0.5082,
    'shortwave_infrared_2': 0.1863
}

# Calculate the TCB index
train_df['TCB'] = (
    TCB_Landsat9['blue'] * train_df['blue'] +
    TCB_Landsat9['green'] * train_df['green'] +
    TCB_Landsat9['red'] * train_df['red'] +
    TCB_Landsat9['near_infrared'] * train_df['near_infrared'] +
    TCB_Landsat9['shortwave_infrared_1'] * train_df['shortwave_infrared_1'] +
    TCB_Landsat9['shortwave_infrared_2'] * train_df['shortwave_infrared_2']
)

print(train_df[['TCB']].head())

         TCB
0  3901.2354
1  3663.0051
2  4015.8815
3  3576.7445
4  3714.0545
