In [5]:
import pandas as pd
import numpy as np
import warnings
import os

warnings.filterwarnings('ignore')

# Load data
# Ensure the path to your CSV is correct

df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_final.csv")

df = df[df['flux_method'] == 'EC']


# --- Data Preprocessing ---

# Fill ALT with 2000 for NaNs between 1997–2021
# This assumes 'ALT' column exists.
# if 'ALT' in df.columns:
#     time_interval_mask = (df['year'] >= 1997) & (df['year'] <= 2021)
#     df.loc[time_interval_mask, 'ALT'] = df.loc[time_interval_mask, 'ALT'].fillna(2000)

# Create tmean_C and date
# Note: Assuming 'tmmx' and 'tmmn' are the correct column names instead of tmax_C/tmin_C
df['tmean_C'] = df[['tmmx', 'tmmn']].mean(axis=1)
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# --- Dynamic Feature Selection ---

# Define columns to EXCLUDE from lag/rolling feature generation
# This includes identifiers, categorical data, dates, and user-specified exclusions
base_exclusions = {
    'site_name', 'site_reference', 'latitude', 'longitude', 'flux_method',
    'country', 'land_cover_eco', 'land_cover_plot', 'bawld_class', 'year',
    'month', 'SummaryQA', 'land_cover', 'co2_cont', 'date', 'ALT', 'soil', 'land_cover',
    'siteID', 'Flux'
}

# Find all columns that end with the '_100cm' suffix
suffix_exclusions = {col for col in df.columns if col.endswith('_100cm')}

# Combine all sets of exclusions
all_exclusions = base_exclusions.union(suffix_exclusions)

# Dynamically create the list of predictors to expand
predictors_to_expand = [col for col in df.columns if col not in all_exclusions]
print(f"✅ Columns selected for feature engineering: {predictors_to_expand}")

# --- Feature Engineering ---

# Make a copy to store engineered features
df_engineered = df[['site_reference', 'date']].copy()

# Sort by site and date for consistent time-series calculations
df = df.sort_values(['site_reference', 'date'])

# Generate lag, rolling mean, and rolling std features
for var in predictors_to_expand:
    for lag in [1, 2, 3]:
        df_engineered[f'{var}_lag{lag}'] = (
            df.groupby('site_reference')[var]
            .shift(lag)
        )
    
    # df_engineered[f'{var}_roll3_mean'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .mean()
    #     .reset_index(level=0, drop=True) # Add reset_index to align correctly
    # )
    
    # df_engineered[f'{var}_roll3_std'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .std()
    #     .reset_index(level=0, drop=True) # Add reset_index to align correctly
    # )

    # df_engineered[f'{var}_roll3_std'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .std()
    #     .reset_index(level=0, drop=True)
    #     .fillna(0)  # <-- Add this line to fill NaNs with 0
    # )

# --- Finalizing DataFrame ---

# Merge engineered features back into original dataframe
df_full = pd.merge(df, df_engineered, on=['site_reference', 'date'], how='left')

# Save to new CSV
output_path = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags.csv"
df_full.to_csv(output_path, index=False)
print(f"🎉 Successfully saved data with new features to: {output_path}")

✅ Columns selected for feature engineering: ['EVI', 'NDVI', 'sur_refl_b01', 'sur_refl_b02', 'sur_refl_b03', 'sur_refl_b07', 'NDWI', 'aet', 'def', 'pdsi', 'pet', 'pr', 'ro', 'srad', 'swe', 'tmmn', 'tmmx', 'vap', 'vpd', 'vs', 'lai', 'fpar', 'Percent_NonTree_Vegetation', 'Percent_NonVegetated', 'Percent_Tree_Cover', 'nee', 'gpp', 'reco', 'ch4_flux_total', 'tmean_C']
🎉 Successfully saved data with new features to: /explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags.csv


Use group mean instead

In [4]:
import pandas as pd
import numpy as np
import warnings
import os

warnings.filterwarnings('ignore')

# Load data
# Ensure the path to your CSV is correct
df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_final.csv")

df = df[df['flux_method'] == 'EC']


# --- Data Preprocessing ---

# Create tmean_C and date
df['tmean_C'] = df[['tmmx', 'tmmn']].mean(axis=1)
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# --- Dynamic Feature Selection ---

# Define columns to EXCLUDE from lag/rolling feature generation
base_exclusions = {
    'site_name', 'site_reference', 'latitude', 'longitude', 'flux_method',
    'country', 'land_cover_eco', 'land_cover_plot', 'bawld_class', 'year',
    'month', 'SummaryQA', 'land_cover', 'co2_cont', 'date', 'ALT', 'soil', 'land_cover',
    'siteID'
}

# Find all columns that end with the '_100cm' suffix
suffix_exclusions = {col for col in df.columns if col.endswith('_100cm')}

# Combine all sets of exclusions
all_exclusions = base_exclusions.union(suffix_exclusions)

# Dynamically create the list of predictors to expand
predictors_to_expand = [col for col in df.columns if col not in all_exclusions]
print(f"✅ Columns selected for feature engineering: {predictors_to_expand}")

# --- Feature Engineering ---

# Make a copy to store engineered features
df_engineered = df[['site_reference', 'date']].copy()

# Sort by site and date for consistent time-series calculations
df = df.sort_values(['site_reference', 'date'])

# Generate lag, rolling mean, and rolling std features
for var in predictors_to_expand:
    # Calculate the mean for the current variable, grouped by site
    # This will be used to fill NaNs for the initial lag periods.
    group_mean = df.groupby('site_reference')[var].transform('mean')

    # Generate lag features and fill NaNs with the site-specific group mean
    for lag in [1, 2, 3]:
        df_engineered[f'{var}_lag{lag}'] = (
            df.groupby('site_reference')[var]
            .shift(lag)
            .fillna(group_mean)
        )
    
    # Generate rolling features (these handle NaNs by default so no change needed)
    # df_engineered[f'{var}_roll3_mean'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .mean()
    #     .reset_index(level=0, drop=True)
        
    # )
    
    # df_engineered[f'{var}_roll3_std'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .std()
    #     .reset_index(level=0, drop=True)
    # )
    
    # df_engineered[f'{var}_roll3_std'] = (
    #     df.groupby('site_reference')[var]
    #     .shift(1)
    #     .rolling(window=3, min_periods=1)
    #     .std()
    #     .reset_index(level=0, drop=True)
    #     .fillna(0)  # <-- Add this line to fill NaNs with 0
    # )

# --- Finalizing DataFrame ---

# Merge engineered features back into original dataframe
df_full = pd.merge(df, df_engineered, on=['site_reference', 'date'], how='left')

# Save to new CSV
output_path = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags_filled.csv"
df_full.to_csv(output_path, index=False)
print(f"🎉 Successfully saved data with new features to: {output_path}")

✅ Columns selected for feature engineering: ['EVI', 'NDVI', 'sur_refl_b01', 'sur_refl_b02', 'sur_refl_b03', 'sur_refl_b07', 'NDWI', 'aet', 'def', 'pdsi', 'pet', 'pr', 'ro', 'srad', 'swe', 'tmmn', 'tmmx', 'vap', 'vpd', 'vs', 'lai', 'fpar', 'Percent_NonTree_Vegetation', 'Percent_NonVegetated', 'Percent_Tree_Cover', 'nee', 'gpp', 'reco', 'ch4_flux_total', 'Flux', 'tmean_C']


TypeError: Could not convert CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4CO2 and CH4 to numeric

In [17]:
print(df.shape)
print(df_full.shape)

(12165, 52)
(12165, 127)


In [15]:
# Place this code after loading the CSV
df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_alt_soil_lc_co2.csv")
# df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_v2.csv")


df = df[df['site_reference'] == 'Barrow-CMDL_US-Brw_tower']

df = df[df['flux_method'] == 'EC']


# --- Data Preprocessing ---

# Create tmean_C and date
df['tmean_C'] = df[['tmmx', 'tmmn']].mean(axis=1)
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

df = df[['site_reference', 'year', 'month', 'date']]

df
# # Find and display any rows with duplicate site_reference and date
# duplicate_rows = df[df.duplicated(subset=['site_reference', 'date'], keep=False)]

# if not duplicate_rows.empty:
#     print("🚨 Found duplicate rows based on site_reference and date:")
#     print(duplicate_rows.sort_values(['site_reference', 'date']))
# else:
#     print("✅ No duplicate rows found.")

Unnamed: 0,site_reference,year,month,date
185071,Barrow-CMDL_US-Brw_tower,2013,9,2013-09-01
185072,Barrow-CMDL_US-Brw_tower,2013,9,2013-09-01
185073,Barrow-CMDL_US-Brw_tower,2013,9,2013-09-01
185074,Barrow-CMDL_US-Brw_tower,2013,9,2013-09-01
194296,Barrow-CMDL_US-Brw_tower,2014,7,2014-07-01
...,...,...,...,...
625479,Barrow-CMDL_US-Brw_tower,2022,11,2022-11-01
627403,Barrow-CMDL_US-Brw_tower,2022,12,2022-12-01
627404,Barrow-CMDL_US-Brw_tower,2022,12,2022-12-01
627405,Barrow-CMDL_US-Brw_tower,2022,12,2022-12-01


In [1]:
import pandas as pd
import numpy as np
import os
import gc
import warnings

warnings.filterwarnings("ignore")

# --- Configuration ---
input_csv = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_alt_soil_lc_co2.csv"
interim_dir = "/explore/nobackup/people/spotter5/anna_v/v2/temp_chunks"
final_parquet = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags.parquet"
os.makedirs(interim_dir, exist_ok=True)

chunk_size = 10  # Number of predictors per chunk

# --- Load and Preprocess ---
df = pd.read_csv(input_csv)
df['tmean_C'] = df[['tmmx', 'tmmn']].mean(axis=1)
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))
df = df.sort_values(['site_reference', 'date'])

base_cols = ['site_reference', 'date']

# Define exclusions
base_exclusions = {
    'site_name', 'site_reference', 'latitude', 'longitude', 'flux_method',
    'country', 'land_cover_eco', 'land_cover_plot', 'bawld_class', 'year',
    'month', 'SummaryQA', 'ch4_flux_total', 'land_cover', 'co2_cont', 'date', 'ALT', 'soil', 'siteID'
}
suffix_exclusions = {col for col in df.columns if col.endswith('_100cm')}
all_exclusions = base_exclusions.union(suffix_exclusions)
predictors = [col for col in df.columns if col not in all_exclusions]

print(f"✅ Total predictors selected: {len(predictors)}")

# --- Generate Features in Chunks ---
for i in range(0, len(predictors), chunk_size):
    chunk = predictors[i:i + chunk_size]
    print(f"\n🚀 Processing chunk {i // chunk_size + 1} with predictors: {chunk}")

    df_chunk = df[base_cols].copy()

    for var in chunk:
        grouped = df.groupby('site_reference')[var]

        df_chunk[f'{var}_lag1'] = grouped.shift(1)
        df_chunk[f'{var}_lag2'] = grouped.shift(2)
        df_chunk[f'{var}_lag3'] = grouped.shift(3)

        df_chunk[f'{var}_roll3_mean'] = (
            grouped.apply(lambda x: x.shift(1).rolling(window=3, min_periods=1).mean())
            .reset_index(level=0, drop=True)
        )
        df_chunk[f'{var}_roll3_std'] = (
            grouped.apply(lambda x: x.shift(1).rolling(window=3, min_periods=1).std())
            .reset_index(level=0, drop=True)
        )

        del grouped
        gc.collect()

    # Save engineered chunk to Parquet
    chunk_parquet = os.path.join(interim_dir, f"features_chunk_{i // chunk_size + 1}.parquet")
    df_chunk.to_parquet(chunk_parquet, index=False)
    print(f"💾 Saved chunk to {chunk_parquet}")
    del df_chunk
    gc.collect()

# --- Merge all chunks and write Parquet ---
print("\n🔗 Merging all feature chunks...")

# Start with base
df_out = df[base_cols + [c for c in df.columns if c not in all_exclusions]].copy()

# Merge chunk by chunk
for fname in sorted(os.listdir(interim_dir)):
    if fname.endswith(".parquet"):
        chunk_df = pd.read_parquet(os.path.join(interim_dir, fname))
        df_out = pd.merge(df_out, chunk_df, on=base_cols, how='left')
        print(f"🧩 Merged {fname}")
        del chunk_df
        gc.collect()

# Final save to Parquet
df_out.to_parquet(final_parquet, index=False)
print(f"\n✅ Final Parquet saved to: {final_parquet}")


✅ Total predictors selected: 24

🚀 Processing chunk 1 with predictors: ['EVI', 'NDVI', 'sur_refl_b01', 'sur_refl_b02', 'sur_refl_b03', 'sur_refl_b07', 'NDWI', 'aet', 'def', 'pdsi']


MemoryError: Unable to allocate 487. GiB for an array with shape (50, 1307431757) and data type float64

In [2]:
df.shape

(652899, 52)

In [None]:
df_full

In [None]:
import pandas as pd
import numpy as np
import warnings
import os

warnings.filterwarnings('ignore')

# Load data
# Ensure the path to your CSV is correct

df = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_alt_soil_lc_co2.csv")


# --- Data Preprocessing ---

# Fill ALT with 2000 for NaNs between 1997–2021
# This assumes 'ALT' column exists.
if 'ALT' in df.columns:
    time_interval_mask = (df['year'] >= 1997) & (df['year'] <= 2021)
    df.loc[time_interval_mask, 'ALT'] = df.loc[time_interval_mask, 'ALT'].fillna(2000)

# Create tmean_C and date
# Note: Assuming 'tmmx' and 'tmmn' are the correct column names instead of tmax_C/tmin_C
df['tmean_C'] = df[['tmmx', 'tmmn']].mean(axis=1)
df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))

# --- Dynamic Feature Selection ---

# Define columns to EXCLUDE from lag/rolling feature generation
# This includes identifiers, categorical data, dates, and user-specified exclusions
base_exclusions = {
    'site_name', 'site_reference', 'latitude', 'longitude', 'flux_method',
    'country', 'land_cover_eco', 'land_cover_plot', 'bawld_class', 'year',
    'month', 'SummaryQA', 'ch4_flux_total', 'land_cover', 'co2_cont', 'date', 'ALT', 'soil', 'land_cover',
    'siteID'
}

# Find all columns that end with the '_100cm' suffix
suffix_exclusions = {col for col in df.columns if col.endswith('_100cm')}

# Combine all sets of exclusions
all_exclusions = base_exclusions.union(suffix_exclusions)

# Dynamically create the list of predictors to expand
predictors_to_expand = [col for col in df.columns if col not in all_exclusions]
print(f"✅ Columns selected for feature engineering: {predictors_to_expand}")

# --- Feature Engineering ---

# Make a copy to store engineered features
df_engineered = df[['site_reference', 'date']].copy()

# Sort by site and date for consistent time-series calculations
df = df.sort_values(['site_reference', 'date'])

# Generate lag, rolling mean, and rolling std features
for var in predictors_to_expand:
    for lag in [1, 2, 3]:
        df_engineered[f'{var}_lag{lag}'] = (
            df.groupby('site_reference')[var]
            .shift(lag)
        )
    
    df_engineered[f'{var}_roll3_mean'] = (
        df.groupby('site_reference')[var]
        .shift(1)
        .rolling(window=3, min_periods=1)
        .mean()
        .reset_index(level=0, drop=True) # Add reset_index to align correctly
    )
    
    df_engineered[f'{var}_roll3_std'] = (
        df.groupby('site_reference')[var]
        .shift(1)
        .rolling(window=3, min_periods=1)
        .std()
        .reset_index(level=0, drop=True) # Add reset_index to align correctly
    )

# --- Finalizing DataFrame ---

# Merge engineered features back into original dataframe
df_full = pd.merge(df, df_engineered, on=['site_reference', 'date'], how='left')

# Save to new CSV
output_path = "/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags_alt.csv"
df_full.to_csv(output_path, index=False)
print(f"🎉 Successfully saved data with new features to: {output_path}")

In [4]:
import pandas as pd
df = pd.read_csv('/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_data_with_lags.csv')

for i in df.columns:

    print(i)

site_name
site_reference
latitude
longitude
flux_method
country
land_cover_eco
land_cover_plot
bawld_class
year
month
EVI
NDVI
SummaryQA
sur_refl_b01
sur_refl_b02
sur_refl_b03
sur_refl_b07
NDWI
pdsi
soil
srad
swe
tmmn
tmmx
vap
vs
nee
gpp
reco
ch4_flux_total
bdod_0_100cm
cec_0_100cm
cfvo_0_100cm
clay_0_100cm
nitrogen_0_100cm
ocd_0_100cm
phh2o_0_100cm
sand_0_100cm
silt_0_100cm
soc_0_100cm
land_cover
co2_cont
tmean_C
date
EVI_lag1
EVI_lag2
EVI_lag3
EVI_roll3_mean
EVI_roll3_std
NDVI_lag1
NDVI_lag2
NDVI_lag3
NDVI_roll3_mean
NDVI_roll3_std
sur_refl_b01_lag1
sur_refl_b01_lag2
sur_refl_b01_lag3
sur_refl_b01_roll3_mean
sur_refl_b01_roll3_std
sur_refl_b02_lag1
sur_refl_b02_lag2
sur_refl_b02_lag3
sur_refl_b02_roll3_mean
sur_refl_b02_roll3_std
sur_refl_b03_lag1
sur_refl_b03_lag2
sur_refl_b03_lag3
sur_refl_b03_roll3_mean
sur_refl_b03_roll3_std
sur_refl_b07_lag1
sur_refl_b07_lag2
sur_refl_b07_lag3
sur_refl_b07_roll3_mean
sur_refl_b07_roll3_std
NDWI_lag1
NDWI_lag2
NDWI_lag3
NDWI_roll3_mean
NDWI_roll3