In [1]:
### This code is supposed to be run in Kaggle notebook for benchmarking the FME approach
### https://www.kaggle.com/code/mathisjander/240709-benchmark-fme/notebook

import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
# define n iterations
n = 5

# load metadata

metadata = pd.read_csv('/kaggle/input/geolifeclef-2024/GLC24_PA_metadata_train.csv')

# get survey ids
survey_ids = metadata['surveyId'].unique()
survey_ids

# define train ratio
train_ratio = 0.8

# shuffle survey ids


def get_train_val_survey_ids(survey_ids, train_ratio):
    
    # shuffle survey ids
    np.random.shuffle(survey_ids)
    # split survey ids into train and val
    n_train = int(train_ratio * len(survey_ids))
    train_survey_ids = survey_ids[:n_train]
    val_survey_ids = survey_ids[n_train:]
    return train_survey_ids, val_survey_ids





In [3]:
num_classes = 11255 # Number of all unique classes within the PO and PA data.

In [4]:
import os
import torch
import tqdm
import numpy as np
import pandas as pd
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from sklearn.metrics import precision_recall_fscore_support

In [5]:
from PIL import Image

def construct_patch_path(data_path, survey_id):
    """Construct the patch file path based on plot_id as './CD/AB/XXXXABCD.jpeg'"""
    path = data_path
    for d in (str(survey_id)[-2:], str(survey_id)[-4:-2]):
        path = os.path.join(path, d)

    path = os.path.join(path, f"{survey_id}.jpeg")

    return path

class BaselineTrainDataset(Dataset):
    def __init__(self, bioclim_data_dir, landsat_data_dir, sentinel_data_dir, metadata, survey_ids, transform=None):
        self.transform = transform
        self.sentinel_transform = transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5, 0.5)),
        ])
      
        self.bioclim_data_dir = bioclim_data_dir
        self.landsat_data_dir = landsat_data_dir
        self.sentinel_data_dir = sentinel_data_dir
        self.metadata = metadata
        self.metadata = self.metadata.dropna(subset="speciesId").reset_index(drop=True)
        self.metadata['speciesId'] = self.metadata['speciesId'].astype(int)
        self.label_dict = self.metadata.groupby('surveyId')['speciesId'].apply(list).to_dict()
        
        self.metadata = self.metadata.drop_duplicates(subset="surveyId").reset_index(drop=True)

        self.survey_ids = survey_ids

    def __len__(self):
        return len(self.survey_ids)

    def __getitem__(self, idx):
        
        survey_id = self.survey_ids[idx]
        
        landsat_sample = torch.nan_to_num(torch.load(os.path.join(self.landsat_data_dir, f"GLC24-PA-train-landsat-time-series_{survey_id}_cube.pt")))
        bioclim_sample = torch.nan_to_num(torch.load(os.path.join(self.bioclim_data_dir, f"GLC24-PA-train-bioclimatic_monthly_{survey_id}_cube.pt")))

        rgb_sample = np.array(Image.open(construct_patch_path(self.sentinel_data_dir, survey_id)))
        nir_sample = np.array(Image.open(construct_patch_path(self.sentinel_data_dir.replace("rgb", "nir").replace("RGB", "NIR"), survey_id)))
        sentinel_sample = np.concatenate((rgb_sample, nir_sample[...,None]), axis=2)

        species_ids = self.label_dict.get(survey_id, [])  # Get list of species IDs for the survey ID
        label = torch.zeros(num_classes)  # Initialize label tensor
        for species_id in species_ids:
            label_id = species_id
            label[label_id] = 1  # Set the corresponding class index to 1 for each species
        
        if isinstance(landsat_sample, torch.Tensor):
            landsat_sample = landsat_sample.permute(1, 2, 0)  # Change tensor shape from (C, H, W) to (H, W, C)
            landsat_sample = landsat_sample.numpy()  # Convert tensor to numpy array
            
        if isinstance(bioclim_sample, torch.Tensor):
            bioclim_sample = bioclim_sample.permute(1, 2, 0)  # Change tensor shape from (C, H, W) to (H, W, C)
            bioclim_sample = bioclim_sample.numpy()  # Convert tensor to numpy array   
        
        if self.transform:
            landsat_sample = self.transform(landsat_sample)
            bioclim_sample = self.transform(bioclim_sample)
            sentinel_sample = self.sentinel_transform(sentinel_sample)

        return landsat_sample, bioclim_sample, sentinel_sample, label, survey_id



In [6]:
# Train Dataset and DataLoader

def create_loaders(train_survey_ids, val_survey_ids):
  train_batch_size = 64
  val_batch_size = 64
  transform = transforms.Compose([
      transforms.ToTensor()
  ])

  # Load Training metadata
  train_landsat_data_path = "/kaggle/input/geolifeclef-2024/TimeSeries-Cubes/TimeSeries-Cubes/GLC24-PA-train-landsat_time_series"
  train_bioclim_data_path = "/kaggle/input/geolifeclef-2024/TimeSeries-Cubes/TimeSeries-Cubes/GLC24-PA-train-bioclimatic_monthly"
  train_sentinel_data_path="/kaggle/input/geolifeclef-2024/PA_Train_SatellitePatches_RGB/pa_train_patches_rgb"
  train_metadata_path = "/kaggle/input/geolifeclef-2024/GLC24_PA_metadata_train.csv"

  train_metadata = pd.read_csv(train_metadata_path)

  train_data = BaselineTrainDataset(train_bioclim_data_path, train_landsat_data_path, train_sentinel_data_path, train_metadata, train_survey_ids, transform=transform)
  train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=False, num_workers=4)

  val_data = BaselineTrainDataset(train_bioclim_data_path, train_landsat_data_path, train_sentinel_data_path, train_metadata, val_survey_ids, transform=transform)
  val_loader = DataLoader(val_data, batch_size=val_batch_size, shuffle=False, num_workers=4)

  return train_loader, val_loader



In [7]:
import torch
import numpy as np
from tqdm import tqdm

def calculate_f1_score_from_tensors(y_true, y_pred, threshold=0.5):
    
    y_pred = (y_pred >= threshold)
    y_true = y_true.cpu().bool()
    y_pred = y_pred.cpu().bool()
    
    TP = (y_true & y_pred).sum(axis=1)  # True Positives per sample
    FP = (y_true & ~y_pred).sum(axis=1)  # False Positives per sample
    FN = (~y_true & y_pred).sum(axis=1)  # False Negatives per sample

    # compute f1 score for each sample
    pre = TP/(TP+FP)
    rec = TP/(TP+FN)
    f1 = 2 * pre * rec / (pre + rec)

    # Handle division by zero
    f1 = np.nan_to_num(f1)

    # compute micro-average f1 score
    micro_f1 = np.mean(f1)
    # Return mean F1 score across all samples
    return micro_f1



In [8]:
# Check if cuda is available
device = torch.device("mps")

if torch.cuda.is_available():
    device = torch.device("cuda")
    

print(device)

cuda


In [9]:
import torch.nn.functional as F

class MultimodalEnsemble(nn.Module):
    def __init__(self, num_classes):
        super(MultimodalEnsemble, self).__init__()
        
        self.landsat_norm = nn.LayerNorm([6,4,21])
        self.landsat_model = models.resnet18(weights=None)
        # Modify the first convolutional layer to accept 6 channels instead of 3
        self.landsat_model.conv1 = nn.Conv2d(6, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.landsat_model.maxpool = nn.Identity()
        self.ls_ln = nn.LayerNorm(1000)
        self.ls_fc1 = nn.Linear(1000, 4096)
        self.ls_dropout = nn.Dropout(p=0.1)
        self.ls_fc2 = nn.Linear(4096, num_classes)
        
        self.bioclim_norm = nn.LayerNorm([4,19,12])
        self.bioclim_model = models.resnet18(weights=None)  
        # Modify the first convolutional layer to accept 4 channels instead of 3
        self.bioclim_model.conv1 = nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bioclim_model.maxpool = nn.Identity()
        self.bc_ln = nn.LayerNorm(1000)
        self.bc_fc1 = nn.Linear(1000, 4096)
        self.bc_dropout = nn.Dropout(p=0.1)
        self.bc_fc2 = nn.Linear(4096, num_classes)
        
        self.sentinel_model = models.swin_t(weights="IMAGENET1K_V1")
        # Modify the first layer to accept 4 channels instead of 3
        self.sentinel_model.features[0][0] = nn.Conv2d(4, 96, kernel_size=(4, 4), stride=(4, 4))
        self.sentinel_model.head = nn.Identity()
        self.se_ln = nn.LayerNorm(768)
        self.se_fc1 = nn.Linear(768, 4096)
        self.se_dropout = nn.Dropout(p=0.1)
        self.se_fc2 = nn.Linear(4096, num_classes)
        
        
    def forward(self, x, y, z):
        
        x = self.landsat_norm(x)
        x = self.landsat_model(x)
        x = self.bc_ln(x)
        x = self.ls_fc1(x)
        x = self.ls_dropout(x)
        x = self.ls_fc2(x)
        
        y = self.bioclim_norm(y)
        y = self.bioclim_model(y)
        y = self.bc_ln(y)
        y = self.bc_fc1(y)
        y = self.bc_dropout(y)
        y = self.bc_fc2(y)
        
        z = self.sentinel_model(z)
        z = self.se_ln(z)
        z = self.se_fc1(z)
        z = self.se_dropout(z)
        z = self.se_fc2(z)
        
        # average the predictions
        out = (x + y + z) / 3
        return out

In [10]:
# Training loop with custom F1 score
f1s_fme = []

for i in range(n):

    # get train and val survey ids
    train_survey_ids, val_survey_ids = get_train_val_survey_ids(survey_ids, train_ratio)

    train_loader, val_loader = create_loaders(train_survey_ids, val_survey_ids)
    
    model = MultimodalEnsemble(num_classes=num_classes)
    model.to(device)

    # Hyperparameters
    learning_rate = 0.00025
    num_epochs = 10
    positive_weigh_factor = 1.0

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = CosineAnnealingLR(optimizer, T_max=25, verbose=True)


    # Training loop
    for epoch in range(num_epochs):
        model.train()
        
        for data1, data2, data3, targets, _ in train_loader:

            data1 = data1.to(device)
            data2 = data2.to(device)
            data3 = data3.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(data1, data2, data3)

            pos_weight = targets*positive_weigh_factor  # All positive weights are equal to 10
            criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
            loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    print("Training complete.")

    # Predict with trained models on validation set
    model.eval()
    f1s_iteration = []

    for data1, data2, data3, targets, _ in tqdm(val_loader):
        data1 = data1.to(device)
        data2 = data2.to(device)
        data3 = data3.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            outputs = model(data1, data2, data3)
            outputs = torch.sigmoid(outputs)

        # Calculate custom F1 score
        f1 = calculate_f1_score_from_tensors(targets, outputs, threshold=0.5)
        f1s_iteration.append(f1)

    # Calculate mean F1 score for the iteration
    mean_f1_iteration = np.mean(f1s_iteration)
    print(f"Iteration {i+1} - Mean F1 Score: {mean_f1_iteration}")

    # Append mean F1 score to list
    f1s_fme.append(mean_f1_iteration)

Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /root/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:00<00:00, 141MB/s]


Adjusting learning rate of group 0 to 2.5000e-04.
Epoch [1/10], Loss: 0.0039
Epoch [2/10], Loss: 0.0036
Epoch [3/10], Loss: 0.0034
Epoch [4/10], Loss: 0.0031
Epoch [5/10], Loss: 0.0030
Epoch [6/10], Loss: 0.0029
Epoch [7/10], Loss: 0.0027
Epoch [8/10], Loss: 0.0026
Epoch [9/10], Loss: 0.0026
Epoch [10/10], Loss: 0.0024
Training complete.


100%|██████████| 279/279 [02:20<00:00,  1.99it/s]


Iteration 1 - Mean F1 Score: 0.32395538687705994
Adjusting learning rate of group 0 to 2.5000e-04.
Epoch [1/10], Loss: 0.0054
Epoch [2/10], Loss: 0.0049
Epoch [3/10], Loss: 0.0043
Epoch [4/10], Loss: 0.0040
Epoch [5/10], Loss: 0.0036
Epoch [6/10], Loss: 0.0034
Epoch [7/10], Loss: 0.0032
Epoch [8/10], Loss: 0.0030
Epoch [9/10], Loss: 0.0028
Epoch [10/10], Loss: 0.0026
Training complete.


100%|██████████| 279/279 [00:44<00:00,  6.34it/s]


Iteration 2 - Mean F1 Score: 0.3021313548088074
Adjusting learning rate of group 0 to 2.5000e-04.
Epoch [1/10], Loss: 0.0054
Epoch [2/10], Loss: 0.0047
Epoch [3/10], Loss: 0.0043
Epoch [4/10], Loss: 0.0039
Epoch [5/10], Loss: 0.0037
Epoch [6/10], Loss: 0.0035
Epoch [7/10], Loss: 0.0033
Epoch [8/10], Loss: 0.0032
Epoch [9/10], Loss: 0.0032
Epoch [10/10], Loss: 0.0031
Training complete.


100%|██████████| 279/279 [00:44<00:00,  6.23it/s]


Iteration 3 - Mean F1 Score: 0.28171759843826294
Adjusting learning rate of group 0 to 2.5000e-04.
Epoch [1/10], Loss: 0.0045
Epoch [2/10], Loss: 0.0041
Epoch [3/10], Loss: 0.0036
Epoch [4/10], Loss: 0.0034
Epoch [5/10], Loss: 0.0031
Epoch [6/10], Loss: 0.0030
Epoch [7/10], Loss: 0.0028
Epoch [8/10], Loss: 0.0027
Epoch [9/10], Loss: 0.0026
Epoch [10/10], Loss: 0.0024
Training complete.


100%|██████████| 279/279 [00:48<00:00,  5.78it/s]


Iteration 4 - Mean F1 Score: 0.29393792152404785
Adjusting learning rate of group 0 to 2.5000e-04.
Epoch [1/10], Loss: 0.0054
Epoch [2/10], Loss: 0.0049
Epoch [3/10], Loss: 0.0047
Epoch [4/10], Loss: 0.0044
Epoch [5/10], Loss: 0.0041
Epoch [6/10], Loss: 0.0039
Epoch [7/10], Loss: 0.0038
Epoch [8/10], Loss: 0.0037
Epoch [9/10], Loss: 0.0036
Epoch [10/10], Loss: 0.0033
Training complete.


100%|██████████| 279/279 [00:46<00:00,  5.99it/s]

Iteration 5 - Mean F1 Score: 0.2835463583469391





In [11]:
import datetime

now = datetime.datetime.now()
timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')

results = pd.DataFrame(f1s_fme, columns=['fme_f1'])
results.to_csv(f'{timestamp}_fme_benchmark_results_{n}.csv', index=False)

- code should work but needs to be run in kaggle or colab for gpu. 