# Result Notebook

This notebook walks through the processing workflow for applying the model we trained.

# Packages

In [1]:
# Use autoreload to automatically reload modules.
# Mode 2 reloads all imported modules (except those excluded via %aimport)
# before each cell executes, so edits to the project's source files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Resolve the project root directory once and keep it in `root_path`.
# NOTE(review): presumably rootutils finds it by searching upward for a
# project-root marker file — confirm against the rootutils documentation.
import rootutils
root_path = rootutils.find_root()

In [3]:
import cubo
import xarray as xr
# import basic libraries
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
from datetime import datetime
import numpy as np

In [4]:
from burned_embedder.classifier.dataset import create_datasets
from burned_embedder.classifier.model import get_model
from burned_embedder.classifier.model_utils import (
    train_epoch, evaluate, EarlyStopping, print_metrics
)
from burned_embedder.utils import setup_device


# Setting up dataloaders

Configuration for the classifier model. Note: the model is not trained in this notebook; the config only supplies metadata.

In [5]:
# Settings describing the classifier run. The model is not trained in this
# notebook; these values are carried along as metadata.
config = dict(
    # Data split / input settings (passed to create_datasets further down).
    test_size=0.2,
    val_split=0.1,
    seed=2,
    input_type='concat',
    # Architecture settings.
    # NOTE(review): presumably mirror the trained model's layers — confirm.
    hidden_dims=[1400, 700, 350],
    dropout=0.4,
    # Optimisation settings; only batch_size is read by the visible cells
    # (for the DataLoaders), the rest are kept for reference.
    weight_decay=1e-4,
    batch_size=32,
    lr=0.001,
    epochs=100,
    patience=15,
)

Get the datasets and dataloaders that retrieve the computed embeddings.

In [None]:
# Select the compute device.
# NOTE(review): gpu_index=1 is hard-coded (the recorded run printed "cuda:1");
# confirm how setup_device behaves on hosts that expose fewer GPUs.
device = setup_device(gpu_index=1, memory_fraction=1.0)
print(f"\nUsing device: {device}")

# Build the train/val/test datasets from the split settings in `config`.
# Augmentation is switched off for this notebook.
print("\nCreating datasets...")
train_dataset, val_dataset, test_dataset = create_datasets(
    test_size=config['test_size'],
    val_split=config['val_split'],
    random_state=config['seed'],
    input_type=config['input_type'],
    augment=False,
)

# Read the input dimension off the first training sample
# (each item unpacks into an embedding and a second value that is ignored here).
sample_embedding, _ = train_dataset[0]
input_dim = sample_embedding.shape[0]
print(f"Input dimension: {input_dim}")

# Create dataloaders. All three share batch size and worker count.
loader_kwargs = dict(batch_size=config['batch_size'], num_workers=4)

# Seed the shuffling explicitly: without a generator the order of the shuffled
# train loader depends on the global torch RNG state, which this notebook
# never seeds, so re-running the cell would yield a different batch order.
# A fresh generator per cell run makes the cell idempotent.
shuffle_generator = torch.Generator().manual_seed(config['seed'])

train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    generator=shuffle_generator,
    **loader_kwargs,
)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)



Using device: cuda:1

Creating datasets...
Loaded 1556 positive samples
Loaded 1402 negative samples

Dataset splits:
  Train: 2129 samples (pos: 1120, neg: 1009)
  Val:   237 samples (pos: 125, neg: 112)
  Test:  592 samples (pos: 311, neg: 281)
  Input type: concat
Input dimension: 1536

Model Architecture:
DifferenceMLP(
  (model): Sequential(
    (0): Linear(in_features=1536, out_features=1400, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.4, inplace=False)
    (3): Linear(in_features=1400, out_features=700, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.4, inplace=False)
    (6): Linear(in_features=700, out_features=350, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.4, inplace=False)
    (9): Linear(in_features=350, out_features=1, bias=True)
    (10): Sigmoid()
  )
)

Total parameters: 3,378,201
Trainable parameters: 3,378,201


# Load trained Deforestation Image-Level Classifier

# 