# WZE-UAV Image Classification using Deep Learning

In [1]:
import os
import gc
import glob
import numpy as np
from pathlib import Path
from tqdm.auto import tqdm
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import KFold

In [2]:
import torch
import torchvision.transforms as T
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
from mlxtend.plotting import plot_confusion_matrix

In [3]:
import wze_uav.data_loader as data_loader
import wze_uav.models as models
from wze_uav.engine import *
from wze_uav.utils2 import *
#from wze_uav.log_writer import create_writer
from wze_uav.datasplit import *
from efficientnet import model_effnet #for custom effnet with n_channels input
import wandb

#### Get PyTorch version

In [4]:
# Report the installed versions of the two core libraries, one line per library.
for _lib in (torch, torchvision):
    print(f"{_lib.__name__} version: {_lib.__version__}")

torch version: 1.13.1+cu116
torchvision version: 0.14.1+cu116


#### Preparing device agnostic code 

In [5]:
# Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# The torch.cuda.* queries below raise on a machine without a usable CUDA device,
# so they are only executed when a GPU was actually selected.
if device == 'cuda':
    current_index = torch.cuda.current_device()
    # get index of currently selected device
    print(f"Index of current divice: {current_index}")
    # get number of GPUs available
    print(f"Number of GPUs available: {torch.cuda.device_count()}")
    # get the name of the currently selected device
    print(f"GPU Model: {torch.cuda.get_device_name(current_index)}")
else:
    print("No CUDA device available - running on CPU.")

cuda
Index of current divice: 0
Number of GPUs available: 1
GPU Model: Quadro RTX 8000


#### Login to Weights & Biases to track results

In [6]:
# IMPORTANT: The proxy needs to be set in anaconda!
# Run the two `set` commands below in the Anaconda prompt and restart Jupyter
# Notebook afterwards, so that wandb can reach its server through the proxy:
#set http_proxy=http://www-proxy.bayern.de:80
#set https_proxy=http://www-proxy.bayern.de:80
wandb.login()
# Alternative initialisation kept for reference (thread start method):
#wandb.init(settings=wandb.Settings(start_method="thread"))
# Start a run in the 'wze-uav-v2' project under the 'simon-ecke' entity.
# NOTE(review): the cross-validation cell calls wandb.init() again for every fold,
# so this first run presumably stays empty - confirm whether it is needed.
wandb.init(project='wze-uav-v2', entity='simon-ecke')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msimon-ecke[0m. Use [1m`wandb login --relogin`[0m to force relogin


#### Ensure reproducibility 

In [7]:
# for more information, see also: https://pytorch.org/docs/stable/notes/randomness.html

# Set seeds
def set_seeds(seed: int=42):
    """Seeds all random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (the CPU generator and the generators of all CUDA devices).

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Local import: `random` is not part of the notebook's import cells.
    import random

    # Seed Python's own RNG (may be used by third-party or project code)
    random.seed(seed)
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations on every GPU (silently ignored without CUDA)
    torch.cuda.manual_seed_all(seed)
    # seed for numpy
    np.random.seed(seed)

set_seeds(42)

# cuDNN settings for reproducible runs
# (see https://pytorch.org/docs/stable/notes/randomness.html):
# - benchmark = False stops cuDNN from auto-tuning the convolution algorithm per run.
#   Setting it to True might speed up training but should stay False if reproducible
#   results are desired.
# - deterministic = True additionally restricts cuDNN to deterministic algorithms;
#   with benchmark = False alone the selected algorithm may still be nondeterministic.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True


#### Define file directory

In [8]:
#####################################################################################
# Root directory of the dataset. Enable exactly ONE of the data_path assignments
# below, depending on the number of input channels. The directory is expected to
# contain one sub-folder per year, each holding the data files (this is how the
# "Get all file paths" cell walks it).
# NOTE(review): absolute, machine-specific Windows path - adjust before running elsewhere.

# 3 channel input (r-g-b)
data_path = r"D:\Drohnendaten\10_WZE-UAV\Auswertung_findatree\Datasplit\ROI\rgb"

# 4 channel input (r-g-b-nir)
#data_path = r"D:\Drohnendaten\10_WZE-UAV\Auswertung_findatree\Datasplit\ROI\rgb-nir"

# 5 channel input (r-g-b-re-nir)
#data_path = r"D:\Drohnendaten\10_WZE-UAV\Auswertung_findatree\Datasplit\ROI\rgb-re-nir"
#####################################################################################

#### Get all file paths

In [9]:
# Collect the full path of every data file below data_path.
# Expected layout: <data_path>/<year>/<file>
# The listings are sorted because os.listdir() returns entries in arbitrary order and
# the order of path_list determines the order in which the data is loaded.
fn_list = sorted(os.listdir(data_path))
path_list = []
# Iterate over all datafiles
for year in fn_list:
    # os.path.join picks the separator of the current platform
    year_dir = os.path.join(data_path, year)
    if not os.path.isdir(year_dir):
        # skip stray files that sit next to the year folders
        continue
    for filename in sorted(os.listdir(year_dir)):
        path_list.append(os.path.join(year_dir, filename))

#### Create unique hash IDs for every individual tree

In [10]:
# Build the tree-ID lookup from all collected file paths. The result is handed to
# data_loader.hdf5_to_img_label() in the import cell that follows.
hashID_dict = data_loader.get_unique_treeID(path_list)

Creating unique tree IDs...:   0%|          | 0/647 [00:00<?, ?it/s]

#### Import all imagery, labels and other features from hdf5 files

In [11]:
# Read the "images_masked" set from every hdf5 file and unpack the six returned
# collections (images, labels, species, kkl, bk and hash IDs).
(image_set, label_set, species_set,
 kkl_set, bk_set, hash_id) = data_loader.hdf5_to_img_label(
    path_list,
    hashID_dict,
    load_sets=["images_masked"],
)

Processing hdf5 datasets:   0%|          | 0/647 [00:00<?, ?it/s]

#### Convert nbv to classes

In [12]:
# Map the raw nbv values to the three classes listed in class_names further below
# ('healthy', 'stressed', 'dead').
# NOTE(review): label_set is overwritten with its own converted version, so re-running
# this cell feeds already converted labels into the conversion again - presumably it
# should be executed exactly once after the import cell; confirm.
label_set = nbv_to_sst_3classes(label_set)

#### Split data into a subset (used for training and validation) and a test dataset

In [13]:
# Split everything into a "sub" part (used for cross-validation below) and a test part.
(sub_image_set, sub_label_set, sub_hash_id, sub_species_set,
 test_image_set, test_label_set, test_hash_id, test_species_set) = data_split(
    image_set,
    label_set,
    hash_id,
    species_set,
    n_splits=6,
    random_state=42,
    seed=2,
)


ORIGINAL POSITIVE RATIO: 0.42823717096516883
Fold : 0
TRAIN POSITIVE RATIO: 0.4286353181702019
TEST POSITIVE RATIO : 0.4262607040913416
LENGTH TRAIN GROUPS : 5900
LENGTH TEST GROUPS  : 1187
Number of True in sub_indices: 15652
Number of False in sub_indices: 3153
Number of True in test_indices: 1187
Number of False in test_indices: 17618
Check shapes:

Images sub dataset: (15652, 250, 250, 3)
Labels sub dataset: (15652, 1)

Images test dataset: (1187, 250, 250, 3)
Labels test dataset: (1187, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in sub dataset: 9208
Stressed trees in sub dataset: 6179
Dead trees in sub dataset: 265
Healthy trees in test dataset: 685
Stressed trees in test dataset: 483
Dead trees in test dataset: 19
Ratio health trees in test dataset: 0.07439183318853171
Ratio stressed trees in test dataset: 0.07816798834762907
Ratio dead trees in test dataset: 0.07169811320754717


#### Check if any hash ID is in both sub and test dataset

In [14]:
# Gather the tree hash IDs (column 0) of both partitions and verify that no tree
# occurs in the sub dataset and in the test dataset at the same time.
hash_set = set(sub_hash_id[:, 0].ravel())
test_hash_set = set(test_hash_id[:, 0].ravel())
intersection = hash_set & test_hash_set
if not intersection:
    print("There are no hash_id values in both train and test datasets. The datasplit was successful")
else:
    print(f"Hash_id values in both train and test sets: {len(intersection)}")

There are no hash_id values in both train and test datasets. The datasplit was successful


#### Check feature distribution of the Test dataset

In [15]:
def count_occurrences(data, value):
    """Return how many items of `data` compare equal to `value`.

    Args:
        data: Iterable of items; each item is compared with `==`.
        value: The value to look for.

    Returns:
        int: Number of items for which `item == value` is truthy.
    """
    return sum(1 for entry in data if entry == value)

# Print the class and species counts for the test dataset and for the remaining
# ("sub") dataset. Species codes as labelled in the output: 134 -> pine, 118 -> spruce.
for _title, _prefix, _label_arr, _species_arr in (
    ("Test dataset", "Test data", test_label_set, test_species_set),
    ("Remaining dataset", "Remaining data", sub_label_set, sub_species_set),
):
    print(_title)
    print(f"{_prefix} healthy trees: {count_occurrences(_label_arr, 0)}")
    print(f"{_prefix} stressed trees: {count_occurrences(_label_arr, 1)}")
    print(f"{_prefix} dead trees: {count_occurrences(_label_arr, 2)}")
    print(f"{_prefix} pine trees: {count_occurrences(_species_arr, 134)}")
    print(f"{_prefix} spruces: {count_occurrences(_species_arr, 118)}")
    print("-"*50)

Test dataset
Test data healthy trees: 685
Test data stressed trees: 483
Test data dead trees: 19
Test data pine trees: 301
Test data spruces: 468
--------------------------------------------------
Remaining dataset
Remaining data healthy trees: 9208
Remaining data stressed trees: 6179
Remaining data dead trees: 265
Remaining data pine trees: 3958
Remaining data spruces: 5791
--------------------------------------------------


In [16]:
# Training pipeline: tensor conversion followed by the augmentations
# (random horizontal/vertical flips and a random rotation between 0 and 360 degrees).
transform_train = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomRotation(degrees=[0, 360]),
    ]
)

# Validation / test pipeline: tensor conversion only, no augmentation.
transform = transforms.Compose([transforms.ToTensor()])

# The class names must fit the custom dataset class in use (here: 3 classes).
# Alternative label sets, kept for reference:
#class_names = ['healthy', 'slightly_stressed', 'moderately_stressed', 'highly_stressed', 'dead']
#class_names = ['healthy', 'moderately_stressed', 'highly_stressed', 'dead']
class_names = ['healthy', 'stressed', 'dead']

# Seeded generator that is handed to the training DataLoader.
g = torch.Generator().manual_seed(42)

# DataLoader settings
NUM_WORKERS = 3  # should be changed, depending on the system used
batch_size = 32

#### Define variables and parameters

In [17]:
# 1. Define number of epochs and read the number of input channels
epochs = 50 # upper limit per fold; early stopping (see `patience`) may end training sooner
n_bands = sub_image_set[0].shape[2] # number of bands = size of the last image axis (H, W, bands)

# 2. Define model
num_classes = len(class_names) # one output per class name
unfreeze = True # all layer weights get updated
dropout_rate = 0.5 # dropout rate handed to the model factory
model_name = "EffNet_b7_RGB_3classes_v2" # passed to train() as model_name

# 3. Define loss, optimizer and learning rate scheduler settings
lr = 0.005 # initial learning rate of the optimizer
min_lr = 1e-6 # minimum learning rate threshold passed to the scheduler
gamma = 0.75 # how fast the learning rate decreases per epoch (low number=faster decrease)
patience = 10 # passed to train() as early_stop_patience

# 4. Names used to label the experiment
# `experiment_name` was meant as target folder name where the tensorboard event files are saved;
# `extra` is the suffix of the wandb run name of every fold.
experiment_name = 'RGB_3classes_v2'
extra = "RGB_3classes_v2"

#----------------------------------------------------------------------
#torch.cuda.empty_cache()
# Report the GPU memory currently allocated by tensors before training starts.
print(f"Memory allocated: {torch.cuda.memory_allocated()} bytes") 

Memory allocated: 0 bytes


#### Run k-Fold cross-validation on EfficientNet

In [None]:
%%time

# Grouped, stratified k-fold cross-validation on the "sub" dataset.
# For every fold: build the train/validation split, create datasets and dataloaders,
# train a fresh EfficientNet-B7 and log the run to Weights & Biases.

# Set the random seeds
set_seeds(42)

# Define the number of folds
num_folds = 5

# Results returned by train() for every fold, keyed by the 1-based fold number.
fold_results = {}

# Column 0 of sub_hash_id holds the hash ID of each tree; it is passed to kf.split()
# as the group key. Column 1 holds the year of the image.
groups = sub_hash_id[:, 0]
years_col = sub_hash_id[:, 1]
print("ORIGINAL POSITIVE RATIO:", sub_label_set.mean())
# create a StratifiedGroupKFold instance
kf = StratifiedGroupKFold(n_splits=num_folds, shuffle=True, random_state=42)

# loop through the folds
for fold, (train_ids, val_ids) in enumerate(kf.split(sub_image_set, sub_label_set, groups)):
    fold_num = fold + 1  # 1-based fold number used for run names, log output and results
    print("Fold :", fold)
    print("TRAIN POSITIVE RATIO:", sub_label_set[train_ids].mean())
    print("VAL POSITIVE RATIO  :", sub_label_set[val_ids].mean())
    print("LENGTH TRAIN GROUPS :", len(set(groups[train_ids])))
    print("LENGTH VAL GROUPS   :", len(set(groups[val_ids])))

    # Translate the positional fold indices into tree hash IDs
    train_hashes = groups[train_ids]
    val_hashes = np.unique(groups[val_ids])

    torch.cuda.empty_cache()

    # Initialize a new wandb run for this fold
    wandb.init(project='wze-uav-v2', name=f"fold_{fold_num}_{extra}")

    # 1. Split data into train and validation set
    # Boolean row mask: every image whose tree belongs to the training groups
    train_indices = np.isin(groups, train_hashes)

    # Boolean row mask for validation: for each tree, keep the images of only ONE year
    val_indices = np.zeros_like(train_indices)  # initialize to all False
    for hash_id_val in val_hashes:
        tree_rows = groups == hash_id_val
        years = np.unique(years_col[tree_rows])  # years available for this tree
        # Only draw from the RNG when there really is a choice; this keeps the random
        # stream (and with it the selected validation images) unchanged.
        year = years[0] if len(years) == 1 else np.random.choice(years)
        val_indices[tree_rows & (years_col == year)] = True

    # Select images, labels, hash IDs and species for train and validation sets
    train_image_set = sub_image_set[train_indices]
    train_label_set = sub_label_set[train_indices].reshape(-1, 1)
    train_hash_id = groups[train_indices]
    train_species_set = sub_species_set[train_indices].reshape(-1, 1)
    val_image_set = sub_image_set[val_indices]
    val_label_set = sub_label_set[val_indices].reshape(-1, 1)
    val_hash_id = groups[val_indices]
    val_species_set = sub_species_set[val_indices].reshape(-1, 1)

    # check if there are any group overlaps between the data splits
    hash_set = set(train_hash_id)
    val_hash_set = set(val_hash_id)
    test_hash_set = set(test_hash_id[:, 0].flatten())
    intersection = hash_set.intersection(val_hash_set)
    intersection2 = test_hash_set.intersection(val_hash_set)
    intersection3 = hash_set.intersection(test_hash_set)
    # Report a problem if ANY pair of splits shares a tree (not only train/val)
    if intersection or intersection2 or intersection3:
        print(f"Hash_id values in both train and val sets: {len(intersection)}")
        print(f"Hash_id values in both test and val sets: {len(intersection2)}")
        print(f"Hash_id values in both train and test sets: {len(intersection3)}")
    else:
        print("There are no same hash_id values in train, val or test datasets. The datasplit was successful")

    print("Check shapes:\n")
    print(f"Images train dataset: {train_image_set.shape}")
    print(f"Labels train dataset: {train_label_set.shape}\n")

    print(f"Images validation dataset: {val_image_set.shape}")
    print(f"Labels validation dataset: {val_label_set.shape}\n")
    print('-'*50)
    print("Check if the split was stratified: (random_state=42)")
    print(f"Healthy trees in train dataset: {np.count_nonzero(train_label_set == 0)}")
    print(f"Stressed trees in train dataset: {np.count_nonzero(train_label_set == 1)}")
    print(f"Dead trees in train dataset: {np.count_nonzero(train_label_set == 2)}")
    print(f"Healthy trees in validation dataset: {np.count_nonzero(val_label_set == 0)}")
    print(f"Stressed trees in validation dataset: {np.count_nonzero(val_label_set == 1)}")
    print(f"Dead trees in validation dataset: {np.count_nonzero(val_label_set == 2)}")
    print(f"Ratio health trees in validation dataset: {np.count_nonzero(val_label_set == 0)/np.count_nonzero(sub_label_set == 0)}")
    print(f"Ratio stressed trees in validation dataset: {np.count_nonzero(val_label_set == 1)/np.count_nonzero(sub_label_set == 1)}")
    print(f"Ratio dead trees in validation dataset: {np.count_nonzero(val_label_set == 2)/np.count_nonzero(sub_label_set == 2)}")
    print("-"*50)

    # 2. Create train and validation dataset. (choose custom dataset loader with 3 - 5 classes)
    print(f"\nCreating datasets for fold: {fold_num}\n")
    train_dataset = data_loader.CustomDataset(data=train_image_set, labels=train_label_set, class_names=class_names,
                                              species=train_species_set, transform=transform_train)

    val_dataset = data_loader.CustomDataset(data=val_image_set, labels=val_label_set, class_names=class_names,
                                            species=val_species_set, transform=transform)

    # 3. Create train and validation dataloader
    # create sampler for oversampling of the minority classes
    sampler = data_loader.data_sampler(dataset=train_dataset, class_names=class_names)
    print(f"Creating dataloaders for fold: {fold_num}\n")
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, persistent_workers=True, pin_memory=True,
                                  num_workers=NUM_WORKERS, generator=g, sampler=sampler,
                                  shuffle=False, drop_last=True)  # shuffle false because of the sampler

    # NOTE(review): drop_last=True discards the final incomplete validation batch, so a few
    # validation samples are never evaluated - confirm that train() copes with a smaller
    # last batch before changing this.
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, persistent_workers=True, pin_memory=True,
                                num_workers=NUM_WORKERS, shuffle=False, drop_last=True)

    #model = model_effnet.EfficientNet.from_pretrained('efficientnet-b7', in_channels=n_bands, num_classes=num_classes, dropout_rate=dropout_rate)
    # A fresh model, optimizer and scheduler per fold; `unfreeze` comes from the parameter cell
    model = models.create_effnetb7(output_shape=num_classes, unfreeze=unfreeze, dropout_rate=dropout_rate, device=device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
    lr_scheduler = CustomExponentialLR(optimizer, gamma=gamma, min_lr=min_lr)

    print(f"\n[INFO] Fold number: {fold_num}")
    print(f"[INFO] Number of epochs: {epochs}")
    print(f"[INFO] Batch_size: {batch_size}")
    print(f"[INFO] Number of bands: {n_bands}")
    print(f"[INFO] Dropout rate: {dropout_rate}")
    print(f"[INFO] Gamma learning rate: {gamma}")
    print(f"[INFO] Memory allocated: {torch.cuda.memory_allocated()} bytes")

    # 4. Train model with k fold dataloaders and track experiments
    fold_result = train(model=model, model_name=model_name, n_bands=n_bands, batch_size=batch_size,
                        train_dataloader=train_dataloader, val_dataloader=val_dataloader,
                        optimizer=optimizer, loss_fn=loss_fn, lr_scheduler=lr_scheduler,
                        num_classes=num_classes, epochs=epochs, experiment_num=fold_num, device=device,
                        writer=None, early_stop_patience=patience)
    fold_results[fold_num] = fold_result

    # Keep the per-fold variable names of the earlier notebook version available
    if fold_num == 1:
        fold1_results = fold_result
    elif fold_num == 2:
        fold2_results = fold_result
    elif fold_num == 3:
        fold3_results = fold_result
    elif fold_num == 4:
        fold4_results = fold_result
    else:
        fold5_results = fold_result

    # Release everything that belongs to this fold. The parentheses matter: without them
    # a line break ends the `del` statement and the names on the following line would
    # just form an expression that deletes nothing.
    del (train_indices, val_indices, train_image_set, train_label_set, train_hash_id, train_species_set,
         val_image_set, val_label_set, val_hash_id, val_species_set,
         train_dataset, val_dataset, sampler, train_dataloader, val_dataloader,
         model, loss_fn, optimizer, lr_scheduler)

    #finish the wandb run
    wandb.finish()
    print("Deleting variables and emptying cache")
    gc.collect()
    torch.cuda.empty_cache()
    print(f"Memory allocated: {torch.cuda.memory_allocated()} bytes")
    print("-"*50 + "\n")

ORIGINAL POSITIVE RATIO: 0.4286353181702019
Fold : 0
TRAIN POSITIVE RATIO: 0.43211854225070084
VAL POSITIVE RATIO  : 0.41490369434796337
LENGTH TRAIN GROUPS : 4710
LENGTH VAL GROUPS   : 1190


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.209206…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

There are no same hash_id values in train, val or test datasets. The datasplit was successful
Check shapes:

Images train dataset: (12485, 250, 250, 3)
Labels train dataset: (12485, 1)

Images validation dataset: (1190, 250, 250, 3)
Labels validation dataset: (1190, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in train dataset: 7285
Stressed trees in train dataset: 5005
Dead trees in train dataset: 195
Healthy trees in validation dataset: 724
Stressed trees in validation dataset: 437
Dead trees in validation dataset: 29
Ratio health trees in validation dataset: 0.078627280625543
Ratio stressed trees in validation dataset: 0.07072341802880724
Ratio dead trees in validation dataset: 0.10943396226415095
--------------------------------------------------

Creating datasets for fold: 1

Creating dataloaders for fold: 1

[INFO] Created new effnet_b7 model.

[INFO] Fold number: 1
[INFO] Number of epochs: 50
[INFO] Ba

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 
Learning rate: 0.005
Train loss: 0.7745 | Train precision: 0.5220 | Train recall: 0.4636 | Train f1score: 0.4840 | Train acc: 0.5612 | Train kappa: 0.1989 
Val loss: 0.8488 | Val precision: 0.1224 | Val recall: 0.3318 | Val f1score: 0.1788 | Val acc: 0.3666 | Val kappa: 0.0000 

Epoch: 2 
Learning rate: 0.00375
Train loss: 0.7064 | Train precision: 0.6059 | Train recall: 0.5611 | Train f1score: 0.5803 | Train acc: 0.6031 | Train kappa: 0.3339 
Val loss: 1.4486 | Val precision: 0.4507 | Val recall: 0.6372 | Val f1score: 0.3994 | Val acc: 0.4671 | Val kappa: 0.2243 

Epoch: 3 
Learning rate: 0.0028125
Train loss: 0.6695 | Train precision: 0.6660 | Train recall: 0.6407 | Train f1score: 0.6525 | Train acc: 0.6380 | Train kappa: 0.3973 
Val loss: 0.9433 | Val precision: 0.4196 | Val recall: 0.3577 | Val f1score: 0.2419 | Val acc: 0.4181 | Val kappa: 0.0625 

Epoch: 4 
Learning rate: 0.002109375
Train loss: 0.6512 | Train precision: 0.6951 | Train recall: 0.6829 | Train f1score: 0.

Epoch: 28 
Learning rate: 2.1165284760890633e-06
Train loss: 0.6015 | Train precision: 0.7464 | Train recall: 0.7314 | Train f1score: 0.7387 | Train acc: 0.6826 | Train kappa: 0.4846 
Val loss: 0.6084 | Val precision: 0.7300 | Val recall: 0.7009 | Val f1score: 0.7146 | Val acc: 0.6782 | Val kappa: 0.4310 

Early stopping after epoch 28


0,1
learning_rate,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_f1_score,▁▃▅▆▆▇▇▇▇▇▇▇▇████████████▇██
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1_score,▁▄▂▃▄██▅▇██▆████████████████
val_loss,▃█▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
learning_rate,0.0
train_f1_score,0.73868
train_loss,0.6015
val_f1_score,0.71463
val_loss,0.60835


Deleting variables and emptying cache
Memory allocated: 1059180032 bytes
--------------------------------------------------

Fold : 1
TRAIN POSITIVE RATIO: 0.4259363220787553
VAL POSITIVE RATIO  : 0.43920829406220546
LENGTH TRAIN GROUPS : 4712
LENGTH VAL GROUPS   : 1188


There are no same hash_id values in train, val or test datasets. The datasplit was successful
Check shapes:

Images train dataset: (12469, 250, 250, 3)
Labels train dataset: (12469, 1)

Images validation dataset: (1188, 250, 250, 3)
Labels validation dataset: (1188, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in train dataset: 7359
Stressed trees in train dataset: 4909
Dead trees in train dataset: 201
Healthy trees in validation dataset: 660
Stressed trees in validation dataset: 502
Dead trees in validation dataset: 26
Ratio health trees in validation dataset: 0.07167680278019113
Ratio stressed trees in validation dataset: 0.08124291956627286
Ratio dead trees in validation dataset: 0.09811320754716982
--------------------------------------------------

Creating datasets for fold: 2

Creating dataloaders for fold: 2

[INFO] Created new effnet_b7 model.

[INFO] Fold number: 2
[INFO] Number of epochs: 50
[INFO] 

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 
Learning rate: 0.005
Train loss: 0.8153 | Train precision: 0.4519 | Train recall: 0.3994 | Train f1score: 0.4123 | Train acc: 0.5305 | Train kappa: 0.1306 
Val loss: 0.9768 | Val precision: 0.3179 | Val recall: 0.3321 | Val f1score: 0.2461 | Val acc: 0.5507 | Val kappa: -0.0075 

Epoch: 2 
Learning rate: 0.00375
Train loss: 0.7104 | Train precision: 0.5888 | Train recall: 0.5680 | Train f1score: 0.5772 | Train acc: 0.6105 | Train kappa: 0.3351 
Val loss: 0.6870 | Val precision: 0.5794 | Val recall: 0.6609 | Val f1score: 0.5827 | Val acc: 0.6267 | Val kappa: 0.3299 

Epoch: 3 
Learning rate: 0.0028125
Train loss: 0.6837 | Train precision: 0.6300 | Train recall: 0.6023 | Train f1score: 0.6147 | Train acc: 0.6237 | Train kappa: 0.3660 
Val loss: 0.7274 | Val precision: 0.4264 | Val recall: 0.4368 | Val f1score: 0.4291 | Val acc: 0.6385 | Val kappa: 0.3313 

Epoch: 4 
Learning rate: 0.002109375
Train loss: 0.6592 | Train precision: 0.6724 | Train recall: 0.6387 | Train f1score: 0

Epoch: 28 
Learning rate: 2.1165284760890633e-06
Train loss: 0.6059 | Train precision: 0.7363 | Train recall: 0.7281 | Train f1score: 0.7322 | Train acc: 0.6669 | Train kappa: 0.4577 
Val loss: 0.5907 | Val precision: 0.7784 | Val recall: 0.7794 | Val f1score: 0.7787 | Val acc: 0.6976 | Val kappa: 0.4803 

Epoch: 29 
Learning rate: 1.5873963570667977e-06
Train loss: 0.5916 | Train precision: 0.7562 | Train recall: 0.7390 | Train f1score: 0.7473 | Train acc: 0.6890 | Train kappa: 0.4914 
Val loss: 0.5912 | Val precision: 0.7778 | Val recall: 0.7788 | Val f1score: 0.7782 | Val acc: 0.6968 | Val kappa: 0.4789 

Epoch: 30 
Learning rate: 1.1905472678000981e-06
Train loss: 0.6057 | Train precision: 0.7356 | Train recall: 0.7240 | Train f1score: 0.7297 | Train acc: 0.6719 | Train kappa: 0.4645 
Val loss: 0.5919 | Val precision: 0.7732 | Val recall: 0.7748 | Val f1score: 0.7736 | Val acc: 0.6892 | Val kappa: 0.4668 

Epoch: 31 
Learning rate: 1e-06
Train loss: 0.6018 | Train precision: 0.7532

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
learning_rate,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_f1_score,▁▄▅▆▇▇▇▇█▇████████████████████████████
train_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁
val_f1_score,▁▅▃▆▇▇▇▇▇▇████████████████████████████
val_loss,█▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
learning_rate,0.0
train_f1_score,0.74078
train_loss,0.60007
val_f1_score,0.77649
val_loss,0.59116


Deleting variables and emptying cache
Memory allocated: 1067544064 bytes
--------------------------------------------------

Fold : 2
TRAIN POSITIVE RATIO: 0.42986172168491726
VAL POSITIVE RATIO  : 0.42375039796243236
LENGTH TRAIN GROUPS : 4713
LENGTH VAL GROUPS   : 1187


There are no same hash_id values in train, val or test datasets. The datasplit was successful
Check shapes:

Images train dataset: (12511, 250, 250, 3)
Labels train dataset: (12511, 1)

Images validation dataset: (1187, 250, 250, 3)
Labels validation dataset: (1187, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in train dataset: 7352
Stressed trees in train dataset: 4940
Dead trees in train dataset: 219
Healthy trees in validation dataset: 691
Stressed trees in validation dataset: 472
Dead trees in validation dataset: 24
Ratio health trees in validation dataset: 0.07504344048653346
Ratio stressed trees in validation dataset: 0.0763877650105195
Ratio dead trees in validation dataset: 0.09056603773584905
--------------------------------------------------

Creating datasets for fold: 3

Creating dataloaders for fold: 3

[INFO] Created new effnet_b7 model.

[INFO] Fold number: 3
[INFO] Number of epochs: 50
[INFO] B

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 
Learning rate: 0.005
Train loss: 0.8061 | Train precision: 0.4933 | Train recall: 0.4173 | Train f1score: 0.4369 | Train acc: 0.5330 | Train kappa: 0.1483 
Val loss: 1.0668 | Val precision: 0.1329 | Val recall: 0.3333 | Val f1score: 0.1900 | Val acc: 0.3986 | Val kappa: 0.0000 

Epoch: 2 
Learning rate: 0.00375
Train loss: 0.7198 | Train precision: 0.5826 | Train recall: 0.5641 | Train f1score: 0.5721 | Train acc: 0.5901 | Train kappa: 0.3190 
Val loss: 0.6971 | Val precision: 0.7312 | Val recall: 0.4194 | Val f1score: 0.4014 | Val acc: 0.6030 | Val kappa: 0.1609 

Epoch: 3 
Learning rate: 0.0028125
Train loss: 0.6871 | Train precision: 0.6542 | Train recall: 0.6244 | Train f1score: 0.6376 | Train acc: 0.6151 | Train kappa: 0.3655 
Val loss: 0.6716 | Val precision: 0.5732 | Val recall: 0.6883 | Val f1score: 0.6080 | Val acc: 0.6478 | Val kappa: 0.3828 

Epoch: 4 
Learning rate: 0.002109375
Train loss: 0.6648 | Train precision: 0.6821 | Train recall: 0.6525 | Train f1score: 0.

Epoch: 28 
Learning rate: 2.1165284760890633e-06
Train loss: 0.5921 | Train precision: 0.7609 | Train recall: 0.7502 | Train f1score: 0.7555 | Train acc: 0.6856 | Train kappa: 0.4986 
Val loss: 0.6068 | Val precision: 0.6965 | Val recall: 0.7162 | Val f1score: 0.7059 | Val acc: 0.6757 | Val kappa: 0.4370 

Epoch: 29 
Learning rate: 1.5873963570667977e-06
Train loss: 0.5964 | Train precision: 0.7625 | Train recall: 0.7390 | Train f1score: 0.7502 | Train acc: 0.6832 | Train kappa: 0.4889 
Val loss: 0.6070 | Val precision: 0.6973 | Val recall: 0.7174 | Val f1score: 0.7069 | Val acc: 0.6765 | Val kappa: 0.4389 

Epoch: 30 
Learning rate: 1.1905472678000981e-06
Train loss: 0.5982 | Train precision: 0.7492 | Train recall: 0.7443 | Train f1score: 0.7467 | Train acc: 0.6809 | Train kappa: 0.4952 
Val loss: 0.6070 | Val precision: 0.6899 | Val recall: 0.6891 | Val f1score: 0.6895 | Val acc: 0.6748 | Val kappa: 0.4321 

Early stopping after epoch 30


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
learning_rate,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_f1_score,▁▄▅▆▇▇▇▇▇██▇██████████████████
train_loss,█▅▄▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁
val_f1_score,▁▄▇▇▇█▇██▇████████████████████
val_loss,█▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
learning_rate,0.0
train_f1_score,0.74671
train_loss,0.59818
val_f1_score,0.68948
val_loss,0.60695


Deleting variables and emptying cache
Memory allocated: 1069084160 bytes
--------------------------------------------------

Fold : 3
TRAIN POSITIVE RATIO: 0.42982176957352003
VAL POSITIVE RATIO  : 0.42380025940337224
LENGTH TRAIN GROUPS : 4733
LENGTH VAL GROUPS   : 1167


There are no same hash_id values in train, val or test datasets. The datasplit was successful
Check shapes:

Images train dataset: (12568, 250, 250, 3)
Labels train dataset: (12568, 1)

Images validation dataset: (1167, 250, 250, 3)
Labels validation dataset: (1167, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in train dataset: 7387
Stressed trees in train dataset: 4960
Dead trees in train dataset: 221
Healthy trees in validation dataset: 685
Stressed trees in validation dataset: 464
Dead trees in validation dataset: 18
Ratio health trees in validation dataset: 0.07439183318853171
Ratio stressed trees in validation dataset: 0.07509305712898527
Ratio dead trees in validation dataset: 0.06792452830188679
--------------------------------------------------

Creating datasets for fold: 4

Creating dataloaders for fold: 4

[INFO] Created new effnet_b7 model.

[INFO] Fold number: 4
[INFO] Number of epochs: 50
[INFO] 

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 
Learning rate: 0.005
Train loss: 0.8011 | Train precision: 0.5121 | Train recall: 0.4784 | Train f1score: 0.4928 | Train acc: 0.5289 | Train kappa: 0.1924 
Val loss: 1.2825 | Val precision: 0.1956 | Val recall: 0.3333 | Val f1score: 0.2465 | Val acc: 0.5868 | Val kappa: 0.0000 

Epoch: 2 
Learning rate: 0.00375
Train loss: 0.7049 | Train precision: 0.6383 | Train recall: 0.6204 | Train f1score: 0.6288 | Train acc: 0.6067 | Train kappa: 0.3640 
Val loss: 0.6621 | Val precision: 0.7319 | Val recall: 0.5271 | Val f1score: 0.5478 | Val acc: 0.6389 | Val kappa: 0.2577 

Epoch: 3 
Learning rate: 0.0028125
Train loss: 0.6658 | Train precision: 0.6852 | Train recall: 0.6637 | Train f1score: 0.6739 | Train acc: 0.6374 | Train kappa: 0.4237 
Val loss: 1.0282 | Val precision: 0.4243 | Val recall: 0.6202 | Val f1score: 0.3842 | Val acc: 0.4896 | Val kappa: 0.3116 

Epoch: 4 
Learning rate: 0.002109375
Train loss: 0.6488 | Train precision: 0.7055 | Train recall: 0.6853 | Train f1score: 0.

Epoch: 28 
Learning rate: 2.1165284760890633e-06
Train loss: 0.5972 | Train precision: 0.7550 | Train recall: 0.7367 | Train f1score: 0.7453 | Train acc: 0.6787 | Train kappa: 0.4899 
Val loss: 0.5933 | Val precision: 0.6816 | Val recall: 0.7590 | Val f1score: 0.7146 | Val acc: 0.6806 | Val kappa: 0.4309 

Epoch: 29 
Learning rate: 1.5873963570667977e-06
Train loss: 0.5967 | Train precision: 0.7519 | Train recall: 0.7309 | Train f1score: 0.7408 | Train acc: 0.6751 | Train kappa: 0.4850 
Val loss: 0.5916 | Val precision: 0.7071 | Val recall: 0.7627 | Val f1score: 0.7320 | Val acc: 0.6884 | Val kappa: 0.4389 

Epoch: 30 
Learning rate: 1.1905472678000981e-06
Train loss: 0.5976 | Train precision: 0.7492 | Train recall: 0.7421 | Train f1score: 0.7456 | Train acc: 0.6786 | Train kappa: 0.4890 
Val loss: 0.5947 | Val precision: 0.6704 | Val recall: 0.7577 | Val f1score: 0.7065 | Val acc: 0.6762 | Val kappa: 0.4271 

Epoch: 31 
Learning rate: 1e-06
Train loss: 0.5999 | Train precision: 0.7398

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
learning_rate,█▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_f1_score,▁▅▆▆▇▇▇▇▇▇▇████████████████████
train_loss,█▅▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_f1_score,▁▅▃▇▇▇▆▇▇▃█▇▇█▇▇██▇██▇███▇▇▇█▇█
val_loss,█▂▅▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
learning_rate,0.0
train_f1_score,0.74039
train_loss,0.59994
val_f1_score,0.72327
val_loss,0.59086


Deleting variables and emptying cache
Memory allocated: 1063552512 bytes
--------------------------------------------------

Fold : 4
TRAIN POSITIVE RATIO: 0.4254473161033797
VAL POSITIVE RATIO  : 0.44166395840103995
LENGTH TRAIN GROUPS : 4732
LENGTH VAL GROUPS   : 1168


There are no same hash_id values in train, val or test datasets. The datasplit was successful
Check shapes:

Images train dataset: (12575, 250, 250, 3)
Labels train dataset: (12575, 1)

Images validation dataset: (1169, 250, 250, 3)
Labels validation dataset: (1169, 1)

--------------------------------------------------
Check if the split was stratified: (random_state=42)
Healthy trees in train dataset: 7449
Stressed trees in train dataset: 4902
Dead trees in train dataset: 224
Healthy trees in validation dataset: 672
Stressed trees in validation dataset: 480
Dead trees in validation dataset: 17
Ratio health trees in validation dataset: 0.07298001737619461
Ratio stressed trees in validation dataset: 0.07768247289205372
Ratio dead trees in validation dataset: 0.06415094339622641
--------------------------------------------------

Creating datasets for fold: 5

Creating dataloaders for fold: 5

[INFO] Created new effnet_b7 model.

[INFO] Fold number: 5
[INFO] Number of epochs: 50
[INFO] 

  0%|          | 0/50 [00:00<?, ?it/s]

Epoch: 1 
Learning rate: 0.005
Train loss: 0.8048 | Train precision: 0.5267 | Train recall: 0.4415 | Train f1score: 0.4686 | Train acc: 0.5195 | Train kappa: 0.1217 
Val loss: 1.0228 | Val precision: 0.3189 | Val recall: 0.6028 | Val f1score: 0.2540 | Val acc: 0.3750 | Val kappa: 0.0128 

Epoch: 2 
Learning rate: 0.00375
Train loss: 0.7075 | Train precision: 0.6405 | Train recall: 0.5924 | Train f1score: 0.6133 | Train acc: 0.6062 | Train kappa: 0.3397 
Val loss: 0.6651 | Val precision: 0.7108 | Val recall: 0.5631 | Val f1score: 0.6025 | Val acc: 0.6337 | Val kappa: 0.2716 

Epoch: 3 
Learning rate: 0.0028125
Train loss: 0.6851 | Train precision: 0.6738 | Train recall: 0.6540 | Train f1score: 0.6634 | Train acc: 0.6190 | Train kappa: 0.3974 
Val loss: 0.6702 | Val precision: 0.7282 | Val recall: 0.4476 | Val f1score: 0.4641 | Val acc: 0.6033 | Val kappa: 0.1692 

Epoch: 4 
Learning rate: 0.002109375
Train loss: 0.6564 | Train precision: 0.7014 | Train recall: 0.6776 | Train f1score: 0.

#### Create test dataset and test dataloader 

In [None]:
# create test dataset
test_dataset = data_loader.CustomTestDataset(
    data = test_image_set,
    labels = test_label_set,
    class_names=class_names, 
    species = test_species_set,
    kkl = None,
    transform=transform
)

# create test dataloader
test_dataloader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             # persistent workers are only valid with at least one worker process;
                             # passing True together with num_workers=0 raises a ValueError
                             persistent_workers=NUM_WORKERS > 0,
                             pin_memory=True,
                             num_workers=NUM_WORKERS,
                             # keep the sample order fixed so predictions line up with the label set
                             shuffle=False,
                             # evaluation must see every test sample: do not drop the last partial batch
                             drop_last=False)

#### Perform ensembling of the five best models

In [None]:
# Setup the best model filepaths (one checkpoint per ensemble member)
# NOTE(review): all five paths currently point at the same file, so the "ensemble" averages
# five copies of a single model. The file also lives in an `effnet_b0` folder while EffNetB7
# instances are created below -- confirm and replace with the best checkpoint of each fold.
best_model1_path = r"C:\Users\lwfeckesim\01_PyTorch\wze-uav\wze-uav-master\effnet_b0\01_18_epochs.pth"
best_model2_path = r"C:\Users\lwfeckesim\01_PyTorch\wze-uav\wze-uav-master\effnet_b0\01_18_epochs.pth"
best_model3_path = r"C:\Users\lwfeckesim\01_PyTorch\wze-uav\wze-uav-master\effnet_b0\01_18_epochs.pth"
best_model4_path = r"C:\Users\lwfeckesim\01_PyTorch\wze-uav\wze-uav-master\effnet_b0\01_18_epochs.pth"
best_model5_path = r"C:\Users\lwfeckesim\01_PyTorch\wze-uav\wze-uav-master\effnet_b0\01_18_epochs.pth"
best_model_paths = [best_model1_path, best_model2_path, best_model3_path, best_model4_path, best_model5_path]

# Instantiate one EffNetB7 per checkpoint, load the saved state_dict() into it and switch it to eval mode.
# For custom models with more than three bands as input, build each member with
# model_effnet.EfficientNet.from_pretrained('efficientnet-b7', in_channels=n_bands, num_classes=num_classes, dropout_rate=dropout_rate)
# instead of models.create_effnetb7(...).
ensemble_models = []
for checkpoint_path in best_model_paths:
    ensemble_member = models.create_effnetb7(output_shape=num_classes, unfreeze=unfreeze, dropout_rate=dropout_rate, device=device)
    # map_location allows a checkpoint saved on a GPU to be loaded on whichever device is active
    ensemble_member.load_state_dict(torch.load(checkpoint_path, map_location=device))
    ensemble_member.eval()
    ensemble_models.append(ensemble_member)

# Keep the original per-model names available for later cells
model1, model2, model3, model4, model5 = ensemble_models

# One list of per-batch class probabilities per model, plus the matching ground-truth labels
per_model_probs = [[] for _ in ensemble_models]
ensemble_targets = []

# Loop through the test dataset and generate predictions for each model
with torch.no_grad():
    for batch in test_dataloader:
        # Index instead of unpacking so additional batch items (e.g. species) do not break the loop
        inputs = batch[0].to(device)
        # Collect labels from the same batches that are predicted, so both always have equal length
        ensemble_targets.append(batch[1].cpu().numpy())

        for member_probs, ensemble_member in zip(per_model_probs, ensemble_models):
            # Soft voting: average class probabilities rather than raw logits, whose scale can
            # differ between models
            member_probs.append(torch.softmax(ensemble_member(inputs), dim=1).cpu().numpy())

# Concatenate the per-batch predictions of every model
all_preds_model1, all_preds_model2, all_preds_model3, all_preds_model4, all_preds_model5 = [
    np.concatenate(member_probs) for member_probs in per_model_probs
]
ensemble_targets = np.concatenate(ensemble_targets).reshape(-1)

# Calculate the ensemble predictions
ensemble_preds = np.mean([all_preds_model1, all_preds_model2, all_preds_model3, all_preds_model4, all_preds_model5], axis=0)
ensemble_labels = np.argmax(ensemble_preds, axis=1)

# Calculate the evaluation metrics for the ensemble model
# (classification_report and confusion_matrix are imported in the first cell of the notebook)
print(classification_report(ensemble_targets, ensemble_labels))
print(confusion_matrix(ensemble_targets, ensemble_labels))






In [None]:
def make_predictions(model: torch.nn.Module, 
                     test_dataloader: torch.utils.data.DataLoader,
                     device: torch.device,
                     loss_fn: torch.nn.Module = None):
    """Run `model` over `test_dataloader` and compute test metrics on all predictions.

    Precision, recall, F1 and accuracy are computed once over the complete set of
    predictions rather than averaged per batch: per-batch macro averages are biased
    whenever a batch is missing one of the classes.

    Args:
        model: Trained network; called as `model(X)` and expected to return class
            logits with the class dimension at index 1.
        test_dataloader: Yields `(X, y, species)` batches. `species` is not used here.
        device: Device the inputs and targets are moved to. The model is not moved.
        loss_fn: Optional callable `loss_fn(logits, targets)` returning a scalar tensor.
            When omitted, no loss is computed and `test_loss` is NaN.

    Returns:
        A tuple `(y_pred_tensor, y_labels_tensor, test_loss, test_recall, test_precision,
        test_f1_score, test_acc, y_logit, y_pred, y, y_preds)` where
        `y_pred_tensor` / `y_labels_tensor` are the concatenated CPU predictions / targets,
        `test_loss` is the mean of the per-batch losses (NaN without `loss_fn`),
        `y_logit`, `y_pred`, `y` belong to the last batch only, and
        `y_preds` is the list of per-batch CPU prediction tensors.

    Raises:
        ValueError: If `test_dataloader` yields no batches.
    """
    # Imported locally so the function does not depend on names leaking in from star-imports
    from sklearn.metrics import f1_score, precision_score, recall_score

    # Class indices 0..n-1 as implied by the notebook-level `class_names`
    label_ids = list(range(len(class_names)))

    y_preds = []
    y_labels = []
    loss_sum = 0.0
    y_logit = y_pred = y = None

    # 1. Make predictions with trained model
    model.eval()
    with torch.inference_mode():
        for X, y, _species in tqdm(test_dataloader, desc="Making predictions"):
            # Send data and targets to target device
            X, y = X.to(device), y.to(device)
            # Do the forward pass
            y_logit = model(X)
            # softmax is monotonic, so the argmax of the logits is the predicted label
            y_pred = y_logit.argmax(dim=1)
            # Put predictions on CPU for evaluation
            y_preds.append(y_pred.cpu())
            y_labels.append(y.cpu())
            if loss_fn is not None:
                loss_sum += loss_fn(y_logit, y).item()

    if not y_preds:
        raise ValueError("test_dataloader yielded no batches; nothing to evaluate")

    # Concatenate list of predictions into a tensor
    y_pred_tensor = torch.cat(y_preds)
    y_labels_tensor = torch.cat(y_labels)
    y_true = y_labels_tensor.numpy()
    y_hat = y_pred_tensor.numpy()

    # 2. Metrics over the whole test set, all with the same averaging settings
    metric_kwargs = dict(average='macro', zero_division=0, labels=label_ids)
    test_precision = precision_score(y_true, y_hat, **metric_kwargs)
    test_recall = recall_score(y_true, y_hat, **metric_kwargs)
    test_f1_score = f1_score(y_true, y_hat, **metric_kwargs)
    test_acc = float((y_hat == y_true).mean())
    # Mean of the per-batch losses; NaN signals "not computed" instead of a misleading 0
    test_loss = loss_sum / len(y_preds) if loss_fn is not None else float("nan")

    # Print classification report (labels passed explicitly so a class that is absent
    # from the test data does not break the target_names mapping)
    report = classification_report(y_true, y_hat, labels=label_ids, target_names=class_names, zero_division=0)
    print(report)
    
    return y_pred_tensor, y_labels_tensor, test_loss, test_recall, test_precision, test_f1_score, test_acc, y_logit, y_pred, y, y_preds

In [None]:
# 2. Evaluate the best model on the test set and compare predictions to targets
(
    y_pred_tensor, y_labels_tensor,
    test_loss, test_recall, test_precision, test_f1_score, test_acc,
    y_logit, y_pred, y, y_preds,
) = make_predictions(model=best_model, test_dataloader=test_dataloader, device=device)

# scikit-learn and mlxtend work on NumPy arrays, so convert both tensors
y_labels_tensor = y_labels_tensor.detach().cpu().numpy()
y_pred_tensor = y_pred_tensor.detach().cpu().numpy()

# Fix the class order of the matrix rows/columns to the three class indices
labels = np.array([0, 1, 2])
confmat = confusion_matrix(y_true=y_labels_tensor, y_pred=y_pred_tensor, labels=labels)

# 3. Plot the confusion matrix with class names on both axes
fig, ax = plot_confusion_matrix(
    figsize=(10, 7),
    class_names=class_names,
    conf_mat=confmat,
);

# Summary of the test metrics; the last three lines show the final batch only
print(
    f"Test loss: {test_loss}",
    f"Test precision: {test_precision}",
    f"Test recall: {test_recall}",
    f"Test F1score: {test_f1_score}",
    f"Test Accuracy: {test_acc}",
    f"Test Logits: {y_logit}",
    f"Test Predictions: {y_pred}",
    f"Test Labels: {y}",
    sep="\n",
)

In [None]:
# Number of per-batch prediction tensors returned by make_predictions (one entry per test batch)
len(y_preds)

In [None]:
# All ground-truth test labels collected by make_predictions.
# `y_set` was only ever assigned in commented-out code, so referencing it raised a NameError.
y_labels_tensor

In [None]:
# Inspect the `labels` attribute of the test dataset
# (presumably the `test_label_set` passed to CustomTestDataset -- verify in data_loader)
test_dataset.labels

In [None]:
# Inline evaluation of `model` on the test set (same procedure as make_predictions, run at
# notebook level so the intermediate variables stay available for inspection).
from sklearn.metrics import f1_score, precision_score, recall_score

y_preds = []
y_labels = []
labels = np.array([0,1,2])
# No loss function is applied in this cell; NaN marks the loss as "not computed"
test_loss = float("nan")
model.eval()
with torch.inference_mode():
    for batch in tqdm(test_dataloader, desc="Making predictions"):
        # Index instead of unpacking so batches that also carry species still work
        # Send data and targets to target device
        X, y = batch[0].to(device), batch[1].to(device)
        # Do the forward pass
        y_logit = model(X)
        # Turn predictions from logits -> prediction probabilities -> predictions labels
        y_pred = torch.softmax(y_logit, dim=1).argmax(dim=1)
        # Put predictions on CPU for evaluation
        y_preds.append(y_pred.cpu())
        y_labels.append(y.cpu())

# Last-batch arrays, kept because the inspection cells below read them
y_pred_class = y_pred.detach().cpu().numpy()
y_class = y.detach().cpu().numpy()

# Concatenate list of predictions into a tensor
y_pred_tensor = torch.cat(y_preds)
# Ground truth comes from the labels collected in the loop (`y_set` was never defined)
y_true = torch.cat(y_labels).numpy()
y_pred_all = y_pred_tensor.numpy()

# Metrics over the whole test set: per-batch macro averages are biased when a batch
# lacks one of the classes
test_precision = precision_score(y_true, y_pred_all, average='macro', zero_division=0, labels=labels)
test_recall = recall_score(y_true, y_pred_all, average='macro', zero_division=0, labels=labels)
test_f1_score = f1_score(y_true, y_pred_all, average='macro', zero_division=0, labels=labels)
test_acc = float((y_pred_all == y_true).mean())

# Print classification report (labels passed explicitly so an absent class does not
# break the target_names mapping)
report = classification_report(y_true, y_pred_all, labels=labels, target_names=class_names, zero_division=0)
print(report)

In [None]:
# Display the macro-averaged F1 score stored in `test_f1_score`
test_f1_score

In [None]:
# Element-wise comparison of the labels and predicted classes of the last evaluated batch
make = (y_class == y_pred_class)
make

In [None]:
# Recompute the predicted class indices from the logits of the last evaluated batch
torch.softmax(y_logit, dim=1).argmax(dim=1)

In [None]:
# Display the targets of the last evaluated batch
y

In [None]:
# Accuracy of the last evaluated batch; divide by the actual batch length instead of a
# hard-coded 16 so the value stays correct for any batch size (including a partial last batch)
test = (y_pred == y).sum().item()/len(y_pred)

In [None]:
# Display the value stored in `test` by the previous cell
test

In [None]:
 # NumPy copy (detached, on CPU) of the predicted class indices held in `y_pred`
 y_pred_class = y_pred.detach().cpu().numpy() 

In [None]:
# Display the NumPy array of predicted class indices
y_pred_class