In [1]:
import torch
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
import wandb

from GLC23PatchesProviders import MultipleRasterPatchProvider, RasterPatchProvider, JpegPatchProvider
from GLC23Datasets import PatchesDataset, PatchesDatasetMultiLabel
from models import cnn

In [2]:
# SAMPLE DATA
data_path = 'data/sample_data/' # root path of the data
presence_only_path = data_path+'Presence_only_occurrences/Presences_only_train_sample.csv'
presence_absence_path = data_path+'Presence_Absences_occurrences/Presences_Absences_train_sample.csv'

In [3]:
n_epochs = 10
batch_size = 12

In [4]:
dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(dev)

cuda:0


In [5]:
# COVARIATES
p_bioclim = MultipleRasterPatchProvider(
    data_path+'EnvironmentalRasters/Climate/BioClimatic_Average_1981-2010/'
) #19
p_hfp_d = MultipleRasterPatchProvider(data_path+'EnvironmentalRasters/HumanFootprint/detailed/') #14
p_hfp_s = RasterPatchProvider(data_path+'EnvironmentalRasters/HumanFootprint/summarized/HFP2009_WGS84.tif') #1

In [6]:
# TRAINING DATA: presence only
presence_only = PatchesDatasetMultiLabel(
    occurrences=presence_only_path, 
    providers=(p_bioclim, p_hfp_d, p_hfp_s), 
    device=dev
)
print(f"\nTRAINING DATA: n={len(presence_only)}\nin_shape = {presence_only[0][0].cpu().detach().shape}\nout_shape = {presence_only[0][1].cpu().detach().shape}")

n_features = presence_only[0][0].cpu().detach().shape[0]
n_species = len(presence_only.unique_sorted_targets)
print(f"Number of covariates = {n_features}")
print(f"Number of species = {n_species}")


TRAINING DATA: n=100
in_shape = torch.Size([6, 128, 128])
out_shape = torch.Size([96])
Number of covariates = 6
Number of species = 96


In [7]:
# VALIDATION DATA: presence absence
presence_absence = PatchesDatasetMultiLabel(
    occurrences=presence_absence_path, 
    providers=(p_bioclim, p_hfp_d, p_hfp_s),
    ref_targets=presence_only.unique_sorted_targets,
    device=dev
)
print(f"VALIDATION: n={len(presence_absence)}\nin_shape={presence_absence[0][0].cpu().detach().shape}\nout_shape={presence_absence[0][1].cpu().detach().shape}, n_species={len(presence_absence.unique_sorted_targets)}")

VALIDATION: n=100
in_shape=torch.Size([6, 128, 128])
out_shape=torch.Size([96]), n_species=96


In [9]:
train_loader = torch.utils.data.DataLoader(presence_only, shuffle=True, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(presence_absence, shuffle=True, batch_size=len(presence_absence))
model = cnn(n_features, n_species).to(dev)
loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [10]:
for epoch in range(n_epochs):
    print(f"EPOCH {epoch}")

    model.train()
    for inputs, labels in tqdm(train_loader):
        # forward pass
        y_pred = model(inputs)
        #print(inputs.shape, labels.shape, y_pred.shape)
        loss = loss_fn(y_pred, labels)

        # backward pass and weight update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("LOSS: ", loss.cpu().detach())

    model.eval()
    for inputs, labels in test_loader:
        y_pred = model(inputs)
        val_loss = loss_fn(y_pred, labels)

        y_pred = y_pred.cpu().detach().numpy()
        labels = labels.cpu().detach().numpy()
        print(y_pred.shape, labels.shape)
        print(labels)
        auc_rocs = roc_auc_score(labels, y_pred, average=None)
        avg_auc = auc_rocs.mean()
        print(f"{epoch}) AVG_AUC={avg_auc}")

EPOCH 0


100%|██████████| 9/9 [00:01<00:00,  6.87it/s]


LOSS:  tensor(0.7216, dtype=torch.float64)
(100, 96) (100, 96)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [12]:
labels.shape

(100, 96)

In [17]:
import numpy as np

In [32]:
idx = np.where(labels.sum(axis=0) != 0)[0]
idx

array([ 0,  3,  9, 38, 41, 46, 57, 61, 83, 93])

In [31]:
len(labels.sum(axis=0))

96

In [34]:
labels[:,idx].shape

(100, 10)

In [28]:
y_pred[idx,:].shape

(12, 96)

In [35]:
idx = np.where(labels.sum(axis=0) != 0)[0]
idx
auc_rocs = roc_auc_score(labels[:,idx], y_pred[:,idx], average=None)

In [36]:
auc_rocs

array([0.44444444, 0.47979798, 0.51530612, 0.41414141, 0.18181818,
       0.98989899, 0.65656566, 0.06565657, 0.40306122, 0.47979798])