# BigEarthNet - Linear Probing

In [33]:
import torch
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from scipy.spatial import distance

In [34]:
train_feat = torch.load('./features/feat-train-bigearthnet.pth')
train_labels = torch.load('./features/labels-train-bigearthnet.pth')
test_feat = torch.load('./features/feat-test-bigearthnet.pth')
test_labels = torch.load('./features/labels-test-bigearthnet.pth')

flattened_train_features = np.array(train_feat.view(-1, 768))
flattened_train_labels = np.array(train_labels.view(-1, 43))
flattened_test_features = np.array(test_feat.view(-1, 768))
flattened_test_labels = np.array(test_labels.view(-1, 43))

In [35]:
print(flattened_train_features.shape)
print(flattened_train_labels.shape)

(269696, 768)
(269696, 43)


In [36]:
train_count = flattened_train_features.shape[0]
test_count = flattened_test_features.shape[0]

In [37]:
rf_classifier = RandomForestClassifier(
                        n_estimators=20, 
                        criterion='gini', 
                        random_state=42, 
                        max_depth=None,
                        min_samples_split=10,
                        min_samples_leaf=10,
                        max_features='sqrt',
                        max_leaf_nodes=20000,
                        bootstrap=False,
                        n_jobs=-1)
rf_classifier.fit(flattened_train_features[:train_count], flattened_train_labels[:train_count])

In [38]:
test_preds = rf_classifier.predict(flattened_test_features[:test_count])

In [49]:
def score(predictions, labels, type="f2_macro"):
    if type == "hl":
        # compute hamming distance per label
        distances = np.zeros(predictions.shape[0])
        for i in range(predictions.shape[0]):
            distances[i] = distance.hamming(predictions[i], labels[i])
        return np.average(distances)
    
    true_positives = np.zeros_like(predictions)
    false_positives = np.zeros_like(predictions)
    false_negatives = np.zeros_like(predictions)
    for i in range(len(predictions)):
        for j in range(len(predictions[i])):
            if predictions[i,j] == 1 and labels[i,j] == 1:
                true_positives[i,j] = 1
            elif predictions[i,j] == 1 and labels[i,j] == 0:
                false_positives[i,j] = 1
            elif predictions[i,j] == 0 and labels[i,j] == 1:
                false_negatives[i,j] = 1

    if type == "f2_macro":
        scores = (np.sum(5 * true_positives, axis=1)) / np.sum(5 * true_positives + 4 * false_negatives + false_positives, axis=1)
        return np.average(scores)
    elif type == "f2_micro":
        return np.sum(5 * true_positives) / np.sum(5 * true_positives + 4 * false_negatives + false_positives)

In [50]:
print("f2_macro: ", score(test_preds, flattened_test_labels[:test_count]))
print("f2_micro: ", score(test_preds, flattened_test_labels[:test_count], "f2_micro"))
print("hamming loss: ", score(test_preds, flattened_test_labels[:test_count], "hl"))

  scores = (np.sum(5 * true_positives, axis=1)) / np.sum(5 * true_positives + 4 * false_negatives + false_positives, axis=1)


f2_macro:  nan
f2_micro:  0.33728045
hamming loss:  0.05607642378311914
