In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.1-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 6.9 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 52.2 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 7.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 53.0 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling Py

In [2]:
import numpy as np
import pandas as pd
import random
import torch
import torch.nn as nn

SEED = 1234

In [3]:
def set_seeds(seed=1234):
    """Set seeds for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

In [4]:
set_seeds(seed = SEED) # Set seeds for reproducibility

In [5]:
# Set device
cuda = True
device = torch.device("cuda" if (torch.cuda.is_available() and cuda) else "cpu")
torch.set_default_tensor_type("torch.FloatTensor")
if device.type == "cuda":
    torch.set_default_tensor_type("torch.cuda.FloatTensor")
print(device)

cuda


## Preparing Dataset

In [6]:
df = pd.read_csv("./esg_bert_kpi_map.csv")

In [7]:
df = df[df.sentences.apply(lambda x : len(x.split(' ')) < 128)]
df = df.groupby(['KPI']).filter(lambda x: len(x) > 5)

In [8]:
import collections
from sklearn.model_selection import train_test_split

In [9]:
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15

In [10]:
def train_val_test_split(X, y, train_size):
    """Split dataset into data splits."""
    X_train, X_, y_train, y_ = train_test_split(X, y, train_size = TRAIN_SIZE, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_, y_, train_size = 0.5, stratify=y_)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [11]:
# Data
X = df["sentences"].values
y = df["KPI"].values

In [12]:
X.shape, y.shape

((1132,), (1132,))

In [13]:
# Create data splits
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(X=X, y=y, train_size=TRAIN_SIZE)
print (f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print (f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print (f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print (f"Sample point: {X_train[0]} → {y_train[0]}")

X_train: (792,), y_train: (792,)
X_val: (170,), y_val: (170,)
X_test: (170,), y_test: (170,)
Sample point: main challenges 2021 reduce ghg_emissions by 18.46% (scopes 1 and 2) and by 14.46% (scope_3) compared to 2017, in line with the sbt at 2030 of a 1.5c reduction, and maintain carbon neutrality in its direct operations. → co2 equivalent of biogenic co2 emissions


## Label Encoder

In [14]:
import itertools

In [15]:
class LabelEncoder(object):
    
    """Label encoder for tag labels."""
    def __init__(self, class_to_index={}):
        self.class_to_index = class_to_index
        self.index_to_class = {v: k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())

    def __len__(self):
        return len(self.class_to_index)

    def __str__(self):
        return f"<LabelEncoder(num_classes={len(self)})>"

    def fit(self, y):
        classes = np.unique(y)
        for i, class_ in enumerate(classes):
            self.class_to_index[class_] = i
        self.index_to_class = {v: k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())
        return self

    def encode(self, y):
        y_one_hot = np.zeros((len(y), len(self.class_to_index)), dtype=int)
        for i, item in enumerate(y):
            y_one_hot[i][self.class_to_index[item]] = 1
        return y_one_hot

    def decode(self, y):
        classes = []
        for i, item in enumerate(y):
            index = np.where(item == 1)[0][0]
            classes.append(self.index_to_class[index])
        return classes

    def save(self, fp):
        with open(fp, "w") as fp:
            contents = {'class_to_index': self.class_to_index}
            json.dump(contents, fp, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        with open(fp, "r") as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [16]:
# Encode
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
NUM_CLASSES = len(label_encoder)
NUM_CLASSES

63

In [17]:
# Class weights
counts = np.bincount([label_encoder.class_to_index[class_] for class_ in y_train])
class_weights = {i: 1.0/count for i, count in enumerate(counts)}
print(f"counts: {counts}\nweights: {class_weights}")

counts: [15 29  8  8 14  8  9  6 13 13 14  8  6  7  6 23  6 18  9 18  7  6  5  9
  6  9 19  4 23 29 22 11  6  9 24  8 11 13 24 33 16  4 17  6  4 23  4 10
  5  6  8  6 49  4 48  6 15  8  5  8 10  7  7]
weights: {0: 0.06666666666666667, 1: 0.034482758620689655, 2: 0.125, 3: 0.125, 4: 0.07142857142857142, 5: 0.125, 6: 0.1111111111111111, 7: 0.16666666666666666, 8: 0.07692307692307693, 9: 0.07692307692307693, 10: 0.07142857142857142, 11: 0.125, 12: 0.16666666666666666, 13: 0.14285714285714285, 14: 0.16666666666666666, 15: 0.043478260869565216, 16: 0.16666666666666666, 17: 0.05555555555555555, 18: 0.1111111111111111, 19: 0.05555555555555555, 20: 0.14285714285714285, 21: 0.16666666666666666, 22: 0.2, 23: 0.1111111111111111, 24: 0.16666666666666666, 25: 0.1111111111111111, 26: 0.05263157894736842, 27: 0.25, 28: 0.043478260869565216, 29: 0.034482758620689655, 30: 0.045454545454545456, 31: 0.09090909090909091, 32: 0.16666666666666666, 33: 0.1111111111111111, 34: 0.041666666666666664, 35: 0.125,

In [18]:
# Convert labels to tokens
print (f"y_train[0]: {y_train[0]}")
y_train = label_encoder.encode(y_train)
y_val = label_encoder.encode(y_val)
y_test = label_encoder.encode(y_test)
print (f"y_train[0]: {y_train[0]}")
print (f"decode([y_train[0]]): {label_encoder.decode([y_train[0]])}")

y_train[0]: co2 equivalent of biogenic co2 emissions
y_train[0]: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
decode([y_train[0]]): ['co2 equivalent of biogenic co2 emissions']


## Tokenizer

In [19]:
from transformers import AutoTokenizer, AutoModel, AutoConfig

In [20]:
model_name = "nbroad/ESG-BERT"
config = AutoConfig.from_pretrained(model_name)
config.num_labels = NUM_CLASSES
tokenizer = AutoTokenizer.from_pretrained(model_name)
vocab_size = len(tokenizer)
print(vocab_size)

Downloading:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/376 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

30522


In [21]:
encoded_input = tokenizer(X_train.tolist(), return_tensors="pt", padding=True, truncation=True, max_length = 512)
encoded_input

{'input_ids': tensor([[ 101, 2364, 7860,  ...,    0,    0,    0],
        [ 101, 1996, 2194,  ...,    0,    0,    0],
        [ 101, 4230, 1997,  ...,    0,    0,    0],
        ...,
        [ 101, 4888, 1997,  ...,    0,    0,    0],
        [ 101, 1996, 2591,  ...,    0,    0,    0],
        [ 101, 1996, 2334,  ...,    0,    0,    0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}

### ATTENTION MASKS

In [22]:
X_train_ids = encoded_input["input_ids"]
X_train_masks = encoded_input["attention_mask"]
print (X_train_ids.shape, X_train_masks.shape)

torch.Size([792, 203]) torch.Size([792, 203])


In [23]:
encoded_input = tokenizer(X_val.tolist(), return_tensors="pt", padding=True, truncation=True, max_length = 512)
X_val_ids = encoded_input["input_ids"]
X_val_masks = encoded_input["attention_mask"]
print (X_val_ids.shape, X_val_masks.shape)

torch.Size([170, 183]) torch.Size([170, 183])


In [24]:
encoded_input = tokenizer(X_test.tolist(), return_tensors="pt", padding=True, truncation=True, max_length = 512)
X_test_ids = encoded_input["input_ids"]
X_test_masks = encoded_input["attention_mask"]
print (X_test_ids.shape, X_test_masks.shape)

torch.Size([170, 173]) torch.Size([170, 173])


In [25]:
# Decode
print (f"{X_train_ids[0]}\n{tokenizer.decode(X_train_ids[0])}")

tensor([  101,  2364,  7860, 25682,  5547,  1043, 25619,  1035, 11768,  2011,
         2324,  1012,  4805,  1003,  1006,  9531,  2015,  1015,  1998,  1016,
         1007,  1998,  2011,  2403,  1012,  4805,  1003,  1006,  9531,  1035,
         1017,  1007,  4102,  2000,  2418,  1010,  1999,  2240,  2007,  1996,
        24829,  2102,  2012, 18540,  2692,  1997,  1037,  1015,  1012,  1019,
         2278,  7312,  1010,  1998,  5441,  6351, 21083,  1999,  2049,  3622,
         3136,  1012,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

In [26]:
# Sub-word tokens
print (tokenizer.convert_ids_to_tokens(ids=X_train_ids[0]))

['[CLS]', 'main', 'challenges', '2021', 'reduce', 'g', '##hg', '_', 'emissions', 'by', '18', '.', '46', '%', '(', 'scope', '##s', '1', 'and', '2', ')', 'and', 'by', '14', '.', '46', '%', '(', 'scope', '_', '3', ')', 'compared', 'to', '2017', ',', 'in', 'line', 'with', 'the', 'sb', '##t', 'at', '203', '##0', 'of', 'a', '1', '.', '5', '##c', 'reduction', ',', 'and', 'maintain', 'carbon', 'neutrality', 'in', 'its', 'direct', 'operations', '.', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]

### Transforming into Dataset

In [27]:
class TransformerTextDataset(torch.utils.data.Dataset):
    
    def __init__(self, ids, masks, targets):
        self.ids = ids
        self.masks = masks
        self.targets = targets

    def __len__(self):
        return len(self.targets)

    def __str__(self):
        return f"<Dataset(N={len(self)})>"

    def __getitem__(self, index):
        ids = torch.tensor(self.ids[index], dtype=torch.long)
        masks = torch.tensor(self.masks[index], dtype=torch.long)
        targets = torch.FloatTensor(self.targets[index])
        return ids, masks, targets

    def create_dataloader(self, batch_size, shuffle=False, drop_last=False):
        return torch.utils.data.DataLoader(
            dataset=self,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            pin_memory=False)

In [28]:
# Create datasets
train_dataset = TransformerTextDataset(ids=X_train_ids, masks=X_train_masks, targets=y_train)
val_dataset = TransformerTextDataset(ids=X_val_ids, masks=X_val_masks, targets=y_val)
test_dataset = TransformerTextDataset(ids=X_test_ids, masks=X_test_masks, targets=y_test)
print ("Data splits:\n"
    f"  Train dataset:{train_dataset.__str__()}\n"
    f"  Val dataset: {val_dataset.__str__()}\n"
    f"  Test dataset: {test_dataset.__str__()}\n"
    "Sample point:\n"
    f"  ids: {train_dataset[0][0]}\n"
    f"  masks: {train_dataset[0][1]}\n"
    f"  targets: {train_dataset[0][2]}")

Data splits:
  Train dataset:<Dataset(N=792)>
  Val dataset: <Dataset(N=170)>
  Test dataset: <Dataset(N=170)>
Sample point:
  ids: tensor([  101,  2364,  7860, 25682,  5547,  1043, 25619,  1035, 11768,  2011,
         2324,  1012,  4805,  1003,  1006,  9531,  2015,  1015,  1998,  1016,
         1007,  1998,  2011,  2403,  1012,  4805,  1003,  1006,  9531,  1035,
         1017,  1007,  4102,  2000,  2418,  1010,  1999,  2240,  2007,  1996,
        24829,  2102,  2012, 18540,  2692,  1997,  1037,  1015,  1012,  1019,
         2278,  7312,  1010,  1998,  5441,  6351, 21083,  1999,  2049,  3622,
         3136,  1012,   102,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
          

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [29]:
# Create dataloaders
batch_size = 32
train_dataloader = train_dataset.create_dataloader(
    batch_size=batch_size)
val_dataloader = val_dataset.create_dataloader(
    batch_size=batch_size)
test_dataloader = test_dataset.create_dataloader(
    batch_size=batch_size)
batch = next(iter(train_dataloader))
print ("Sample batch:\n"
    f"  ids: {batch[0].size()}\n"
    f"  masks: {batch[1].size()}\n"
    f"  targets: {batch[2].size()}")

Sample batch:
  ids: torch.Size([32, 203])
  masks: torch.Size([32, 203])
  targets: torch.Size([32, 63])


  from ipykernel import kernelapp as app
  app.launch_new_instance()


# Trainer

In [30]:
import torch.nn.functional as F

In [31]:
class Trainer(object):
    
    def __init__(self, model, device, loss_fn=None, optimizer=None, scheduler=None):
        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler

    def train_step(self, dataloader):
        """Train step."""
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):

            # Step
            batch = [item.to(self.device) for item in batch]  # Set device
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Cumulative Metrics
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Validation or test step."""
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Step
                batch = [item.to(self.device) for item in batch]  # Set device
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Cumulative Metrics
                loss += (J - loss) / (i + 1)

                # Store outputs
                y_prob = F.softmax(z).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Prediction step."""
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.inference_mode():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs
                inputs, targets = batch[:-1], batch[-1]
                z = self.model(inputs)

                # Store outputs
                y_prob = F.softmax(z).cpu().numpy()
                y_probs.extend(y_prob)

        return np.vstack(y_probs)

    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        best_val_loss = np.inf
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if not _patience:  # 0
                print("Stopping early!")
                break

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )
        return best_model

# Model

In [32]:
transformer = AutoModel.from_pretrained(model_name)
embedding_dim = transformer.config.hidden_size
embedding_dim

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

Some weights of the model checkpoint at nbroad/ESG-BERT were not used when initializing BertModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


768

In [33]:
class Transformer(nn.Module):
    def __init__(self, transformer, dropout_p, embedding_dim, num_classes):
        super(Transformer, self).__init__()
        self.transformer = transformer
        self.dropout = torch.nn.Dropout(dropout_p)
        self.fc1 = torch.nn.Linear(embedding_dim, num_classes)

    def forward(self, inputs):
        ids, masks = inputs
        seq, pool = self.transformer(input_ids=ids, attention_mask=masks, return_dict=False)
        z = self.dropout(pool)
        z = self.fc1(z)
        return z

In [34]:
# Initialize model
dropout_p = 0.5
model = Transformer(transformer=transformer, dropout_p=dropout_p, embedding_dim=embedding_dim, num_classes=NUM_CLASSES)
model = model.to(device)
#print (model.named_parameters)

# Training

In [35]:
# Arguments
lr = 1e-4
num_epochs = 200
patience = 10

In [36]:
# Define loss
class_weights_tensor = torch.Tensor(np.array(list(class_weights.values())))
loss_fn = nn.BCEWithLogitsLoss(weight=class_weights_tensor)

In [37]:
# Define optimizer & scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=5)

In [38]:
# Trainer module
trainer = Trainer(model=model, device=device, loss_fn=loss_fn, optimizer=optimizer, scheduler=scheduler)

In [39]:
best_model = trainer.train(num_epochs = num_epochs, patience = patience, train_dataloader = train_dataloader, val_dataloader = val_dataloader)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


Epoch: 1 | train_loss: 0.03924, val_loss: 0.01653, lr: 1.00E-04, _patience: 10
Epoch: 2 | train_loss: 0.01246, val_loss: 0.00894, lr: 1.00E-04, _patience: 10
Epoch: 3 | train_loss: 0.00824, val_loss: 0.00781, lr: 1.00E-04, _patience: 10
Epoch: 4 | train_loss: 0.00739, val_loss: 0.00757, lr: 1.00E-04, _patience: 10
Epoch: 5 | train_loss: 0.00711, val_loss: 0.00751, lr: 1.00E-04, _patience: 10
Epoch: 6 | train_loss: 0.00708, val_loss: 0.00751, lr: 1.00E-04, _patience: 10
Epoch: 7 | train_loss: 0.00703, val_loss: 0.00751, lr: 1.00E-04, _patience: 10
Epoch: 8 | train_loss: 0.00696, val_loss: 0.00753, lr: 1.00E-04, _patience: 9
Epoch: 9 | train_loss: 0.00695, val_loss: 0.00754, lr: 1.00E-04, _patience: 8
Epoch: 10 | train_loss: 0.00698, val_loss: 0.00755, lr: 1.00E-04, _patience: 7
Epoch: 11 | train_loss: 0.00698, val_loss: 0.00755, lr: 1.00E-04, _patience: 6
Epoch: 12 | train_loss: 0.00702, val_loss: 0.00755, lr: 1.00E-04, _patience: 5
Epoch: 13 | train_loss: 0.00696, val_loss: 0.00756, lr

In [40]:
import json
from sklearn.metrics import precision_recall_fscore_support

In [41]:
def get_performance(y_true, y_pred, classes):
    """Per-class performance metrics."""
    # Performance
    performance = {"overall": {}, "class": {}}

    # Overall performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    performance["overall"]["precision"] = metrics[0]
    performance["overall"]["recall"] = metrics[1]
    performance["overall"]["f1"] = metrics[2]
    performance["overall"]["num_samples"] = np.float64(len(y_true))

    # Per-class performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average=None)
    for i in range(len(classes)):
        performance["class"][classes[i]] = {
            "precision": metrics[0][i],
            "recall": metrics[1][i],
            "f1": metrics[2][i],
            "num_samples": np.float64(metrics[3][i]),
        }

    return performance

In [42]:
# Get predictions
test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
y_pred = np.argmax(y_prob, axis=1)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [43]:
# Determine performance
performance = get_performance(
    y_true=np.argmax(y_true, axis=1), y_pred=y_pred, classes=label_encoder.classes)
print (json.dumps(performance["overall"], indent=2))

{
  "precision": 0.011504601840736295,
  "recall": 0.09411764705882353,
  "f1": 0.02048096034117384,
  "num_samples": 170.0
}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
