In [61]:
!pip install pyarrow



In [62]:
!pip install wandb



In [63]:
!pip install datasets



In [64]:
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
from datasets import Dataset
from datasets import DatasetDict
#from datasets import load_metric

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, get_scheduler
from transformers import DebertaTokenizer, XLNetTokenizer,XLNetModel
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader
from torch.optim import AdamW
import torch

from tqdm.auto import tqdm
import zipfile
import sys

import ast
import numpy as np

In [65]:
import wandb
model_name = 'deepset/roberta-base-squad2'
wandb.init(project="forc3cl", name=model_name)

In [66]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [67]:
import torch
torch.cuda.empty_cache()

## Get data - transform into one-hot encodings

In [68]:
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
#tokenizer = DebertaTokenizer.from_pretrained("microsoft/deberta-base")

In [69]:
# for this example, only metadata of title and abstract will be used
data = pd.read_csv('train_doi.csv')

In [70]:
data = data[['title', 'abstract', 'Level1', 'Level2', 'Level3']]

In [71]:
# remove nan from abstracts
data['abstract'] = ["" if pd.isna(abstract) else abstract for abstract in data['abstract']]
# Get text
data['text'] = [row['title'] + tokenizer.sep_token + (row['abstract'] or '')
                    for index, row in data.iterrows()]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['abstract'] = ["" if pd.isna(abstract) else abstract for abstract in data['abstract']]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = [row['title'] + tokenizer.sep_token + (row['abstract'] or '')


In [72]:
# In this example, we will disregard the hierarchical structure of each level
all_labels = []
for idx, row in data.iterrows():
    labels = []
    level1 = ast.literal_eval(row['Level1'])
    labels.append(level1)

    if type(row['Level2']) != float:
        level2 = ast.literal_eval(row['Level2'])
        labels.append(level2)

    if type(row['Level3']) != float:
        level3 = ast.literal_eval(row['Level3'])
        labels.append(level3)

    labels_flat = [item for items in labels for item in items]

    all_labels.append(labels_flat)

In [73]:
data['labels'] = all_labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['labels'] = all_labels


In [74]:
data = data[['text', 'labels']]
data.head()

Unnamed: 0,text,labels
0,Automatic annotation of curricular language ta...,"[Data Management and Generation, Information E..."
1,Recognizing Behavioral Factors while Driving: ...,"[Audio Generation and Processing, Data Managem..."
2,Approaching Reflex Predictions as a Classifica...,"[Multilingual NLP, Classification Applications..."
3,FilipN@LT-EDI-ACL2022-Detecting signs of Depre...,"[Classification Applications, Domain-specific ..."
4,Still on arguments and adjuncts: the status of...,"[Parsing, Data Management and Generation, Low-..."


In [75]:
all_labels_flat = [item for items in all_labels for item in items]

In [76]:
num_categories = len(set(all_labels_flat))

In [77]:
# Encode labels with LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit_transform(list(set(all_labels_flat)))

array([155,  28,  72,  66,  65,  75,  41, 117,  34, 136,  64,  85, 143,
        76,  68, 146,  57,  53, 100, 151,  11, 122, 147,  40,  39, 135,
       118,  90,  70, 111,  82,   6, 114,   9, 144,  69,  79, 102,  67,
       125,  73,  33,   0,  32, 153, 137,  44,  35, 108,  92,   2,  24,
        62, 105,  91,  38, 107, 142, 129,  10,  18,  89, 131,  71,  31,
       130,   4, 141,  25, 115, 109, 132,  88, 104,  80,  86,  46,  43,
       116, 110,  27,  21,  52, 133, 140,  48, 145,  94,  51, 113,  60,
        58,  93,  96,  23,  42, 119, 152,  78,  77,  61,  36,  50,  45,
         5,   1, 126,  20,  83,  17,  15,  84,  29, 128, 112, 154,  13,
        14, 106, 124,  99,  16,  63,  56,  26, 120,   7, 101, 121, 138,
       103, 139, 148, 134, 150, 127,  95,  54,   3,  59,  47, 123,  30,
       149,  87,  49,  74,  55,  98,  12,  19,   8,  81,  97,  37,  22])

In [78]:
le_name_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

In [79]:
all_encoded_labels = []
for idx, row in data.iterrows():
    labels = row['labels']
    encoded_labels = [le_name_mapping[label] for label in labels]
    all_encoded_labels.append(encoded_labels)

data['encoded_labels'] = all_encoded_labels

In [80]:
all_multi_hot_labels = []
for idx, row in data.iterrows():
    categorical_labels = row['encoded_labels']

    # Convert categorical labels to multi-hot encoding
    multi_hot_encoding = np.zeros(num_categories)
    multi_hot_encoding[categorical_labels] = 1

    all_multi_hot_labels.append(multi_hot_encoding)

data['multi_hot_labels'] = all_multi_hot_labels

## Convert data into Huggingface dataset

In [81]:
train_dataset = ds.dataset(pa.Table.from_pandas(data).to_batches())

In [82]:
# Turn it into a dataset
train_dataset = Dataset(pa.Table.from_pandas(data))
train_dataset

Dataset({
    features: ['text', 'labels', 'encoded_labels', 'multi_hot_labels'],
    num_rows: 1050
})

In [83]:
# Turn it into a dictionary (why?)
dd = DatasetDict({"train":train_dataset})
dd

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'encoded_labels', 'multi_hot_labels'],
        num_rows: 1050
    })
})

In [84]:
# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True, max_length=512)

In [85]:
tokenized_datasets = dd.map(tokenize_function, batched=True)
tokenized_datasets

Map:   0%|          | 0/1050 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'encoded_labels', 'multi_hot_labels', 'input_ids', 'attention_mask'],
        num_rows: 1050
    })
})

In [86]:
# remove unnecessary columns from dataset
tokenized_datasets = tokenized_datasets.remove_columns(["text", "labels", "encoded_labels"])
# rename the label column to labels because the model expects the argument to be named as the latter
# multi_hot_labels is now 'labels'
tokenized_datasets = tokenized_datasets.rename_column("multi_hot_labels", "labels")
# set the format of the dataset to return PyTorch instead of lists
tokenized_datasets.set_format("torch")
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 1050
    })
})

# Training with PyTorch

In [87]:
# create DataLoader objects to iterate over batches of data when training
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=False, batch_size=8)

In [88]:
# get the model
custom_model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                           num_labels=num_categories
                                                          )

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at deepset/roberta-base-squad2 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [89]:
# Loss function
criterion = torch.nn.BCEWithLogitsLoss()

# Optimizer
optimizer = torch.optim.AdamW(custom_model.parameters(), lr=1e-5)

# Training loop
num_epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model.to(device)

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

## Training loop

In [90]:
from tqdm.auto import tqdm

In [91]:
for epoch in range(num_epochs):
    custom_model.train()
    for batch in train_dataloader:
        input_ids = batch['input_ids']
        labels = batch['labels']
        input_ids, labels_batch = input_ids.to(device), labels.to(device)

        # Forward pass
        outputs = custom_model(input_ids, labels=labels_batch)  # Use labels directly for multi-label classification

        # Calculate loss
        loss = outputs.loss

        print("loss:", loss)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

loss: tensor(0.6833, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6739, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6667, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6590, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6494, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6418, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6353, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6284, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6204, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6112, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
loss: tensor(0.6059, device='cuda:0',
       grad_fn=<Binary

## Create validation predictions

In [92]:
# create validation dataloader
val_df = pd.read_csv('val_doi.csv')
val_df = val_df[['title', 'abstract', 'data_index']]
val_df['text'] = [row['title'] + tokenizer.sep_token + (row['abstract'] or '')
                    for index, row in val_df.iterrows()]
val_data = val_df[['text']]
val_dataset = ds.dataset(pa.Table.from_pandas(val_data).to_batches())
val_dataset = Dataset(pa.Table.from_pandas(val_data))
dd_val = DatasetDict({"val":val_dataset})
tokenized_val = dd_val.map(tokenize_function, batched=True)
# remove unnecessary columns from dataset
tokenized_val = tokenized_val.remove_columns(["text"])
# set the format of the dataset to return PyTorch instrad og lists
tokenized_val.set_format("torch")
val_dataloader = DataLoader(tokenized_val["val"], shuffle=False, batch_size=8)

Map:   0%|          | 0/225 [00:00<?, ? examples/s]

## Evaluation loop

In [93]:
custom_model.eval()
all_predictions = []
all_labels = []

for batch in val_dataloader:
    input_ids = batch['input_ids']
    input_ids = input_ids.to(device)

    with torch.no_grad():
        outputs = custom_model(input_ids)
        logits = outputs.logits

    predictions = torch.sigmoid(logits)
    all_predictions.append(predictions.cpu().numpy())

all_predictions = np.concatenate(all_predictions, axis=0)

## Defining N? Why?

In [94]:
len_labels = []

# Data is the training set
for label in data['labels'].values:
    len_labels.append(len(label))

len(len_labels)

1050

In [95]:
n = int(round(np.average(len_labels)))

In [96]:
top_n_preds = []
for pred in all_predictions:
    top_n_preds.append(pred.argsort()[-n:])

In [97]:
# Inverted mapping from label to class
inv_le_name_mapping = {v: k for k, v in le_name_mapping.items()}
inv_le_name_mapping

{0: 'Abstract Meaning Representation (AMR)',
 1: 'Abstractive Text Summarization',
 2: 'Active Learning',
 3: 'Adversarial Attacks and Robustness',
 4: 'Adversarial Learning',
 5: 'Anaphora Resolution',
 6: 'Annotation Processes',
 7: 'Argument Mining',
 8: 'Aspect-Based SA (ABSA)',
 9: 'Audio Generation and Processing',
 10: 'Author Detection',
 11: 'Authorship Verification',
 12: 'Automatic Speech Recognition (ASR)',
 13: 'Automatic Text Summarization',
 14: 'Bias Detection',
 15: 'Biases in NLP',
 16: 'Bilingual Lexicon Induction (BLI)',
 17: 'Biomedical NLP',
 18: 'Chatbots',
 19: 'Citation Analysis',
 20: 'Claim Verification',
 21: 'Classification Applications',
 22: 'Commonsense Reasoning',
 23: 'Community QA',
 24: 'Constituency Parsing',
 25: 'Coreference Resolution',
 26: 'Cross-lingual Application',
 27: 'Data Analysis',
 28: 'Data Augmentation',
 29: 'Data Management and Generation',
 30: 'Data Preparation',
 31: 'Data-to-Text Generation',
 32: 'Dependency Parsing',
 33: 'Di

In [98]:
# Go from categorical labels back to the original textual labels
all_text_labels = []
for preds in top_n_preds:
    text_labels = []
    for cat in preds:
        text_labels.append(inv_le_name_mapping[cat])
    all_text_labels.append(text_labels)

## Construct lists of level1, level2, level3

In [99]:
data = pd.read_csv('train_doi.csv')

In [100]:
# In this example, we will disregard the hierarchical structure of each level
level1_labels = []
level2_labels = []
level3_labels = []

for idx, row in data.iterrows():
    level1 = ast.literal_eval(row['Level1'])
    level1_labels.append(level1)

    if type(row['Level2']) != float:
        level2 = ast.literal_eval(row['Level2'])
        level2_labels.append(level2)

    if type(row['Level3']) != float:
        level3 = ast.literal_eval(row['Level3'])
        level3_labels.append(level3)

In [101]:
level1_labels_flat = [item for items in level1_labels for item in items]
level1_labels_flat = list(set(level1_labels_flat))
level2_labels_flat = [item for items in level1_labels for item in items]
level2_labels_flat = list(set(level2_labels_flat))
level3_labels_flat = [item for items in level1_labels for item in items]
level3_labels_flat = list(set(level3_labels_flat))

In [102]:
val_df

Unnamed: 0,title,abstract,data_index,text
0,Fast and Robust POS tagger for Arabic Tweets U...,Part-of-Speech POS tagging is a key step in ma...,1320,Fast and Robust POS tagger for Arabic Tweets U...
1,SAJA at TRAC 2020 Shared Task: Transfer Learni...,This paper describes the participation of the ...,373,SAJA at TRAC 2020 Shared Task: Transfer Learni...
2,Who mentions whom? Recognizing political actor...,We show that it is straightforward to train a ...,160,Who mentions whom? Recognizing political actor...
3,Association between declarative memory and lan...,Episodic memory and semantic memory are two su...,824,Association between declarative memory and lan...
4,Improving Users' Demographic Prediction via th...,"In this paper, we improve microblog users' dem...",208,Improving Users' Demographic Prediction via th...
...,...,...,...,...
220,Do sentence embeddings capture discourse prope...,We introduce four tasks designed to determine ...,1163,Do sentence embeddings capture discourse prope...
221,DUTH at SemEval-2021 Task 7: Is Conventional M...,This paper describes the approach that was dev...,1130,DUTH at SemEval-2021 Task 7: Is Conventional M...
222,Enhancing Drug-Drug Interaction Extraction fro...,We propose a novel neural method to extract dr...,292,Enhancing Drug-Drug Interaction Extraction fro...
223,Terme-\`a-LLOD: Simplifying the Conversion and...,"In recent years, there has been increasing int...",47,Terme-\`a-LLOD: Simplifying the Conversion and...


## Add predictions to val_df

In [103]:
level1_df = []
level2_df = []
level3_df = []

for labels in all_text_labels:
    row_level1 = []
    row_level2 = []
    row_level3 = []
    for label in labels:
        if label in level1_labels_flat:
            row_level1.append(label)
        elif label in level2_labels_flat:
            row_level2.append(label)
        elif label in level3_labels_flat:
            row_level3.append(label)

    level1_df.append(row_level1)
    level2_df.append(row_level2)
    level3_df.append(row_level3)

In [104]:
val_df['Level1_pred'] = level1_df
val_df['Level2_pred'] = level2_df
val_df['Level3_pred'] = level3_df

In [105]:
val_df[['data_index','Level1_pred', 'Level2_pred', 'Level3_pred']].to_csv('predictions.csv')
zipfile.ZipFile('predictions.zip', mode='w').write("predictions.csv")

## Evaluations test

In [106]:
zf = zipfile.ZipFile('predictions.zip')
preds = pd.read_csv(zf.open('predictions.csv'))

In [107]:
truth = pd.read_csv('val_doi.csv')

In [108]:
merged = truth.merge(preds, on='data_index', how='left')

In [109]:
merged.head()

Unnamed: 0.1,acl_id,Level1,Level2,Level3,abstract,url,publisher,year,month,booktitle,author,title,doi,venue,data_index,Unnamed: 0,Level1_pred,Level2_pred,Level3_pred
0,L16-1238,"['Text Preprocessing', 'Domain-specific NLP', ...","['Part-of-Speech (POS) Tagging', 'Data Prepara...",['NLP for Social Media'],Part-of-Speech POS tagging is a key step in ma...,https://aclanthology.org/L16-1238,European Language Resources Association (ELRA),2016,May,Proceedings of the Tenth International Confere...,"Albogamy, Fahad and\nRamsay, Allan",Fast and Robust POS tagger for Arabic Tweets U...,,L16,1320,0,"['Domain-specific NLP', 'Model Architectures',...",[],[]
1,2020.trac-1.16,"['Learning Paradigms', 'Domain-specific NLP', ...","['Transformer Models', 'Transfer Learning', 'H...",['NLP for Social Media'],This paper describes the participation of the ...,https://aclanthology.org/2020.trac-1.16,European Language Resources Association (ELRA),2020,May,Proceedings of the Second Workshop on Trolling...,"Tawalbeh, Saja and\nHammad, Mahmoud and\nAL-...",SAJA at TRAC 2020 Shared Task: Transfer Learni...,,trac,373,1,"['Domain-specific NLP', 'Model Architectures',...",[],[]
2,2020.parlaclarin-1.7,"['Domain-specific NLP', 'Data Management and G...","['Data Preparation', 'Search Engines', 'Named ...",,We show that it is straightforward to train a ...,https://aclanthology.org/2020.parlaclarin-1.7,European Language Resources Association,2020,May,Proceedings of the Second ParlaCLARIN Workshop,"Kerkvliet, Lennart and\nKamps, Jaap and\nMar...",Who mentions whom? Recognizing political actor...,,parlaclarin,160,2,"['Domain-specific NLP', 'Model Architectures',...",[],[]
3,2020.paclic-1.39,"['Data Management and Generation', 'Low-resour...",['Data Analysis'],,Episodic memory and semantic memory are two su...,https://aclanthology.org/2020.paclic-1.39,Association for Computational Linguistics,2020,October,Proceedings of the 34th Pacific Asia Conferenc...,"Xie, Chenwei and\nFeng, Yun and\nWang, Willi...",Association between declarative memory and lan...,,paclic,824,3,"['Domain-specific NLP', 'Model Architectures',...",[],[]
4,D16-1143,"['Data Management and Generation', 'Classifica...","['Data Preparation', 'Multilabel Text Classifi...",,"In this paper, we improve microblog users' dem...",https://aclanthology.org/D16-1143,Association for Computational Linguistics,2016,November,Proceedings of the 2016 Conference on Empirica...,"Wang, Yuan and\nXiao, Yang and\nMa, Chao an...",Improving Users' Demographic Prediction via th...,10.18653/v1/D16-1143,D16,208,4,"['Domain-specific NLP', 'Model Architectures',...",[],[]


In [110]:
merged = merged[['Level1', 'Level2', 'Level3', 'Level1_pred', 'Level2_pred', 'Level3_pred', 'data_index']]

In [111]:
# Make flat list of truth label
all_labels_truth = []
for idx, row in merged.iterrows():
    labels = []
    level1 = ast.literal_eval(row['Level1'])
    labels.append(level1)

    if type(row['Level2']) != float:
        level2 = ast.literal_eval(row['Level2'])
        labels.append(level2)

    if type(row['Level3']) != float:
        level3 = ast.literal_eval(row['Level3'])
        labels.append(level3)

    labels_flat = [item for items in labels for item in items]

    all_labels_truth.append(labels_flat)

In [112]:
merged['labels_truth'] = all_labels_truth

In [113]:
# Make flat list of predicted label
all_labels_pred = []
for idx, row in merged.iterrows():
    labels = []
    level1 = ast.literal_eval(row['Level1_pred'])
    labels.append(level1)

    if type(row['Level2']) != float:
        level2 = ast.literal_eval(row['Level2_pred'])
        labels.append(level2)

    if type(row['Level3']) != float:
        level3 = ast.literal_eval(row['Level3_pred'])
        labels.append(level3)

    labels_flat = [item for items in labels for item in items]

    all_labels_pred.append(labels_flat)

In [114]:
merged['labels_pred'] = all_labels_pred

In [115]:
merged = merged[['labels_truth', 'labels_pred', 'data_index']]

In [116]:
from sklearn.preprocessing import MultiLabelBinarizer

In [117]:
# Step 1: Convert to binary array using MultiLabelBinarizer
mlb = MultiLabelBinarizer()
binary_labels = mlb.fit_transform(merged['labels_truth'].values)
# Step 2: Convert predicted labels to binary array
predicted_binary_labels = mlb.transform(merged['labels_pred'].values)

In [118]:
# Step 3: Evaluate using standard metrics
precision_macro = precision_score(binary_labels, predicted_binary_labels, average='macro')
recall_macro = recall_score(binary_labels, predicted_binary_labels, average='macro')
precision_weighted = precision_score(binary_labels, predicted_binary_labels, average='weighted')
recall_weighted = recall_score(binary_labels, predicted_binary_labels, average='weighted')
f1_micro = f1_score(binary_labels, predicted_binary_labels, average='micro')
f1_macro = f1_score(binary_labels, predicted_binary_labels, average='macro')
f1_weighted = f1_score(binary_labels, predicted_binary_labels, average='weighted')
f1_samples = f1_score(binary_labels, predicted_binary_labels, average='samples')

print(f"Weighted-averaged Precision: {precision_weighted}")
wandb.log({"Weighted-averaged Precision": precision_weighted})
print(f"Weighted-averaged Recall: {recall_weighted}")
wandb.log({"Weighted-averaged Recall": recall_weighted})
print(f"Micro-averaged F1-score: {f1_micro}")
print(f"Macro-averaged F1-score: {f1_macro}")
wandb.log({"Macro-averaged F1-score": f1_macro})
print(f"Weighted-averaged F1-score: {f1_weighted}")
wandb.log({"Weighted-averaged F1-score": f1_weighted})
print(f"Samples-averaged F1-score: {f1_samples}")
wandb.log({"Samples-averaged F1-score": f1_samples})

Weighted-averaged Precision: 0.10843563629690797
Weighted-averaged Recall: 0.28075970272502065
Micro-averaged F1-score: 0.3221222169587873
Macro-averaged F1-score: 0.01819955258937506
Weighted-averaged F1-score: 0.155749033928054
Samples-averaged F1-score: 0.31402221453201845


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [119]:
wandb.finish()

0,1
Macro-averaged F1-score,▁
Samples-averaged F1-score,▁
Weighted-averaged F1-score,▁
Weighted-averaged Precision,▁
Weighted-averaged Recall,▁

0,1
Macro-averaged F1-score,0.0182
Samples-averaged F1-score,0.31402
Weighted-averaged F1-score,0.15575
Weighted-averaged Precision,0.10844
Weighted-averaged Recall,0.28076
