In [38]:

import numpy as np
import spacy
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import f1_score
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer, AdamW, get_scheduler

# ============================
# 1. Load models:
# ============================

# Use GPU 1 when CUDA is available, otherwise stay on the CPU. This mirrors the
# fallback applied where `device` is defined later in the notebook, so the encoder
# can also be loaded on a machine without a GPU.
encoder_device = "cuda:1" if torch.cuda.is_available() else "cpu"
if torch.cuda.is_available():
    torch.cuda.set_device(1)

MODEL_NAME = "FacebookAI/xlm-roberta-large"  #"ai-forever/ruRoberta-large" "sergeyzh/BERTA" #"FacebookAI/xlm-roberta-large"  "ai-forever/ruBert-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
bert_model = AutoModel.from_pretrained(MODEL_NAME).to(encoder_device)
# Last path component of the hub id; [-1] also works for ids without an "org/" prefix,
# where the previous [1] index raised IndexError.
model_name_to_save = MODEL_NAME.split('/')[-1]
print (model_name_to_save)


def unfreeze_last_layer(model, layers=('encoder.layer.22', 'encoder.layer.23')):  # set which layers to train
    """Freeze every parameter of ``model.bert`` except those inside ``layers``.

    Args:
        model: object exposing a ``bert`` attribute with ``named_parameters()``.
        layers: dotted name prefixes of the encoder blocks to keep trainable. The
            default keeps the two blocks that were previously hard-coded. A single
            string is accepted as well.

    Parameters whose name contains one of ``layers`` as a complete dotted component
    get ``requires_grad = True``; all others get ``requires_grad = False``. Matching
    on the trailing dot prevents ``encoder.layer.2`` from also selecting
    ``encoder.layer.22``.
    """
    import warnings

    if isinstance(layers, str):
        layers = (layers,)
    markers = tuple(f"{layer}." for layer in layers)

    matched = 0
    for name, param in model.bert.named_parameters():
        # Append a dot so a marker can also match a name that ends with the layer id.
        trainable = any(marker in f"{name}." for marker in markers)
        param.requires_grad = trainable
        matched += int(trainable)

    if markers and matched == 0:
        # E.g. a 12-block encoder has no layer 22/23: the whole encoder would be frozen.
        warnings.warn(
            f"unfreeze_last_layer: no parameter matched {list(layers)}; the encoder is fully frozen."
        )
            
            
            
class MultiLabelDataset(Dataset):
    """Map-style dataset pairing each text with its label vector and TF-IDF row.

    Every item is a dict with the keys ``input_ids``, ``attention_mask``,
    ``labels`` and ``tfidf_features``. Texts are tokenized lazily on access and
    padded/truncated to ``max_length`` tokens.
    """

    def __init__(self, texts, labels, tfidf_features, tokenizer, max_length=512):
        self.texts, self.labels = texts, labels
        self.tfidf_features = tfidf_features
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.float32)
        tfidf_tensor = torch.tensor(self.tfidf_features[idx], dtype=torch.float32)

        tokenized = self.tokenizer(
            self.texts[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        # The tokenizer returns tensors with a leading batch axis of size 1; drop it
        # so the DataLoader can stack items itself.
        item = {key: tokenized[key].squeeze(0) for key in ("input_ids", "attention_mask")}
        item["labels"] = label_tensor
        item["tfidf_features"] = tfidf_tensor
        return item


# ============================
# Define the model
# ============================
class RuBERTWithTFIDF(nn.Module):
    """Multi-label classifier over the encoder's pooled output fused with TF-IDF.

    The TF-IDF vector is projected to 128 dimensions (Linear + ReLU), concatenated
    with ``pooler_output`` of ``bert_model``, layer-normalised, passed through
    dropout and mapped to ``num_labels`` logits.
    """

    def __init__(self, bert_model, tfidf_dim, num_labels=10):
        super().__init__()
        tfidf_hidden = 128
        fused_dim = bert_model.config.hidden_size + tfidf_hidden

        self.bert = bert_model
        self.dropout = nn.Dropout(0.3)
        self.tfidf_layer = nn.Linear(tfidf_dim, tfidf_hidden)  #128
        self.relu = nn.ReLU()
        # This is a LayerNorm despite the attribute name; the name is kept so that
        # state_dict keys stay the same.
        self.batch_norm = nn.LayerNorm(fused_dim)
        self.classifier = nn.Linear(fused_dim, num_labels)

    def forward(self, input_ids, attention_mask, tfidf_features):
        """Return raw (pre-sigmoid) logits, one row per input example."""
        pooled = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output
        tfidf_hidden = self.relu(self.tfidf_layer(tfidf_features))
        fused = self.batch_norm(torch.cat((pooled, tfidf_hidden), dim=1))
        return self.classifier(self.dropout(fused))


class BERTWithMeanPoolingTFIDF(nn.Module):
    """Multi-label classifier over mean-pooled token states fused with TF-IDF.

    Args:
        model_name: either a hub/model identifier string, which is loaded with
            ``AutoModel.from_pretrained``, or an already constructed encoder object.
            Previously this argument was ignored and the notebook-level
            ``bert_model`` was always used.
        tfidf_dim: size of the TF-IDF input vector.
        num_labels: number of output logits.
    """

    def __init__(self, model_name, tfidf_dim, num_labels=10):
        super().__init__()
        if isinstance(model_name, str):
            self.bert = AutoModel.from_pretrained(model_name)
        else:
            self.bert = model_name
        self.dropout = nn.Dropout(0.1)
        self.tfidf_layer = nn.Linear(tfidf_dim, 128)
        self.relu = nn.ReLU()
        self.norm = nn.LayerNorm(self.bert.config.hidden_size + 128)
        self.classifier = nn.Linear(self.bert.config.hidden_size + 128, num_labels)

    def forward(self, input_ids, attention_mask, tfidf_features):
        """Return raw (pre-sigmoid) logits, one row per input example."""
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden = outputs.last_hidden_state

        # Average only over real tokens. The datasets in this notebook pad every
        # text to max_length, so an unmasked mean would be diluted by padding.
        mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1.0)  # guard against an all-zero mask
        mean_pooled = (hidden * mask).sum(dim=1) / token_counts

        tfidf_proj = self.relu(self.tfidf_layer(tfidf_features))
        combined = torch.cat((mean_pooled, tfidf_proj), dim=1)
        normed = self.norm(combined)
        logits = self.classifier(self.dropout(normed))
        return logits


xlm-roberta-large


In [6]:
import pandas as pd

# Read the pipe-separated annotation file and discard the index column that was
# written out alongside the data.
res_folder = '../data/annotations/'
df = pd.read_csv(
    res_folder + 'gpt-labels-20k.csv',
    sep="|",
    encoding='utf-8',
).drop(columns='Unnamed: 0')
df

Unnamed: 0,user_idh,post_idh,Self-direction,Stimulation,Hedonism,Achievement,Power,Security,Conformity,Tradition,Benevolence,Universalism,If_political
0,e9deaf0313932da383cea726f92f9eb3d1c628877d3424...,c044a467d4ac05d966e48ee9df3413ac3727b2125e90f0...,11111,11111,11111,00000,00000,00000,00000,00000,11111,00000,0.0
1,c5fe892dc5e0594c0e752e9b09619507e46c19d8908452...,bc6b38f59d571d80de7a08b248917e049d2af88f2c5f33...,00110,11111,11111,00000,00000,00000,00000,00000,11000,00000,0.0
2,323c26a3aa3b8924ae899c8e25dd28437f198cf1db553f...,cb95507ecab36396c8c9a31abc9ec0e2b44f4d1cc0152a...,11101,01101,00000,11011,11111,00000,00000,00000,00000,00101,1.0
3,2e15352864c7921f447a7df8c25998aa52b2e70c882f37...,6b1d3f8627aac562aa08965624d1ccc333a45a53d0b405...,00000,00000,00001,00000,00000,00100,00000,10000,01111,00000,0.0
4,66ab20bb3078aaeb5f904a37313509c77aef6a2c632013...,aedbab70bbbca94d30c00e7bf336d93c67390e9939dad5...,10000,00000,00000,11111,11010,11000,10000,00000,00100,00000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,d9be01ec25121101f7125646dd78950f17c2b59380b597...,de5e4b3ccf1d5466c6abd047610f2f2d30b6cc930af881...,11011,00001,00000,00000,00000,00000,01100,00000,11110,00000,0.0
19996,8c2ae840d8aeddad8f237d98efeb671d7b30ca8e11fa4e...,ef56f00ee8c299eb843f0c663f3f17a4032d466eb76e42...,00000,00000,00000,00000,01100,11011,00000,00000,00000,00001,1.0
19997,727090bd2f2925c877401cf8cd9e8ef29360a0fe76c9a9...,9013b99714b49d6a19292627b108ba30a1d60b33088c90...,11111,00001,00000,11111,10000,00000,01000,00000,01000,00000,0.0
19998,66ab20bb3078aaeb5f904a37313509c77aef6a2c632013...,a5646486dad330c70811bac7bf3c8b3a1ad619a01c570e...,00000,00000,00000,10000,11001,11111,00100,10101,00000,11111,1.0


In [7]:
# Label columns, in the order used for the model's output logits.
values_list = [
    'Self-direction',
    'Stimulation',
    'Hedonism',
    'Achievement',
    'Power',
    'Security',
    'Conformity',
    'Tradition',
    'Benevolence',
    'Universalism',
]

In [14]:
# ! Post texts have been removed from public access
texts = df.text.tolist()

# Parse every cell of the label columns into a list of floats by splitting on commas.
# Column-wise Series.map is used instead of DataFrame.applymap, which recent pandas
# versions deprecate.
annotations_ = df[values_list].apply(
    lambda col: col.map(lambda x: list(map(float, str(x).split(","))))
)
# Some cells are NaN; replace those with five zero votes.
annotations = annotations_.apply(
    lambda col: col.map(lambda x: x if not np.isnan(x).any() else [0, 0, 0, 0, 0])
).values

# convert to soft labels

def parse_and_aggregate(label_string):
    """Collapse a comma-separated vote string into one soft label.

    The votes are summed and mapped as follows: a sum of 5 or 4 -> 1,
    a sum of 3 -> 0.6, anything else -> 0.

    Non-string cells are converted with ``str`` first, the same way the raw
    annotation parsing in this notebook does. A missing (NaN) cell therefore
    yields 0 instead of raising ``AttributeError``.
    """
    votes = [float(vote) for vote in str(label_string).split(",")]
    total = sum(votes)
    if total in (4, 5):
        return 1
    if total == 3:
        return 0.6
    # Also reached for NaN: every comparison against NaN is False.
    return 0
    
# Soft-label matrix with one row per post and one column per entry of values_list.
# Column-wise Series.map replaces the deprecated DataFrame.applymap.
categorized_annotations = (
    df[values_list]
    .apply(lambda col: col.map(parse_and_aggregate))
    .to_numpy()
)

In [42]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation with a fixed seed.
# Note: X_train_texts / X_val_texts are slices of the full DataFrame `df`, not bare text lists.
X_train_texts, X_val_texts, train_labels, val_labels = train_test_split(
    df,
    categorized_annotations,
    test_size=0.2,
    random_state=42,
)

In [43]:
# ============================
# TF-IDF features and tokenized datasets
# ============================
vectorizer = TfidfVectorizer(
    max_features=5000,
    max_df=0.8,
    min_df=2,
    ngram_range=(1, 2),
)

# The vocabulary is fitted on the training split only and reused for validation.
X_train_tfidf = vectorizer.fit_transform(X_train_texts.text.to_list()).toarray()
X_val_tfidf = vectorizer.transform(X_val_texts.text.to_list()).toarray()

train_dataset = MultiLabelDataset(
    texts=X_train_texts.text.to_list(),
    labels=train_labels,
    tfidf_features=X_train_tfidf,
    tokenizer=tokenizer,
)
val_dataset = MultiLabelDataset(
    texts=X_val_texts.text.to_list(),
    labels=val_labels,
    tfidf_features=X_val_tfidf,
    tokenizer=tokenizer,
)

# Only the training batches are shuffled; validation keeps row order.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

In [13]:
import joblib
# save TF-IDF vectorizer
# NOTE(review): the directory is hard-coded to "xlm-roberta-large" instead of being
# derived from model_name_to_save; the same literal path is read back with joblib.load
# in the test-data section, so the two must be changed together.
joblib.dump(vectorizer, "../models/xlm-roberta-large/tfidf_vectorizer_for_train_data.pkl")

In [44]:
# Run on the second GPU when CUDA is present, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:1")
else:
    device = torch.device("cpu")

In [45]:
# Build the classifier around the shared encoder, then freeze everything in the
# encoder except the blocks selected by unfreeze_last_layer.
model = RuBERTWithTFIDF(bert_model, tfidf_dim=X_train_tfidf.shape[1])
model = model.to(device)
unfreeze_last_layer(model)

# ============================
# Loss function and the optimizer
# ============================

# Class weights: inverse of each label's mean soft-label value on the training split
# (epsilon avoids division by zero), scaled so the largest weight equals 1.
class_frequencies = train_labels.sum(axis=0) / len(train_labels)
class_weights = 1.0 / (class_frequencies + 1e-5)
class_weights = class_weights / class_weights.max()

# convert weights to a float32 tensor on the training device
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32, device=device)

print("Class Weights:", class_weights_tensor)

class FocalLoss(nn.Module):
    """Class-weighted focal loss built on element-wise BCE-with-logits.

    For every (sample, label) element the BCE term is scaled by
    ``(1 - exp(-bce)) ** gamma`` and by that label's entry in ``class_weights``.
    The result is averaged over all elements.
    """

    def __init__(self, class_weights, gamma=0.3):
        super().__init__()
        self.class_weights = class_weights
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction="none")

    def forward(self, outputs, targets):
        """Return the scalar loss for logits ``outputs`` against ``targets``."""
        per_element_bce = self.bce(outputs, targets)
        # exp(-bce) is the quantity the focal term down-weights when it is close to 1.
        modulator = torch.pow(1 - torch.exp(-per_element_bce), self.gamma)
        weights = self.class_weights.to(outputs.device)
        return (modulator * per_element_bce * weights).mean()

criterion = FocalLoss(class_weights=class_weights_tensor, gamma=0.3)

optimizer = optim.Adam(params=model.parameters(), lr=3e-4)

# Linear decay without warm-up.
# NOTE(review): the schedule length is 10 epochs' worth of steps, while the training
# cell below calls train_model with epochs=5, so the learning rate only decays about
# halfway. Confirm that this is intended.
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=10 * len(train_loader),
)



Class Weights: tensor([0.2388, 0.4283, 0.6886, 0.5962, 0.6953, 0.7139, 1.0000, 0.7191, 0.2712,
        0.6631], device='cuda:1')


In [47]:
# ============================
# Training model
# ============================
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=5, device=device, scheduler=None):
    """Fine-tune ``model``, validating and checkpointing after every epoch.

    Args:
        model: module called as ``model(input_ids, attention_mask, tfidf_features)``.
        train_loader: iterable of batches with the keys ``input_ids``,
            ``attention_mask``, ``labels`` and ``tfidf_features``.
        val_loader: loader passed to ``validate`` after each epoch.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, labels)``.
        epochs: number of passes over ``train_loader``.
        device: device the batch tensors are moved to.
        scheduler: learning-rate scheduler stepped once per batch. When omitted,
            the notebook-level ``lr_scheduler`` is used, as before.

    After each epoch the state dict is written under
    ``../models/<model_name_to_save>/``; the directory is created if missing.
    """
    import os

    if scheduler is None:
        scheduler = lr_scheduler  # backward-compatible fallback to the notebook global

    # Create the checkpoint directory up front so a missing folder fails fast
    # rather than after the first (long) epoch.
    os.makedirs("../models/" + model_name_to_save, exist_ok=True)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=True)
        for batch in progress_bar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            tfidf_features = batch["tfidf_features"].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask, tfidf_features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()

            progress_bar.set_postfix(loss=loss.item())

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        print(f"Epoch {epoch+1}, Training Loss: {total_loss/max(len(train_loader), 1)}")
        validate(model, val_loader, criterion, device)  #!!!
        save_path = "../models/"+model_name_to_save+"/model_finetuned_"+model_name_to_save+"_gamma0_3_"+str(epoch+1)+".pth"
        torch.save(model.state_dict(), save_path)


# ============================
# Validation (F1-macro, F1-micro)
# ============================
def validate(model, val_loader, criterion, device=device):
    """Print the mean validation loss and macro/micro F1 at a 0.5 threshold.

    Args:
        model: module called as ``model(input_ids, attention_mask, tfidf_features)``.
        val_loader: iterable of batches with the keys ``input_ids``,
            ``attention_mask``, ``labels`` and ``tfidf_features``.
        criterion: loss called as ``criterion(outputs, labels)``.
        device: device the batch tensors are moved to.

    Predicted probabilities and the (soft) labels are both binarised with
    ``> 0.5`` before scoring. The model's previous train/eval mode is restored on
    exit. Returns ``None``.
    """
    was_training = model.training
    model.eval()
    total_loss_val = 0
    all_probs, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            tfidf_features = batch["tfidf_features"].to(device)
            labels = batch["labels"].to(device).float()

            outputs = model(input_ids, attention_mask, tfidf_features)
            total_loss_val += criterion(outputs, labels).item()

            # Move results to host memory per batch. This works on CPU and GPU alike
            # and avoids keeping the whole validation set's logits on the device.
            all_probs.append(torch.sigmoid(outputs).cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    avg_loss_val = total_loss_val / max(len(val_loader), 1)  #!!!
    print(f"Validation Loss: {avg_loss_val:.4f}")

    all_preds = np.vstack(all_probs)
    all_labels = np.vstack(all_labels)

    # convert to binary
    binary_preds = (all_preds > 0.5).astype(int)
    binary_labels = (all_labels > 0.5).astype(int)

    f1_macro = f1_score(binary_labels, binary_preds, average='macro')
    # This score uses average='micro', so it is reported as F1-micro
    # (it was previously printed under the label "F1-binary").
    f1_micro = f1_score(binary_labels, binary_preds, average='micro')

    print(f"Validation F1-macro: {f1_macro:.4f}, F1-micro: {f1_micro:.4f}")
    model.train(was_training)

In [48]:
def predict_from_loader(model, data_loader, device=device):
    """Return sigmoid probabilities for every batch of ``data_loader``.

    The model is switched to eval mode and run without gradients. Per-batch
    probability arrays are stacked row-wise into a single NumPy array.
    """
    model.eval()
    batch_probs = []

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Predicting"):
            logits = model(
                batch["input_ids"].to(device),
                batch["attention_mask"].to(device),
                batch["tfidf_features"].to(device),
            )
            batch_probs.append(torch.sigmoid(logits).cpu().numpy())

    return np.vstack(batch_probs)

In [49]:
# ============================
# Launch training
# ============================
# Runs 5 epochs; train_model validates on val_loader and writes a checkpoint after each one.
train_model(model, train_loader, val_loader, optimizer, criterion, epochs=5)


Epoch 1: 100%|██████████| 1000/1000 [11:56<00:00,  1.40it/s, loss=0.0836]


Epoch 1, Training Loss: 0.11739610173925757
Validation Loss: 0.0964
Validation F1-macro: 0.5262, F1-binary: 0.5819


Epoch 2: 100%|██████████| 1000/1000 [11:54<00:00,  1.40it/s, loss=0.0819]


Epoch 2, Training Loss: 0.09178518913686276
Validation Loss: 0.0889
Validation F1-macro: 0.6357, F1-binary: 0.6862


Epoch 3: 100%|██████████| 1000/1000 [11:52<00:00,  1.40it/s, loss=0.0592]


Epoch 3, Training Loss: 0.08014416098222137
Validation Loss: 0.0873
Validation F1-macro: 0.6428, F1-binary: 0.6819


Epoch 4: 100%|██████████| 1000/1000 [11:51<00:00,  1.40it/s, loss=0.0873]


Epoch 4, Training Loss: 0.06976181851699949
Validation Loss: 0.0880
Validation F1-macro: 0.6451, F1-binary: 0.6823


Epoch 5: 100%|██████████| 1000/1000 [11:51<00:00,  1.40it/s, loss=0.0431]


Epoch 5, Training Loss: 0.06022479407861829
Validation Loss: 0.0951
Validation F1-macro: 0.6637, F1-binary: 0.6931


In [16]:
# ! save model
# import os
# save_path = "../models/"+model_name_to_save+"/"+model_name_to_save+"_finetuned.pth"
# torch.save(model.state_dict(), save_path)


In [51]:
# Predict on validation data
# Uses the in-memory model; the result is an array of sigmoid probabilities
# stacked across all validation batches.
val_predictions = predict_from_loader(model, val_loader)

Predicting: 100%|██████████| 250/250 [02:19<00:00,  1.80it/s]


! Calculate metrics

In [55]:
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import json

def find_best_thresholds(y_true, y_pred_probs, values_list):
    """Pick F1-maximising decision thresholds per label and globally.

    Args:
        y_true: array of soft labels, one column per entry of ``values_list``;
            values ``>= 0.6`` count as positive.
        y_pred_probs: array of predicted probabilities with the same layout.
        values_list: label names, in column order.

    Returns:
        dict mapping each label name to its best threshold (rounded to 3 decimals),
        plus the key ``'GLOBAL'`` holding the single threshold (searched over
        0.10..0.90 in steps of 0.01) that maximises F1 over all flattened
        predictions.

    Side effect: the dict is also written as JSON to
    ``../models/<model_name_to_save>/<model_name_to_save>_thresholds.json``.
    """
    # Binarise the labels that were passed in. The previous version ignored
    # `y_true` and always read the notebook-level `val_labels`.
    y_true_binary = (np.asarray(y_true) >= 0.6).astype(int)
    y_pred_probs = np.asarray(y_pred_probs)

    best_thresholds = {}
    for i, value in enumerate(values_list):
        precision, recall, thresholds = precision_recall_curve(y_true_binary[:, i], y_pred_probs[:, i])
        # precision/recall carry one more entry than thresholds; drop the last point.
        f1 = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + 1e-8)
        idx = np.argmax(f1)
        best_thresh = thresholds[idx] if idx < len(thresholds) else 0.5
        best_thresholds[value] = round(float(best_thresh), 3)

    # Optimization of a single global threshold
    flat_true = y_true_binary.flatten()
    flat_probs = y_pred_probs.flatten()
    candidate_thresholds = np.linspace(0.1, 0.9, 81)
    global_f1 = [f1_score(flat_true, flat_probs > t) for t in candidate_thresholds]
    best_thresh_global = candidate_thresholds[np.argmax(global_f1)]
    best_thresholds['GLOBAL'] = round(float(best_thresh_global), 3)
    print(f"Global threshold maximizing overall F1: {best_thresh_global:.3f}")

    with open("../models/"+model_name_to_save+"/"+model_name_to_save+"_thresholds.json", "w") as f:
        json.dump({k: float(v) for k, v in best_thresholds.items()}, f, indent=2)

    return best_thresholds

In [56]:
# Thresholds are tuned on the validation predictions; the call also writes them to a JSON file.
best_thresholds=find_best_thresholds(val_labels, val_predictions, values_list)

Global threshold maximizing overall F1: 0.340


In [57]:
# Display the per-label thresholds and the 'GLOBAL' one.
best_thresholds

{'Self-direction': 0.324,
 'Stimulation': 0.241,
 'Hedonism': 0.357,
 'Achievement': 0.344,
 'Power': 0.441,
 'Security': 0.41,
 'Conformity': 0.199,
 'Tradition': 0.264,
 'Benevolence': 0.465,
 'Universalism': 0.425,
 'GLOBAL': 0.34}

In [58]:
# Binarise probabilities with the single global threshold, and the soft labels at >= 0.6.
binary_predictions = (val_predictions > best_thresholds['GLOBAL']).astype(int)
binary_y_val = (val_labels >= 0.6).astype(int)  #y_test

# Every (sample, label) decision is pooled into one flat binary problem for the report.
print("Accuracy:", accuracy_score(binary_y_val.ravel(), binary_predictions.ravel()))
print("Classification Report:")
print(classification_report(binary_y_val.ravel(), binary_predictions.ravel()))

Accuracy: 0.907225
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.94      0.94     33668
           1       0.70      0.73      0.71      6332

    accuracy                           0.91     40000
   macro avg       0.82      0.84      0.83     40000
weighted avg       0.91      0.91      0.91     40000



# Prediction for Test data

In [11]:
import glob

# Read the pipe-separated test annotation file and discard the index column that was
# written out alongside the data.
res_folder = '../data/annotations/'
df_test = pd.read_csv(
    res_folder + 'gpt-labels-testdata-4k.csv',
    sep="|",
    encoding='utf-8',
).drop(columns='Unnamed: 0')
df_test

Unnamed: 0,user_idh,post_idh,Self-direction,Stimulation,Hedonism,Achievement,Power,Security,Conformity,Tradition,Benevolence,Universalism
0,4216821a041e700782114caac8cc624417097150ea89fd...,20f657e6b1850a8c930abc63417b7adb9d45002370a99c...,00000,00000,00000,00000,00000,01001,01000,00000,11111,00000
1,deec49a0270cddec42ddb1279fb4130cd31ffe0bb335b8...,e2376a79db852eac32fb288b11e3fba92ac957047f7a53...,00000,00000,00000,00000,00000,10011,00000,00100,11111,01000
2,70d9bf963dd27255db4bb4633ec8dc20cfef78476a2be8...,7bd7dc5d600a4845920e9912eabf18ddd0dcfdfccc60ba...,00000,00000,00000,00000,00000,00000,00000,01110,11111,00000
3,e7021e2ffb0f6020336fd9e6ec18c4cdb12b251fcd2c26...,db32d77983d3f4d52d9455a68c8ea958e73a88101859c6...,00000,00000,11000,00000,00000,00000,00000,00000,11111,00000
4,04a0fc4429c6972b2c78b7550f925bcc4a9279d7238843...,3f26e05ad9a49f60b7e0cc328ee31d3a09385839e2e301...,00100,00000,00000,00110,00000,00000,00000,00000,11111,11000
...,...,...,...,...,...,...,...,...,...,...,...,...
3995,15866c9fd3ed30399128d19cc5bc25a622d56aefd1a097...,51ceda055469d5a0e52b062eee744f5f9229a962af5c7e...,00000,00000,00000,00000,00000,11110,00000,00000,00010,00000
3996,9e02685bb9281821fd892fb955e132c831b5d79d1212f6...,451475081033d2421ed5f2289237ea1f970ae5542cf355...,10000,11100,11010,00000,00000,00000,00000,00001,00000,00100
3997,1ca9042d4634ddba81dc379bfb49dd4f769b77470c0022...,51ceda055469d5a0e52b062eee744f5f9229a962af5c7e...,10101,00000,01000,00000,00000,00000,00000,00000,00000,00010
3998,a077c1420d5c151d18286711b9518a8923ab62afa7f488...,594e1ad708de94db2e876bca959251391aa0306dfbc19f...,10110,00000,01100,00000,00000,00000,00000,00001,11110,00000


In [12]:
import joblib

# load TF-IDF vectorizer
# joblib.load unpickles the file, so only load vectorizers written by this notebook.
# The path is the same literal used by the joblib.dump call in the training section.
vectorizer = joblib.load("../models/xlm-roberta-large/tfidf_vectorizer_for_train_data.pkl")

# Vectorize new data
# transform (not fit_transform): the vocabulary fitted on the training split is reused.
X_tfidf_new = vectorizer.transform(df_test["text"]).toarray()


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [61]:
class MultiLabelDataset_for_new_data(Dataset):
    """Label-free counterpart of MultiLabelDataset for inference.

    Every item is a dict with the keys ``input_ids``, ``attention_mask`` and
    ``tfidf_features``. Texts are tokenized lazily on access and
    padded/truncated to ``max_length`` tokens.
    """

    def __init__(self, texts, tfidf_features, tokenizer, max_length=512):
        self.texts = texts
        self.tfidf_features = tfidf_features
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        tfidf_tensor = torch.tensor(self.tfidf_features[idx], dtype=torch.float32)

        tokenized = self.tokenizer(
            self.texts[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        # Drop the tokenizer's leading batch axis so the DataLoader can stack items.
        item = {key: tokenized[key].squeeze(0) for key in ("input_ids", "attention_mask")}
        item["tfidf_features"] = tfidf_tensor
        return item

In [62]:
# Parse every cell of the test label columns into a list of floats by splitting on
# commas. Column-wise Series.map is used instead of DataFrame.applymap, which recent
# pandas versions deprecate.
annotations_ = df_test[values_list].apply(
    lambda col: col.map(lambda x: list(map(float, str(x).split(","))))
)
# NaN cells are replaced with five zero votes.
annotations = annotations_.apply(
    lambda col: col.map(lambda x: x if not np.isnan(x).any() else [0, 0, 0, 0, 0])
).values

# Soft-label matrix for the test set, same column order as values_list.
categorized_annotations_test = (
    df_test[values_list]
    .apply(lambda col: col.map(parse_and_aggregate))
    .to_numpy()
)

  annotations_ = df_test[values_list].applymap(lambda x: list(map(float, str(x).split(","))))
  annotations=annotations_.applymap(lambda x: x if np.isnan(x).any()==False else [0,0,0,0,0]).values # где-то есть nan - заменяем на 0
  categorized_annotations_test = df_test[values_list].applymap(parse_and_aggregate).to_numpy()


In [63]:
# Label-free dataset over the test posts; order is preserved so predictions line up with df_test rows.
new_dataset = MultiLabelDataset_for_new_data(
    texts=df_test.text.to_list(),
    tfidf_features=X_tfidf_new,
    tokenizer=tokenizer,
)
new_loader = DataLoader(dataset=new_dataset, batch_size=16, shuffle=False)

In [64]:
# `name` was only defined in a commented-out line, so this cell raised NameError on a
# fresh kernel. Point it at a checkpoint that train_model actually writes.
# NOTE(review): epoch 5 is assumed here because the decision thresholds were tuned on
# predictions from the model as it stood after the 5-epoch run; confirm which
# checkpoint is intended.
name = "../models/"+model_name_to_save+"/model_finetuned_"+model_name_to_save+"_gamma0_3_5.pth"
# map_location lets the weights load even when they were saved from a different device.
model.load_state_dict(torch.load(name, map_location=device))

<All keys matched successfully>

In [65]:
# Sigmoid probabilities for the test posts, in the row order of new_loader (not shuffled).
predictions_test_bert = predict_from_loader(model, new_loader)

Predicting: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


! Metrics for test data

In [66]:
# Apply the global threshold chosen on the validation split; soft labels count as positive at >= 0.6.
binary_predictions_test = (predictions_test_bert > best_thresholds['GLOBAL']).astype(int)
binary_y_test = (categorized_annotations_test >= 0.6).astype(int)  #y_test

# Every (sample, label) decision is pooled into one flat binary problem for the report.
print("Accuracy for Test Data:", accuracy_score(binary_y_test.ravel(), binary_predictions_test.ravel()))
print("Classification Report for Test Data:")
print(classification_report(binary_y_test.ravel(), binary_predictions_test.ravel()))

Accuracy for Test Data: 0.910625
Classification Report for Test Data:
              precision    recall  f1-score   support

           0       0.95      0.94      0.95     33947
           1       0.70      0.72      0.71      6053

    accuracy                           0.91     40000
   macro avg       0.82      0.83      0.83     40000
weighted avg       0.91      0.91      0.91     40000



In [67]:
# Combining metrics for the validation and test data in a combined table

def evaluate_model_dual(val_true, val_probs, test_true, test_probs, thresholds, values_list):
    records = []
    val_pred = (val_probs > np.array([thresholds[v] for v in values_list])).astype(int)
    test_pred = (test_probs > np.array([thresholds[v] for v in values_list])).astype(int)

    for i, name in enumerate(values_list):
        val_true_binary=(val_true[:, i] >= 0.6).astype(int)
        test_true_binary=(test_true[:, i] >= 0.6).astype(int)
        f1_val = f1_score(val_true_binary, val_pred[:, i])
        f1_test = f1_score(test_true_binary, test_pred[:, i])
        f1_macro_val = f1_score(val_true_binary, val_pred[:, i], average='macro')
        f1_macro_test = f1_score(test_true_binary, test_pred[:, i], average='macro')
        acc_val = accuracy_score(val_true_binary, val_pred[:, i])
        acc_test = accuracy_score(test_true_binary, test_pred[:, i])

        records.append({
            'Value': name,
            'F1': f"{f1_val:.3f} / {f1_test:.3f}",
            'F1-macro': f"{f1_macro_val:.3f} / {f1_macro_test:.3f}",
            'Accuracy': f"{acc_val:.3f} / {acc_test:.3f}"
        })
    
    df_results = pd.DataFrame(records)
    df_results.to_csv( "", index=False)
   
    return df_results

In [68]:
# Per-label metrics using the per-label thresholds; each cell reads "validation / test".
evaluate_model_dual(val_labels, val_predictions, categorized_annotations_test, predictions_test_bert, best_thresholds, values_list)

Unnamed: 0,Value,F1,F1-macro,Accuracy
0,Self-direction,0.758 / 0.768,0.814 / 0.823,0.831 / 0.840
1,Stimulation,0.727 / 0.725,0.828 / 0.827,0.888 / 0.886
2,Hedonism,0.647 / 0.613,0.799 / 0.779,0.914 / 0.903
3,Achievement,0.725 / 0.693,0.843 / 0.825,0.932 / 0.925
4,Power,0.797 / 0.640,0.884 / 0.809,0.951 / 0.959
5,Security,0.689 / 0.628,0.822 / 0.797,0.922 / 0.937
6,Conformity,0.512 / 0.518,0.735 / 0.740,0.923 / 0.927
7,Tradition,0.746 / 0.627,0.859 / 0.798,0.949 / 0.942
8,Benevolence,0.802 / 0.821,0.864 / 0.862,0.892 / 0.875
9,Universalism,0.615 / 0.604,0.781 / 0.782,0.908 / 0.927
