In [1]:
# Generic: A Modular Multi-Pipeline Framework for Probability Fusion Ensembles
# Specific: Cross-Lingual Sentiment Analysis with Probability Fusion Ensembles: A Modular Multi-Pipeline Framework for Low-Resource Languages

import os
import pandas as pd
import pprint

from pathlib import Path
from transformers import pipeline

from src.config import *
from src.metrics import evaluate_pipe
from src import (
    context,
    helper,
    sentiment, 
    utility, 
)

# Process-wide runtime switches. These are set before any model or tokenizer
# is created by the action dispatch further down in this cell.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

# NOTE(review): presumably prints/lists the active project configuration;
# confirm against src.helper.
helper.list_config()

if App.HAS_GPU:
    # GPU-only settings, applied in one call (same insertion order as before:
    # the Mamba flag first, then synchronous CUDA launches for debugging).
    os.environ.update(
        {
            "MAMBA_USE_MAMBAPY": Mamba.FORCE_CUDA,
            "CUDA_LAUNCH_BLOCKING": "1",
        }
    )

# Entry point of the notebook: run exactly one workflow, selected by the
# project configuration value App.ACTION ("INFER", "ENSEMBLE" or "TRAIN").
# Any other value raises ValueError.
if App.ACTION == "INFER":
    # Small smoke-test batch: two Tagalog sentences and one English sentence.
    sample_texts = [
        "Maganda ang serbisyo at mabilis ang delivery!",
        "Sobrang pangit ng karanasan ko.",
        "It was okay, nothing special.",
    ]
    sentiment.infer(sample_texts, MBert)
elif App.ACTION == "ENSEMBLE":
    # One temperature and one fusion weight per model.
    # NOTE(review): presumably positionally aligned with [MBert, Mamba];
    # confirm against sentiment.ensemble.
    temps = [1.1, 0.9]
    weights = [0.4, 0.6]
    ens = sentiment.ensemble([MBert, Mamba], temps, weights)
    print(ens)
elif App.ACTION == "TRAIN":
    mbert_context = context.setup_pipeline(MBert, require_translation = False)
    mbert_trainer = sentiment.train(mbert_context)

    # Additional pipelines, currently disabled. Uncomment a pair to train
    # that model as well.
    #xlmr_context = context.setup_pipeline(Xlmr, require_translation = False)
    #xlmr_trainer = sentiment.train(xlmr_context)

    #mamba_context = context.setup_pipeline(Mamba, require_translation = True)
    #mamba_trainer = sentiment.train(mamba_context)
else:
    # Report the offending value and the accepted ones so a configuration
    # typo can be diagnosed without opening src.config.
    raise ValueError(
        f"Invalid action: {App.ACTION!r}. "
        "Expected one of: 'INFER', 'ENSEMBLE', 'TRAIN'."
    )



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/214 [00:00<?, ? examples/s]

Map:   0%|          | 0/30 [00:00<?, ? examples/s]

Map:   0%|          | 0/62 [00:00<?, ? examples/s]

{'dataloader_num_workers': 1, 'eval_strategy': 'epoch', 'fp16': True, 'gradient_accumulation_steps': 2, 'greater_is_better': True, 'learning_rate': 2e-05, 'load_best_model_at_end': True, 'logging_steps': 50, 'logging_strategy': 'steps', 'lr_scheduler_type': 'linear', 'max_grad_norm': 1.0, 'metric_for_best_model': 'eval_f1_macro', 'num_train_epochs': 15, 'output_dir': './mbert_sentiment', 'per_device_eval_batch_size': 32, 'per_device_train_batch_size': 8, 'save_strategy': 'epoch', 'seed': 42, 'warmup_ratio': 0.06, 'weight_decay': 0.01}
[OPTIMIZER] epoch_start=0 global_step=0 wrapped=AcceleratedOptimizer base=AdamW lr=0.0 id=126607492403168 hyperparams={'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[OPTIMIZER_GROUP]   group[0] {'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'a

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,No log,1.052214,0.6,0.543087,0.578704,0.564815
2,No log,0.942882,0.6,0.58642,0.740741,0.601852
3,No log,0.755001,0.566667,0.463203,0.440351,0.546296
4,0.933800,0.591875,0.8,0.791855,0.803571,0.787037
5,0.933800,0.521056,0.866667,0.859259,0.896296,0.851852
6,0.933800,0.57883,0.8,0.803947,0.823077,0.796296
7,0.933800,0.562891,0.866667,0.870287,0.886905,0.861111
8,0.341900,0.5715,0.866667,0.870287,0.886905,0.861111
9,0.341900,0.715317,0.8,0.807928,0.818182,0.805556
10,0.341900,0.861991,0.8,0.807928,0.818182,0.805556


[TRAINING_PERFORMANCE] epoch=1.0 tokens=0 time=3.18s tok/s=0.0 ex/s=0.00 peak_mem=3.33GiB lr=2.00e-05
[EVALUATION_PERFORMANCE] tokens=8192 time=0.04s tok/s=225618.9 ex/s=1762.65
[EVALUATION_PERFORMANCE] accuracy=0.6000 cost=$0.00 accuracy_per_$=314.82
[OPTIMIZER] epoch_start=1.0 global_step=14 wrapped=AcceleratedOptimizer base=AdamW lr=2e-05 id=126607492403168 hyperparams={'lr': 2e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[OPTIMIZER_GROUP]   group[0] {'lr': 2e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[SCHEDULER]   scheduler_last_lr=[2e-05]
[PARAMS]   params_total=177,855,747 params_trainable=177,855,747
[TRAINING

[EVALUATION_PERFORMANCE] tokens=16384 time=0.06s tok/s=276678.7 ex/s=2161.55
[EVALUATION_PERFORMANCE] accuracy=0.8226 cost=$0.16 accuracy_per_$=5.06

### BERT-BASE-MULTILINGUAL-CASED EVALUATION METRICS ###
{'epoch': 15.0,
 'eval_accuracy': 0.8225806451612904,
 'eval_f1_macro': 0.8146523716699156,
 'eval_loss': 0.7139000296592712,
 'eval_precision_macro': 0.8307838561922046,
 'eval_recall_macro': 0.8095906432748537,
 'eval_runtime': 0.126,
 'eval_samples_per_second': 491.91,
 'eval_steps_per_second': 15.868}


Saving the dataset (0/1 shards):   0%|          | 0/30 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/214 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/62 [00:00<?, ? examples/s]