In [1]:
# Generic: A Modular Multi-Pipeline Framework for Probability Fusion Ensembles
# Specific: Cross-Lingual Sentiment Analysis with Probability Fusion Ensembles: A Modular Multi-Pipeline Framework for Low-Resource Languages

import os
import pandas as pd
import pprint

from pathlib import Path
from transformers import pipeline

from src.config import *
from src.metrics import evaluate_pipe
from src import (
    context,
    helper,
    sentiment, 
    utility, 
)

# Disable Hugging Face tokenizers' own parallelism before any pipeline work
# in this cell starts.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
helper.list_config()  # presumably reports the active configuration — see src.helper

# On GPU hosts, export the Mamba backend switch taken from the project config.
# NOTE(review): os.environ only accepts str values — confirm Mamba.FORCE_CUDA is one.
if App.HAS_GPU:
    os.environ["MAMBA_USE_MAMBAPY"] = Mamba.FORCE_CUDA

# Reject unknown actions up front; the dispatch below handles exactly these three.
if App.ACTION not in ("INFER", "ENSEMBLE", "TRAIN"):
    raise ValueError("Invalid action.")

if App.ACTION == "TRAIN":
    # Fine-tune mBERT; this pipeline is set up without translation.
    mbert_context = context.setup_pipeline(MBert, require_translation=False)
    mbert_trainer = sentiment.train(mbert_context)

    # Other pipelines, currently disabled — uncomment to train them as well.
    #xlmr_context = context.setup_pipeline(Xlmr, require_translation = False)
    #xlmr_trainer = sentiment.train(xlmr_context)

    #mamba_context = context.setup_pipeline(Mamba, require_translation = True)
    #mamba_trainer = sentiment.train(mamba_context)
elif App.ACTION == "ENSEMBLE":
    # One entry per model, listed in the same order as [MBert, Mamba].
    # Presumably per-model temperatures and fusion weights — confirm against
    # sentiment.ensemble.
    temps = [1.1, 0.9]
    weights = [0.4, 0.6]
    ens = sentiment.ensemble([MBert, Mamba], temps, weights)
    print(ens)
elif App.ACTION == "INFER":
    # Smoke-test inputs: two Tagalog sentences and one English sentence.
    sample_texts = [
        "Maganda ang serbisyo at mabilis ang delivery!",
        "Sobrang pangit ng karanasan ko.",
        "It was okay, nothing special.",
    ]
    # Run the same texts through each model, Mamba first and then mBERT.
    sentiment.infer(sample_texts, Mamba)
    sentiment.infer(sample_texts, MBert)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/21 [00:00<?, ? examples/s]

{'dataloader_num_workers': 4, 'eval_strategy': 'epoch', 'fp16': True, 'gradient_accumulation_steps': 1, 'greater_is_better': True, 'learning_rate': 2e-05, 'load_best_model_at_end': True, 'logging_steps': 50, 'logging_strategy': 'steps', 'lr_scheduler_type': 'linear', 'max_grad_norm': 1.0, 'metric_for_best_model': 'eval_f1_macro', 'num_train_epochs': 5, 'output_dir': './mbert_sentiment', 'per_device_eval_batch_size': 64, 'per_device_train_batch_size': 32, 'save_strategy': 'epoch', 'seed': 42, 'warmup_ratio': 0.06, 'weight_decay': 0.01}
[OPTIMIZER] epoch_start=0 global_step=0 wrapped=AcceleratedOptimizer base=AdamW lr=0.0 id=130028588329600 hyperparams={'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[OPTIMIZER_GROUP]   group[0] {'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'a

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,No log,1.05529,0.5,0.525926,0.611111,0.527778
2,No log,1.039355,0.4,0.190476,0.133333,0.333333
3,No log,1.003815,0.5,0.353535,0.301587,0.444444
4,No log,1.00271,0.5,0.353535,0.301587,0.444444
5,No log,1.001536,0.4,0.295238,0.25,0.361111


[TRAINING_PERFORMANCE] epoch=1.0 tokens=0 time=0.78s tok/s=0.0 ex/s=0.00 peak_mem=3.33GiB lr=1.71e-05
[EVALUATION_PERFORMANCE] tokens=16384 time=0.02s tok/s=790302.9 ex/s=6174.24
[EVALUATION_PERFORMANCE] accuracy=0.5000 cost=$0.00 accuracy_per_$=810.99
[OPTIMIZER] epoch_start=1.0 global_step=3 wrapped=AcceleratedOptimizer base=AdamW lr=1.7142857142857142e-05 id=130028588329600 hyperparams={'lr': 1.7142857142857142e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[OPTIMIZER_GROUP]   group[0] {'lr': 1.7142857142857142e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[SCHEDULER]   scheduler_last_lr=[1.7142857142857142e-05]
[PARAM

[EVALUATION_PERFORMANCE] tokens=16384 time=0.01s tok/s=1404894.6 ex/s=10975.74
[EVALUATION_PERFORMANCE] accuracy=0.3810 cost=$0.05 accuracy_per_$=8.39

### BERT-BASE-MULTILINGUAL-CASED EVALUATION METRICS ###
{'epoch': 5.0,
 'eval_accuracy': 0.38095238095238093,
 'eval_f1_macro': 0.3762483130904184,
 'eval_loss': 1.0933195352554321,
 'eval_precision_macro': 0.4358974358974359,
 'eval_recall_macro': 0.40740740740740744,
 'eval_runtime': 0.1692,
 'eval_samples_per_second': 124.128,
 'eval_steps_per_second': 5.911}


Saving the dataset (0/1 shards):   0%|          | 0/10 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/69 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/21 [00:00<?, ? examples/s]