In [1]:
# Generic: A Modular Multi-Pipeline Framework for Probability Fusion Ensembles
# Specific: Cross-Lingual Sentiment Analysis with Probability Fusion Ensembles: A Modular Multi-Pipeline Framework for Low-Resource Languages

import os
import pandas as pd
import pprint

from pathlib import Path
from transformers import pipeline

from src.config import *
from src.metrics import evaluate_pipe
from src import (
    context,
    helper,
    sentiment, 
    utility, 
)

# --- Runtime environment ----------------------------------------------------
# `App`, `Mamba` and `MBert` are not defined in this cell; they are expected to
# be provided by the wildcard import of `src.config` above.

# Disable the Hugging Face tokenizers' internal parallelism for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# helper.list_config()

if App.HAS_GPU:
    # os.environ only accepts str values. Coerce explicitly so a non-string
    # config constant (e.g. a bool) cannot raise TypeError here; if the value
    # is already a string this is a no-op.
    os.environ["MAMBA_USE_MAMBAPY"] = str(Mamba.FORCE_CUDA)

# --- Action dispatch --------------------------------------------------------
# Exactly one branch runs, selected by App.ACTION. Names assigned inside the
# branches stay at module level so later cells can inspect them.
if App.ACTION == "INFER":
    sample_texts = [
        "Maganda ang serbisyo at mabilis ang delivery!",
        "Sobrang pangit ng karanasan ko.",
        "It was okay, nothing special.",
    ]
    # Run the same sample sentences through each model config, Mamba first.
    for model_config in (Mamba, MBert):
        sentiment.infer(sample_texts, model_config)
elif App.ACTION == "ENSEMBLE":
    # temps and weights are positional: entry i is passed alongside the i-th
    # model in the list given to sentiment.ensemble (MBert first, then Mamba).
    temps = [1.1, 0.9]
    weights = [0.4, 0.6]
    ens = sentiment.ensemble([MBert, Mamba], temps, weights)
    print(ens)
elif App.ACTION == "TRAIN":
    mbert_context = context.setup_pipeline(MBert, require_translation=False)
    mbert_trainer = sentiment.train(mbert_context)

    # Disabled pipelines -- uncomment a pair to train that model as well.
    # Note that only the Mamba pipeline is set up with require_translation=True.
    # xlmr_context = context.setup_pipeline(Xlmr, require_translation=False)
    # xlmr_trainer = sentiment.train(xlmr_context)

    # mamba_context = context.setup_pipeline(Mamba, require_translation=True)
    # mamba_trainer = sentiment.train(mamba_context)
else:
    # Report the offending value and the accepted ones so a misconfigured
    # App.ACTION can be diagnosed straight from the traceback.
    raise ValueError(
        f"Invalid action: {App.ACTION!r}. "
        "Expected one of 'INFER', 'ENSEMBLE', 'TRAIN'."
    )



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/21 [00:00<?, ? examples/s]

{'dataloader_num_workers': 4, 'eval_strategy': 'epoch', 'fp16': True, 'gradient_accumulation_steps': 1, 'greater_is_better': True, 'learning_rate': 2e-05, 'load_best_model_at_end': True, 'logging_steps': 50, 'logging_strategy': 'steps', 'lr_scheduler_type': 'linear', 'max_grad_norm': 1.0, 'metric_for_best_model': 'eval_f1_macro', 'num_train_epochs': 5, 'output_dir': './mbert_sentiment', 'per_device_eval_batch_size': 64, 'per_device_train_batch_size': 32, 'save_strategy': 'epoch', 'seed': 42, 'warmup_ratio': 0.06, 'weight_decay': 0.01}
AdamW {'lr': 2e-05, 'weight_decay': 0.01, 'eps': 1e-06, 'betas': (0.9, 0.999)}
SGD {'lr': 0.0001, 'weight_decay': 0.01, 'nesterov': True, 'momentum': 0.95}
[DBG-LIVE] epoch_start=0 global_step=0 wrapped=AcceleratedOptimizer base=AdamW lr=0.0 id=133135064372128 hyperparams={'lr': 0.0, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Precision Macro,Recall Macro
1,No log,1.08432,0.4,0.411111,0.595238,0.416667
2,No log,1.042808,0.4,0.314815,0.277778,0.388889
3,No log,1.022843,0.4,0.314815,0.277778,0.388889
4,No log,1.022344,0.4,0.314815,0.277778,0.388889
5,No log,1.021859,0.4,0.314815,0.277778,0.388889


[DBG-LIVE] epoch_start=1.0 global_step=3 wrapped=AcceleratedOptimizer base=AdamW lr=1.7142857142857142e-05 id=133135064372128 hyperparams={'lr': 1.7142857142857142e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
[DBG-LIVE] epoch_start=2.0 global_step=6 wrapped=AcceleratedOptimizer base=AdamW lr=1.2857142857142859e-05 id=133135064372128 hyperparams={'lr': 1.2857142857142859e-05, 'betas': (0.9, 0.999), 'eps': 1e-06, 'weight_decay': 0.01, 'amsgrad': False, 'maximize': False, 'foreach': None, 'capturable': False, 'differentiable': False, 'fused': None, 'decoupled_weight_decay': True, 'initial_lr': 2e-05}
Switching optimizer to SGD right after epoch 3 (next epoch will print SGD in your debug)
[SWITCHED] base=SGD lr=0.0001
[DBG-LIVE] epoch_start=3.0 global_step=9 wrapped=AcceleratedOptimizer base=SGD lr=0.0001 


### BERT-BASE-MULTILINGUAL-CASED EVALUATION METRICS ###
{'epoch': 5.0,
 'eval_accuracy': 0.2857142857142857,
 'eval_f1_macro': 0.225,
 'eval_loss': 1.0926361083984375,
 'eval_precision_macro': 0.21428571428571427,
 'eval_recall_macro': 0.27777777777777773,
 'eval_runtime': 0.1894,
 'eval_samples_per_second': 110.871,
 'eval_steps_per_second': 5.28}


Saving the dataset (0/1 shards):   0%|          | 0/10 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/69 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/21 [00:00<?, ? examples/s]