In [2]:
from models import models
# Show the mapping used later as `models[lang]`: keys are language codes
# (plus a 'multilingual' entry) and values are pretrained checkpoint names.
models

{'multilingual': 'FacebookAI/xlm-roberta-base',
 'arq': 'CAMeL-Lab/bert-base-arabic-camelbert-da',
 'amh': 'Davlan/xlm-roberta-base-finetuned-amharic',
 'eng': 'FacebookAI/roberta-base',
 'hau': 'Davlan/xlm-roberta-base-finetuned-hausa',
 'kin': 'Davlan/xlm-roberta-base-finetuned-kinyarwanda',
 'mar': 'l3cube-pune/marathi-roberta',
 'ary': 'CAMeL-Lab/bert-base-arabic-camelbert-da',
 'esp': 'PlanTL-GOB-ES/roberta-base-bne',
 'tel': 'l3cube-pune/telugu-bert'}

In [3]:
from util import get_langs
# Language codes to iterate over in the training loop; displayed below so the
# run order is visible in the notebook.
langs = get_langs()
langs

['amh', 'arq', 'ary', 'eng', 'esp', 'hau', 'kin', 'mar', 'tel']

In [4]:
from pair_encoder.evaluation import CorrelationEvaluator, get_correlation
from pair_encoder.model import PairEncoder

import pandas as pd

from util import get_data, get_pairs, eval_and_submit

def eval_lang(model, lang, save_name, model_name="default"):
    """Score ``model`` on the dev and test pairs of ``lang`` and submit predictions.

    Args:
        model: Pair encoder handed to ``get_correlation`` and ``eval_and_submit``.
        lang: Language code used to select the data and passed on for submission.
        save_name: Suffix appended to the submission timestamp
            (``"{lang}-test-{save_name}"``).
        model_name: Forwarded unchanged to ``eval_and_submit``.

    Returns:
        dict with keys ``"dev"`` and ``"test"`` holding whatever
        ``get_correlation`` returns for each split.
    """
    # Load both splits up front, dev first, so data access happens in the same
    # order as before any scoring starts.
    dev_pairs = get_pairs(get_data(lang=lang, train=False))
    test_pairs = get_pairs(get_data(lang=lang, test=True))

    scores = {}
    for split_name, split_pairs in (("dev", dev_pairs), ("test", test_pairs)):
        scores[split_name] = get_correlation(test=split_pairs, pair_encoder=model)

    # NOTE(review): presumably this is what writes the submission CSV seen in
    # the cell output -- confirm in util.eval_and_submit.
    submission_tag = f"{lang}-test-{save_name}"
    eval_and_submit(
        pair_encoder=model,
        lang=lang,
        model_name=model_name,
        timestamp=submission_tag,
        evaluation_phase=True,
    )
    return scores


In [5]:
from pair_encoder import train_encoder


# Sweep the `k` argument of train_encoder: for each K, train one encoder per
# language with that language's checkpoint from `models`, evaluate it with
# eval_lang, and print a LaTeX table of the dev/test scores (scaled by 100).

# Loop-invariant settings, defined once instead of on every iteration.
sim_model = "intfloat/multilingual-e5-base"
# Column order for the printed table; must be a subset of `langs`, otherwise
# the column selection below raises KeyError.
columns = [
    "arq", "amh", "eng", "hau", "kin", "mar", "ary", "esp", "tel"
]

for K in [1, 2, 3, 4, 5]:
    print(K)
    # Scores for this K only: {lang: {"dev": ..., "test": ...}}.
    baseline_train_lang = {}
    for lang in langs:
        print(f"Training on {lang}...")
        lang_train = get_data(lang=lang, train=True)
        lang_dev = get_data(lang=lang, train=False)

        train_pairs = get_pairs(lang_train)
        eval_pairs = get_pairs(lang_dev)
        evaluator = CorrelationEvaluator.load(eval_pairs)

        model_name = models[lang]
        print(f"Using model {model_name} for {lang}...")

        # NOTE(review): the timestamp does not include K, so runs for different
        # K share the same tag; harmless while save_to=None -- confirm before
        # enabling checkpoint saving.
        encoder, _ = train_encoder(
            train_samples=train_pairs,
            upscaling_samples=None,
            evaluator=evaluator,
            timestamp=f"{model_name}-train-{lang}",
            model_name=model_name,
            similarity_model=sim_model,
            batch_size=32,
            learning_rate=2e-5,
            max_grad_norm=1.0,
            epochs=5,
            eval_steps=0,
            max_length=200,
            k=K,
            weak_training_epochs=2,  # used if k > 0
            seed=42,
            save_to=None,
            verbose=True,
            device="cuda"
        )

        baseline_train_lang[lang] = eval_lang(encoder, lang, save_name=f"baseline-train-{K}-{lang}")

    # Rows = languages, columns = splits; prefix the split names with the K tag.
    df_baseline_lang = pd.DataFrame(baseline_train_lang).T
    df_prefix = f"custom-lang-k{K}"
    df_baseline_lang.columns = [f"{df_prefix}_{c}" for c in df_baseline_lang.columns]

    # Transpose so each split is a row, convert to percentages, and round.
    # DataFrame.round replaces the deprecated element-wise applymap(round).
    final_df = (df_baseline_lang.T * 100).round(2)
    final_df = final_df[columns]
    print(final_df.to_latex(float_format="%.2f"))

1
Training on amh...
Using model Davlan/xlm-roberta-base-finetuned-amharic for amh...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-amharic and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/31 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/31 [00:00<?, ?it/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

Reducing from 1972 to 1972
Using 1972 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/93 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/93 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/93 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/93 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/93 [00:00<?, ?it/s]

Saving submission to submissions/amh-test-baseline-train-1-amh/pred_amh_a.csv
Training on arq...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for arq...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/77 [00:00<?, ?it/s]

Reducing from 2459 to 2459
Using 2459 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/117 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/117 [00:00<?, ?it/s]

Saving submission to submissions/arq-test-baseline-train-1-arq/pred_arq_a.csv
Training on ary...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for ary...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Reducing from 1554 to 1554
Using 1554 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/78 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/78 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/78 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/78 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/78 [00:00<?, ?it/s]

Saving submission to submissions/ary-test-baseline-train-1-ary/pred_ary_a.csv
Training on eng...
Using model FacebookAI/roberta-base for eng...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/172 [00:00<?, ?it/s]

Batches:   0%|          | 0/298 [00:00<?, ?it/s]

Reducing from 9522 to 9522
Using 9522 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/470 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/470 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/470 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/470 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/470 [00:00<?, ?it/s]

Saving submission to submissions/eng-test-baseline-train-1-eng/pred_eng_a.csv
Training on esp...
Using model PlanTL-GOB-ES/roberta-base-bne for esp...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Reducing from 2879 to 2879
Using 2879 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/139 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/139 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/139 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/139 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/139 [00:00<?, ?it/s]



Saving submission to submissions/esp-test-baseline-train-1-esp/pred_esp_a.csv
Training on hau...
Using model Davlan/xlm-roberta-base-finetuned-hausa for hau...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-hausa and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/55 [00:00<?, ?it/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Reducing from 2259 to 2259
Using 2259 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/125 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/125 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/125 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/125 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/125 [00:00<?, ?it/s]

Saving submission to submissions/hau-test-baseline-train-1-hau/pred_hau_a.csv
Training on kin...
Using model Davlan/xlm-roberta-base-finetuned-kinyarwanda for kin...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-kinyarwanda and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/45 [00:00<?, ?it/s]

Reducing from 1440 to 1440
Using 1440 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/70 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/70 [00:00<?, ?it/s]

Saving submission to submissions/kin-test-baseline-train-1-kin/pred_kin_a.csv
Training on mar...
Using model l3cube-pune/marathi-roberta for mar...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/marathi-roberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/38 [00:00<?, ?it/s]

Batches:   0%|          | 0/75 [00:00<?, ?it/s]

Reducing from 2374 to 2374
Using 2374 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/112 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/112 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/112 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/112 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/112 [00:00<?, ?it/s]

Saving submission to submissions/mar-test-baseline-train-1-mar/pred_mar_a.csv
Training on tel...
Using model l3cube-pune/telugu-bert for tel...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/telugu-bert and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/37 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/37 [00:00<?, ?it/s]

Batches:   0%|          | 0/72 [00:00<?, ?it/s]

Reducing from 2273 to 2273
Using 2273 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/108 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/108 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/108 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/108 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/108 [00:00<?, ?it/s]

Saving submission to submissions/tel-test-baseline-train-1-tel/pred_tel_a.csv


  final_df = final_df.applymap(lambda x: round(x, 2))


\begin{tabular}{lrrrrrrrrr}
\toprule
 & arq & amh & eng & hau & kin & mar & ary & esp & tel \\
\midrule
custom-lang-k1_dev & 48.63 & 86.63 & 82.62 & 75.47 & 64.57 & 84.29 & 79.40 & 69.69 & 80.33 \\
custom-lang-k1_test & 44.56 & 81.99 & 83.42 & 66.56 & 72.75 & 85.83 & 80.74 & NaN & 84.42 \\
\bottomrule
\end{tabular}

2
Training on amh...
Using model Davlan/xlm-roberta-base-finetuned-amharic for amh...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-amharic and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/31 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/31 [00:00<?, ?it/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

Reducing from 3944 to 3944
Using 3944 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/155 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/155 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/155 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/155 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/155 [00:00<?, ?it/s]

Saving submission to submissions/amh-test-baseline-train-2-amh/pred_amh_a.csv
Training on arq...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for arq...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/77 [00:00<?, ?it/s]

Reducing from 4918 to 4918
Using 4918 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/194 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/194 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/194 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/194 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/194 [00:00<?, ?it/s]

Saving submission to submissions/arq-test-baseline-train-2-arq/pred_arq_a.csv
Training on ary...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for ary...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Reducing from 3108 to 3108
Using 3108 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/126 [00:00<?, ?it/s]

Saving submission to submissions/ary-test-baseline-train-2-ary/pred_ary_a.csv
Training on eng...
Using model FacebookAI/roberta-base for eng...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/172 [00:00<?, ?it/s]

Batches:   0%|          | 0/298 [00:00<?, ?it/s]

Reducing from 19044 to 19044
Using 19044 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/767 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/767 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/767 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/767 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/767 [00:00<?, ?it/s]

Saving submission to submissions/eng-test-baseline-train-2-eng/pred_eng_a.csv
Training on esp...
Using model PlanTL-GOB-ES/roberta-base-bne for esp...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Reducing from 5758 to 5758
Using 5758 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/229 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/229 [00:00<?, ?it/s]



Saving submission to submissions/esp-test-baseline-train-2-esp/pred_esp_a.csv
Training on hau...
Using model Davlan/xlm-roberta-base-finetuned-hausa for hau...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-hausa and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/55 [00:00<?, ?it/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Reducing from 4518 to 4518
Using 4518 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/196 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/196 [00:00<?, ?it/s]

Saving submission to submissions/hau-test-baseline-train-2-hau/pred_hau_a.csv
Training on kin...
Using model Davlan/xlm-roberta-base-finetuned-kinyarwanda for kin...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-kinyarwanda and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/45 [00:00<?, ?it/s]

Reducing from 2880 to 2880
Using 2880 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/115 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/115 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/115 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/115 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/115 [00:00<?, ?it/s]

Saving submission to submissions/kin-test-baseline-train-2-kin/pred_kin_a.csv
Training on mar...
Using model l3cube-pune/marathi-roberta for mar...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/marathi-roberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/38 [00:00<?, ?it/s]

Batches:   0%|          | 0/75 [00:00<?, ?it/s]

Reducing from 4748 to 4748
Using 4748 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/186 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/186 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/186 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/186 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/186 [00:00<?, ?it/s]

Saving submission to submissions/mar-test-baseline-train-2-mar/pred_mar_a.csv
Training on tel...
Using model l3cube-pune/telugu-bert for tel...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/telugu-bert and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/37 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/37 [00:00<?, ?it/s]

Batches:   0%|          | 0/72 [00:00<?, ?it/s]

Reducing from 4546 to 4546
Using 4546 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/179 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/179 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/179 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/179 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/179 [00:00<?, ?it/s]

Saving submission to submissions/tel-test-baseline-train-2-tel/pred_tel_a.csv
\begin{tabular}{lrrrrrrrrr}
\toprule
 & arq & amh & eng & hau & kin & mar & ary & esp & tel \\
\midrule
custom-lang-k2_dev & 49.22 & 85.64 & 83.02 & 74.71 & 65.61 & 84.06 & 78.53 & 68.64 & 81.09 \\
custom-lang-k2_test & 43.75 & 81.89 & 83.27 & 65.66 & 70.27 & 85.72 & 80.49 & NaN & 85.05 \\
\bottomrule
\end{tabular}

3
Training on amh...
Using model Davlan/xlm-roberta-base-finetuned-amharic for amh...


  final_df = final_df.applymap(lambda x: round(x, 2))
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-amharic and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/31 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/31 [00:00<?, ?it/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

Reducing from 5916 to 5916
Using 5916 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/216 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/216 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/216 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/216 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/216 [00:00<?, ?it/s]

Saving submission to submissions/amh-test-baseline-train-3-amh/pred_amh_a.csv
Training on arq...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for arq...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/77 [00:00<?, ?it/s]

Reducing from 7377 to 7377
Using 7377 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/270 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/270 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/270 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/270 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/270 [00:00<?, ?it/s]

Saving submission to submissions/arq-test-baseline-train-3-arq/pred_arq_a.csv
Training on ary...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for ary...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Reducing from 4662 to 4662
Using 4662 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/175 [00:00<?, ?it/s]

Saving submission to submissions/ary-test-baseline-train-3-ary/pred_ary_a.csv
Training on eng...
Using model FacebookAI/roberta-base for eng...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/172 [00:00<?, ?it/s]

Batches:   0%|          | 0/298 [00:00<?, ?it/s]

Reducing from 28566 to 28566
Using 28566 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1065 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/1065 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/1065 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/1065 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/1065 [00:00<?, ?it/s]

Saving submission to submissions/eng-test-baseline-train-3-eng/pred_eng_a.csv
Training on esp...
Using model PlanTL-GOB-ES/roberta-base-bne for esp...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Reducing from 8637 to 8637
Using 8637 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/319 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/319 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/319 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/319 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/319 [00:00<?, ?it/s]



Saving submission to submissions/esp-test-baseline-train-3-esp/pred_esp_a.csv
Training on hau...
Using model Davlan/xlm-roberta-base-finetuned-hausa for hau...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-hausa and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/55 [00:00<?, ?it/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Reducing from 6777 to 6777
Using 6777 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/267 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/267 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/267 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/267 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/267 [00:00<?, ?it/s]

Saving submission to submissions/hau-test-baseline-train-3-hau/pred_hau_a.csv
Training on kin...
Using model Davlan/xlm-roberta-base-finetuned-kinyarwanda for kin...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-kinyarwanda and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/45 [00:00<?, ?it/s]

Reducing from 4320 to 4320
Using 4320 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/160 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/160 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/160 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/160 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/160 [00:00<?, ?it/s]

Saving submission to submissions/kin-test-baseline-train-3-kin/pred_kin_a.csv
Training on mar...
Using model l3cube-pune/marathi-roberta for mar...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/marathi-roberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/38 [00:00<?, ?it/s]

Batches:   0%|          | 0/75 [00:00<?, ?it/s]

Reducing from 7122 to 7122
Using 7122 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/261 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/261 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/261 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/261 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/261 [00:00<?, ?it/s]

Saving submission to submissions/mar-test-baseline-train-3-mar/pred_mar_a.csv
Training on tel...
Using model l3cube-pune/telugu-bert for tel...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/telugu-bert and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/37 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/37 [00:00<?, ?it/s]

Batches:   0%|          | 0/72 [00:00<?, ?it/s]

Reducing from 6819 to 6819
Using 6819 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/250 [00:00<?, ?it/s]

Saving submission to submissions/tel-test-baseline-train-3-tel/pred_tel_a.csv
\begin{tabular}{lrrrrrrrrr}
\toprule
 & arq & amh & eng & hau & kin & mar & ary & esp & tel \\
\midrule
custom-lang-k3_dev & 46.23 & 86.05 & 82.76 & 74.87 & 69.87 & 83.73 & 77.98 & 68.80 & 80.68 \\
custom-lang-k3_test & 42.28 & 81.13 & 83.39 & 64.67 & 71.47 & 85.36 & 80.37 & NaN & 84.73 \\
\bottomrule
\end{tabular}

4
Training on amh...
Using model Davlan/xlm-roberta-base-finetuned-amharic for amh...


  final_df = final_df.applymap(lambda x: round(x, 2))
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-amharic and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/31 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/31 [00:00<?, ?it/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

Reducing from 7888 to 7888
Using 7888 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/278 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/278 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/278 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/278 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/278 [00:00<?, ?it/s]

Saving submission to submissions/amh-test-baseline-train-4-amh/pred_amh_a.csv
Training on arq...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for arq...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/77 [00:00<?, ?it/s]

Reducing from 9836 to 9836
Using 9836 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/347 [00:00<?, ?it/s]

Saving submission to submissions/arq-test-baseline-train-4-arq/pred_arq_a.csv
Training on ary...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for ary...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Reducing from 6216 to 6216
Using 6216 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/224 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/224 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/224 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/224 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/224 [00:00<?, ?it/s]

Saving submission to submissions/ary-test-baseline-train-4-ary/pred_ary_a.csv
Training on eng...
Using model FacebookAI/roberta-base for eng...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/172 [00:00<?, ?it/s]

Batches:   0%|          | 0/298 [00:00<?, ?it/s]

Reducing from 38088 to 38088
Using 38088 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1363 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/1363 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/1363 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/1363 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/1363 [00:00<?, ?it/s]

Saving submission to submissions/eng-test-baseline-train-4-eng/pred_eng_a.csv
Training on esp...
Using model PlanTL-GOB-ES/roberta-base-bne for esp...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Reducing from 11516 to 11516
Using 11516 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/409 [00:00<?, ?it/s]



Saving submission to submissions/esp-test-baseline-train-4-esp/pred_esp_a.csv
Training on hau...
Using model Davlan/xlm-roberta-base-finetuned-hausa for hau...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-hausa and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/55 [00:00<?, ?it/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Reducing from 9036 to 9036
Using 9036 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/337 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/337 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/337 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/337 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/337 [00:00<?, ?it/s]

Saving submission to submissions/hau-test-baseline-train-4-hau/pred_hau_a.csv
Training on kin...
Using model Davlan/xlm-roberta-base-finetuned-kinyarwanda for kin...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-kinyarwanda and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/45 [00:00<?, ?it/s]

Reducing from 5760 to 5760
Using 5760 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/205 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/205 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/205 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/205 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/205 [00:00<?, ?it/s]

Saving submission to submissions/kin-test-baseline-train-4-kin/pred_kin_a.csv
Training on mar...
Using model l3cube-pune/marathi-roberta for mar...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/marathi-roberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/38 [00:00<?, ?it/s]

Batches:   0%|          | 0/75 [00:00<?, ?it/s]

Reducing from 9496 to 9496
Using 9496 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/335 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/335 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/335 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/335 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/335 [00:00<?, ?it/s]

Saving submission to submissions/mar-test-baseline-train-4-mar/pred_mar_a.csv
Training on tel...
Using model l3cube-pune/telugu-bert for tel...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/telugu-bert and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/37 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/37 [00:00<?, ?it/s]

Batches:   0%|          | 0/72 [00:00<?, ?it/s]

Reducing from 9092 to 9092
Using 9092 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/321 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/321 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/321 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/321 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/321 [00:00<?, ?it/s]

Saving submission to submissions/tel-test-baseline-train-4-tel/pred_tel_a.csv
\begin{tabular}{lrrrrrrrrr}
\toprule
 & arq & amh & eng & hau & kin & mar & ary & esp & tel \\
\midrule
custom-lang-k4_dev & 48.07 & 85.99 & 83.04 & 73.37 & 35.62 & 83.72 & 77.84 & 69.53 & 81.87 \\
custom-lang-k4_test & 42.54 & 81.44 & 83.38 & 64.84 & 57.18 & 85.28 & 80.20 & NaN & 84.45 \\
\bottomrule
\end{tabular}

5
Training on amh...
Using model Davlan/xlm-roberta-base-finetuned-amharic for amh...


  final_df = final_df.applymap(lambda x: round(x, 2))
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-amharic and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/31 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/31 [00:00<?, ?it/s]

Batches:   0%|          | 0/62 [00:00<?, ?it/s]

Reducing from 9860 to 9860
Using 9860 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/340 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/340 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/340 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/340 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/340 [00:00<?, ?it/s]

Saving submission to submissions/amh-test-baseline-train-5-amh/pred_amh_a.csv
Training on arq...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for arq...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Batches:   0%|          | 0/77 [00:00<?, ?it/s]

Reducing from 12295 to 12295
Using 12295 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/424 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/424 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/424 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/424 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/424 [00:00<?, ?it/s]

Saving submission to submissions/arq-test-baseline-train-5-arq/pred_arq_a.csv
Training on ary...
Using model CAMeL-Lab/bert-base-arabic-camelbert-da for ary...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-da and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/29 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/29 [00:00<?, ?it/s]

Batches:   0%|          | 0/49 [00:00<?, ?it/s]

Reducing from 7770 to 7770
Using 7770 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/272 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/272 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/272 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/272 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/272 [00:00<?, ?it/s]

Saving submission to submissions/ary-test-baseline-train-5-ary/pred_ary_a.csv
Training on eng...
Using model FacebookAI/roberta-base for eng...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/172 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/172 [00:00<?, ?it/s]

Batches:   0%|          | 0/298 [00:00<?, ?it/s]

Reducing from 47610 to 47610
Using 47610 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1660 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/1660 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/1660 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/1660 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/1660 [00:00<?, ?it/s]

Saving submission to submissions/eng-test-baseline-train-5-eng/pred_eng_a.csv
Training on esp...
Using model PlanTL-GOB-ES/roberta-base-bne for esp...


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-bne and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/49 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/49 [00:00<?, ?it/s]

Batches:   0%|          | 0/90 [00:00<?, ?it/s]

Reducing from 14395 to 14395
Using 14395 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/499 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/499 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/499 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/499 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/499 [00:00<?, ?it/s]



Saving submission to submissions/esp-test-baseline-train-5-esp/pred_esp_a.csv
Training on hau...
Using model Davlan/xlm-roberta-base-finetuned-hausa for hau...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-hausa and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/55 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/55 [00:00<?, ?it/s]

Batches:   0%|          | 0/71 [00:00<?, ?it/s]

Reducing from 11295 to 11295
Using 11295 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/408 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/408 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/408 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/408 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/408 [00:00<?, ?it/s]

Saving submission to submissions/hau-test-baseline-train-5-hau/pred_hau_a.csv
Training on kin...
Using model Davlan/xlm-roberta-base-finetuned-kinyarwanda for kin...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at Davlan/xlm-roberta-base-finetuned-kinyarwanda and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/25 [00:00<?, ?it/s]

Batches:   0%|          | 0/45 [00:00<?, ?it/s]

Reducing from 7200 to 7200
Using 7200 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/250 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/250 [00:00<?, ?it/s]

Saving submission to submissions/kin-test-baseline-train-5-kin/pred_kin_a.csv
Training on mar...
Using model l3cube-pune/marathi-roberta for mar...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/marathi-roberta and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/38 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/38 [00:00<?, ?it/s]

Batches:   0%|          | 0/75 [00:00<?, ?it/s]

Reducing from 11870 to 11870
Using 11870 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/409 [00:00<?, ?it/s]

Epoch 5:   0%|          | 0/409 [00:00<?, ?it/s]

Saving submission to submissions/mar-test-baseline-train-5-mar/pred_mar_a.csv
Training on tel...
Using model l3cube-pune/telugu-bert for tel...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/telugu-bert and are newly initialized: ['classifier.weight', 'classifier.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training...:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/37 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/37 [00:00<?, ?it/s]

Batches:   0%|          | 0/72 [00:00<?, ?it/s]

Reducing from 11365 to 11365
Using 11365 weak samples for upscaling.


Training...:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/392 [00:00<?, ?it/s]

Epoch 2:   0%|          | 0/392 [00:00<?, ?it/s]

Epoch 3:   0%|          | 0/392 [00:00<?, ?it/s]

Epoch 4:   0%|          | 0/392 [00:00<?, ?it/s]

KeyboardInterrupt: 