In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# *Installing Required Libraries*

In [1]:
!pip install transformers
!pip install datasets
!pip install accelerate
!pip install wandb

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[

In [None]:
%cd '/content/drive/MyDrive/'

/content/drive/MyDrive


In [None]:
!unzip /content/data.zip

Archive:  /content/data.zip
  inflating: data/val.csv            
  inflating: data/train.csv          
  inflating: data/test.csv           
  inflating: data/final_val.csv      
  inflating: data/final_test.csv     
  inflating: data/final_train.csv    


# *Importing libraries*

In [2]:
import json
import wandb
import pickle
import pandas as pd
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import torch
from datasets import Dataset
from datasets import load_dataset
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from transformers import AutoTokenizer, AutoModelForMaskedLM,AutoModelForSequenceClassification
from transformers import TrainerCallback, EarlyStoppingCallback
from transformers import TrainingArguments, Trainer

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
val_df = pd.read_csv("val.csv")

train_df = train_df.drop(columns=['Num_Tokens','Num_Sentences','corrected_text'])
test_df = test_df.drop(columns=['Num_Tokens','Num_Sentences','corrected_text'])
val_df = val_df.drop(columns=['Num_Tokens','Num_Sentences','corrected_text'])

In [4]:
with open('/content/rh-code-mixed-2.pkl', 'rb') as f:
    related_words = pickle.load(f)

In [5]:
train_df['labels'].value_counts()

Unnamed: 0_level_0,count
labels,Unnamed: 1_level_1
0,2319
1,741


In [6]:
val_df['labels'].value_counts()

Unnamed: 0_level_0,count
labels,Unnamed: 1_level_1
0,774
1,247


In [7]:
test_df['labels'].value_counts()

Unnamed: 0_level_0,count
labels,Unnamed: 1_level_1
0,774
1,247


In [8]:
train_df.to_csv('final_train.csv',index=False, encoding='utf-8')
test_df.to_csv('final_test.csv',index=False, encoding='utf-8')
val_df.to_csv('final_val.csv',index=False, encoding='utf-8')

## *Dataset loading as HuggingFace Trainer*

In [9]:
dataset = load_dataset('csv', data_files={'train': "final_train.csv",
                                              'val':"final_val.csv",'test':"final_test.csv"})

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [10]:
dataset

DatasetDict({
    train: Dataset({
        features: ['clean_text', 'labels', 'language_tags'],
        num_rows: 3060
    })
    val: Dataset({
        features: ['clean_text', 'labels', 'language_tags'],
        num_rows: 1021
    })
    test: Dataset({
        features: ['clean_text', 'labels', 'language_tags'],
        num_rows: 1021
    })
})

In [11]:
dataset['train'][0]

{'clean_text': 'hindu to kuch hai par tum to pori duniya mai gandgi macha rhe ho or bat rhi bhagwe ki to tumhe aba ke abe bhagwa hi the talvar ke dar se salvar khol di unhone isme tumhari koi galti nhi hai',
 'labels': 1,
 'language_tags': 'EN, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, EN, EN, HI, HI, HI, HI, HI, EN, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI'}

In [12]:
related_words["mot"]

{'related_words': [],
 'h_script': 'मोत',
 'language': 'HI',
 'corrected_word': 'mot'}

In [14]:
!pip install google-transliteration-api

Collecting google-transliteration-api
  Downloading google_transliteration_api-1.0.3-py3-none-any.whl.metadata (2.9 kB)
Downloading google_transliteration_api-1.0.3-py3-none-any.whl (5.2 kB)
Installing collected packages: google-transliteration-api
Successfully installed google-transliteration-api-1.0.3


In [16]:
from google.transliteration import transliterate_word
suggestions = transliterate_word('America', lang_code='hi')
print(suggestions)

['अमेरिका', 'अमेरीका', 'अमरीका', 'अमरिका', 'आमेरिका', 'अमेंरिका']


In [17]:
def transliterated_preprocess(sentences, related_words):
    texts = sentences['clean_text']
    processed_texts = []

    for text in texts:
        words = text.split()
        process_text = []

        for word in words:
            # Check if the word exists in the related_words dictionary
            if related_words[word].get('h_script') != None :
                # Append the h_script value from related_words to the interleaved_text list
                process_text.append(related_words[word]['h_script'])
            else:
                # If the word is not in related_words, just append the original word
                suggestions = transliterate_word(word, lang_code='hi')
                process_text.append(suggestions[0])

        # Join the list back into a string
        processed_texts.append(" ".join(process_text))

    return {"transliterated_text": processed_texts}

dataset = dataset.map(
    transliterated_preprocess,
    fn_kwargs={'related_words': related_words},
    batched=True
)

Map:   0%|          | 0/3060 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

In [18]:
dataset['train'][0]

{'clean_text': 'hindu to kuch hai par tum to pori duniya mai gandgi macha rhe ho or bat rhi bhagwe ki to tumhe aba ke abe bhagwa hi the talvar ke dar se salvar khol di unhone isme tumhari koi galti nhi hai',
 'labels': 1,
 'language_tags': 'EN, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, EN, EN, HI, HI, HI, HI, HI, EN, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI, HI',
 'transliterated_text': 'हिन्दु तो कुछ है पर तुम तो पूरी दुनिया मई गंदगी मचा रहे हो ओआर बत् रही भगवे की तो तुम्हे अब के अबे भगवा ही थे तलवार के दर से सलवार खोल दी उन्होंने इसमें तुम्हारी कोई गलती नहीं है'}

In [19]:
dataset['val'][0]

{'clean_text': 'hindu rastra ke ruzhan miya bhai ko kast hai chalo hindu is bat pe mast hai',
 'labels': 1,
 'language_tags': 'EN, EN, HI, HI, HI, HI, HI, HI, HI, HI, EN, HI, EN, HI, EN, HI',
 'transliterated_text': 'हिन्दु रस्त्र के रुझान मिया भाई को कास्ट है चलो हिन्दु इस बत् पे मस्त् है'}

In [20]:
dataset['test'][0]

{'clean_text': 'jai hind , in namuno ko bat bhi karni nahi ati hai , inko pension chahiye',
 'labels': 0,
 'language_tags': 'HI, EN, OOV, EN, HI, HI, EN, HI, HI, HI, HI, HI, OOV, HI, EN, HI',
 'transliterated_text': 'जय हिन्द् , इन् नमूनों को बत् भी करनी नहीं अति है , इनको पेंषन चाहिए'}

In [22]:
df11 = pd.read_csv("/content/transliteration_v2.csv")
df11['transliterate'] = dataset['test']['transliterated_text']

In [24]:
df11.to_csv("transliteration_v22.csv")

# For 5000 rows dataset with clean_text

# *Tokenization as per Hing-MBERT*

In [21]:
tokenizer = AutoTokenizer.from_pretrained("l3cube-pune/hing-mbert")
print(len(tokenizer))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/327 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.92M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

119547


In [22]:
len(tokenizer)

119547

In [23]:
mbert_dataset = dataset.map(
    lambda example: tokenizer(example['transliterated_text'],max_length=97,padding='max_length', truncation=True),
    batched=True,
    batch_size=64
)
mbert_dataset = mbert_dataset.remove_columns(["clean_text","language_tags","transliterated_text"])
mbert_dataset.set_format("torch")

Map:   0%|          | 0/3060 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

In [24]:
mbert_dataset['train'][0]

{'labels': tensor(1),
 'input_ids': tensor([   101,    899,  25936,  60254,  14070,  21042,  22022,  10569,  12213,
            880,  14070,  13841,  21042,  96597,  83874,  70219,    867,  52768,
          34315,    889,  22078,  33555,  13220,    863, 111193,  11549,    887,
          11845,  20429,  57203,    888,  19741,  72109,  10826,  21042,    880,
          14070,  45753, 103860,  49545,  10412,  49545,  11554,    888,  19741,
          28960,  14080,  17798,    880,  11714,  52884,  10412, 100906,  11072,
            898,  11714,  52884,    866,  51140,  41607,  27640,  53744,    880,
          14070,  45753,  42263,  10914,  38207,    867,  11714,  18406,  16791,
          10569,    102,      0,      0,      0,      0,      0,      0,      0,
              0,      0,      0,      0,      0,      0,      0,      0,      0,
              0,      0,      0,      0,      0,      0,      0]),
 'token_type_ids': tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [25]:
print("Batch Decode:")
print(tokenizer.batch_decode(mbert_dataset['train'][0]['input_ids']))

Batch Decode:
['[CLS]', 'ह', '##िन', '##्द', '##ु', 'तो', 'कुछ', 'है', 'पर', 'त', '##ु', '##म', 'तो', 'पूरी', 'दुनिया', 'मई', 'ग', '##ंद', '##गी', 'म', '##चा', 'रहे', 'हो', 'ओ', '##आ', '##र', 'ब', '##त', '##्', 'रही', 'भ', '##ग', '##वे', 'की', 'तो', 'त', '##ु', '##म्', '##हे', 'अब', 'के', 'अब', '##े', 'भ', '##ग', '##वा', 'ही', 'थे', 'त', '##ल', '##वार', 'के', 'दर', 'से', 'स', '##ल', '##वार', 'ख', '##ोल', 'दी', 'उन्होंने', 'इसमें', 'त', '##ु', '##म्', '##हार', '##ी', 'कोई', 'ग', '##ल', '##ती', 'नहीं', 'है', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']


In [26]:
cls = [tokenizer.cls_token_id]
sep = [tokenizer.sep_token_id]

input_str = dataset['train'][0]['transliterated_text']
# Tokenization happens in a few steps:
input_tokens = tokenizer.tokenize(input_str)
input_ids = tokenizer.convert_tokens_to_ids(input_tokens)
input_ids_special_tokens = cls + input_ids + sep

decoded_str = tokenizer.decode(input_ids_special_tokens)

print("start:                ", input_str)
print("tokenize:             ", input_tokens)
print("convert_tokens_to_ids:", input_ids)
print("add special tokens:   ", input_ids_special_tokens)
print("--------")
print("decode:               ", decoded_str)

start:                 हिन्दु तो कुछ है पर तुम तो पूरी दुनिया मई गंदगी मचा रहे हो ओआर बत् रही भगवे की तो तुम्हे अब के अबे भगवा ही थे तलवार के दर से सलवार खोल दी उन्होंने इसमें तुम्हारी कोई गलती नहीं है
tokenize:              ['ह', '##िन', '##्द', '##ु', 'तो', 'कुछ', 'है', 'पर', 'त', '##ु', '##म', 'तो', 'पूरी', 'दुनिया', 'मई', 'ग', '##ंद', '##गी', 'म', '##चा', 'रहे', 'हो', 'ओ', '##आ', '##र', 'ब', '##त', '##्', 'रही', 'भ', '##ग', '##वे', 'की', 'तो', 'त', '##ु', '##म्', '##हे', 'अब', 'के', 'अब', '##े', 'भ', '##ग', '##वा', 'ही', 'थे', 'त', '##ल', '##वार', 'के', 'दर', 'से', 'स', '##ल', '##वार', 'ख', '##ोल', 'दी', 'उन्होंने', 'इसमें', 'त', '##ु', '##म्', '##हार', '##ी', 'कोई', 'ग', '##ल', '##ती', 'नहीं', 'है']
convert_tokens_to_ids: [899, 25936, 60254, 14070, 21042, 22022, 10569, 12213, 880, 14070, 13841, 21042, 96597, 83874, 70219, 867, 52768, 34315, 889, 22078, 33555, 13220, 863, 111193, 11549, 887, 11845, 20429, 57203, 888, 19741, 72109, 10826, 21042, 880, 14070, 45753, 103860, 49545, 104

# *Calculating Weights for Labels with Imbalanced Dataset*

In [27]:
# Define all possible class labels
class_labels = np.unique(mbert_dataset['train']['labels'])

# Calculate class weights
labels = mbert_dataset['train']['labels']
class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=labels.numpy())
class_weights = torch.tensor(class_weights, dtype=torch.float)
print(class_weights)

tensor([0.6598, 2.0648])


In [28]:
class_labels

array([0, 1])

# **Weighted Loss Trainer for Imbalanced Datset**

In [29]:
from transformers import TrainingArguments, Trainer
# Define a custom Trainer class to include class weights
class WeightedLossTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels").to(model.device)  # Ensure labels are on the same device as model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}  # Ensure all inputs are on the same device as model
        # Forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Compute weighted loss
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights.to(model.device))  # Move class_weights to the same device as model
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

# *Function: Compute metrics for all labels*

In [30]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = precision_score(labels, predictions, average='weighted')
    recall = recall_score(labels, predictions, average='weighted')
    f1 = f1_score(labels, predictions, average='weighted')
    accuracy = accuracy_score(labels, predictions)

    # Additional metrics
    recall_micro = recall_score(labels, predictions, average='micro')
    f1_micro = f1_score(labels, predictions, average='micro')

    recall_macro = recall_score(labels, predictions, average='macro')
    f1_macro = f1_score(labels, predictions, average='macro')

    recall_positive = recall_score(labels, predictions, pos_label=1)
    f1_positive = f1_score(labels, predictions, pos_label=1)

    recall_negative = recall_score(labels, predictions, pos_label=0)
    f1_negative = f1_score(labels, predictions, pos_label=0)

    return {
        'accuracy': accuracy,
        'precision_weighted': precision,
        'recall_weighted': recall,
        'recall_micro': recall_micro,
        'recall_macro': recall_macro,
        'recall_positive': recall_positive,
        'recall_negative': recall_negative,
        'f1_weighted': f1,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'f1_positive': f1_positive,
        'f1_negative': f1_negative
    }

In [31]:
!export CUDA_LAUNCH_BLOCKING=1

In [32]:
wandb.init(project="rh2", name="Hing_mBERT_hi_dev")
model = AutoModelForSequenceClassification.from_pretrained("l3cube-pune/hing-mbert", num_labels=2)

model.resize_token_embeddings(len(tokenizer))

arguments = TrainingArguments(
    output_dir="sample_HingMBert_trainer_5k_hi_dev",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    optim = 'adamw_torch',
    evaluation_strategy="epoch", # run validation at the end of each epoch
    save_strategy="epoch",
    learning_rate=3e-5,
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    load_best_model_at_end=True,
    metric_for_best_model='eval_f1_weighted',  # Define the metric for early stopping
    greater_is_better=True,  # Set to False because we want to minimize the loss
    save_total_limit=1,
    seed=224,
    report_to="wandb"
)


trainer = WeightedLossTrainer(
    model=model,
    args=arguments,
    train_dataset=mbert_dataset['train'],
    eval_dataset=mbert_dataset['val'], # change to test when you do your final evaluation!
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/712M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/hing-mbert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# *Logger Function for early Stopping and logging*

In [33]:
class LoggingCallback(TrainerCallback):
    def __init__(self, log_path):
        self.log_path = log_path
    # will call on_log on each logging step, specified by TrainerArguement. (i.e TrainerArguement.logginng_step)
    def on_log(self, args, state, control, logs=None, **kwargs):
        _ = logs.pop("total_flos", None)
        if state.is_local_process_zero:
            with open(self.log_path, "a") as f:
                f.write(json.dumps(logs) + "\n")


In [34]:
trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.01))
trainer.add_callback(LoggingCallback("sample_HingMBert_trainer_5k_hi_dev/log.jsonl"))

In [35]:
# train the model
trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,Recall Micro,Recall Macro,Recall Positive,Recall Negative,F1 Weighted,F1 Micro,F1 Macro,F1 Positive,F1 Negative
1,No log,0.689481,0.749265,0.688631,0.749265,0.749265,0.532778,0.11336,0.952196,0.689323,0.749265,0.515755,0.179487,0.852023
2,No log,0.680469,0.451518,0.721815,0.451518,0.451518,0.581733,0.834008,0.329457,0.46387,0.451518,0.450252,0.423868,0.476636
3,No log,0.614273,0.681685,0.745521,0.681685,0.681685,0.664627,0.631579,0.697674,0.701215,0.681685,0.62924,0.489796,0.768683
4,No log,0.597225,0.669931,0.758177,0.669931,0.669931,0.680306,0.700405,0.660207,0.692648,0.669931,0.629306,0.506589,0.752024
5,No log,0.588774,0.65524,0.763862,0.65524,0.65524,0.684399,0.740891,0.627907,0.679855,0.65524,0.621944,0.509749,0.734139
6,No log,0.61633,0.628795,0.756673,0.628795,0.628795,0.669714,0.748988,0.590439,0.655381,0.628795,0.600438,0.493992,0.706883
7,No log,0.641109,0.725759,0.766309,0.725759,0.725759,0.695075,0.635628,0.754522,0.739374,0.725759,0.667625,0.52862,0.80663
8,No log,0.768109,0.790402,0.777447,0.790402,0.790402,0.678444,0.461538,0.895349,0.781478,0.790402,0.691044,0.515837,0.86625
9,No log,0.805929,0.623898,0.770477,0.623898,0.623898,0.683023,0.797571,0.568475,0.650292,0.623898,0.601315,0.506427,0.696203
10,No log,0.785405,0.730656,0.774002,0.730656,0.730656,0.706575,0.659919,0.75323,0.744633,0.730656,0.675795,0.542429,0.80916


TrainOutput(global_step=480, training_loss=0.5401123682657878, metrics={'train_runtime': 177.7499, 'train_samples_per_second': 172.152, 'train_steps_per_second': 2.7, 'train_loss': 0.5401123682657878, 'epoch': 10.0})

In [36]:
mbert_results = trainer.evaluate()

In [37]:
print(mbert_results)

{'eval_loss': 0.768108606338501, 'eval_accuracy': 0.7904015670910872, 'eval_precision_weighted': 0.777446504756055, 'eval_recall_weighted': 0.7904015670910872, 'eval_recall_micro': 0.7904015670910872, 'eval_recall_macro': 0.6784436493738819, 'eval_recall_positive': 0.46153846153846156, 'eval_recall_negative': 0.8953488372093024, 'eval_f1_weighted': 0.7814782220429798, 'eval_f1_micro': 0.7904015670910872, 'eval_f1_macro': 0.6910435520361992, 'eval_f1_positive': 0.5158371040723982, 'eval_f1_negative': 0.8662500000000001, 'eval_runtime': 1.6223, 'eval_samples_per_second': 629.358, 'eval_steps_per_second': 39.45, 'epoch': 10.0}


# **Hing-MBERT Test results**

In [38]:
test_results_mbert = trainer.predict(mbert_dataset['test'])

In [39]:
test_results_mbert

PredictionOutput(predictions=array([[ 1.3672379 , -0.9893465 ],
       [ 1.6115872 , -1.4155835 ],
       [ 2.2441974 , -2.0904303 ],
       ...,
       [-1.1329212 ,  1.4149877 ],
       [ 0.06289567,  0.243777  ],
       [ 0.9833387 , -0.7273983 ]], dtype=float32), label_ids=array([0, 0, 0, ..., 1, 0, 0]), metrics={'test_loss': 0.7614351511001587, 'test_accuracy': 0.7757100881488737, 'test_precision_weighted': 0.7630092292091304, 'test_recall_weighted': 0.7757100881488737, 'test_recall_micro': 0.7757100881488737, 'test_recall_macro': 0.6632405402295243, 'test_recall_positive': 0.44534412955465585, 'test_recall_negative': 0.8811369509043928, 'test_f1_weighted': 0.7676385525595009, 'test_f1_micro': 0.7757100881488737, 'test_f1_macro': 0.6731119024350688, 'test_f1_positive': 0.4899777282850779, 'test_f1_negative': 0.8562460765850597, 'test_runtime': 1.7098, 'test_samples_per_second': 597.139, 'test_steps_per_second': 37.431})

In [40]:
test_results_mbert.predictions.argmax(axis=1)

array([0, 0, 0, ..., 1, 1, 0])

In [41]:
test_df['hi_dev_hingMbert'] = test_results_mbert.predictions.argmax(axis=1)

In [42]:
print(test_results_mbert.metrics)

{'test_loss': 0.7614351511001587, 'test_accuracy': 0.7757100881488737, 'test_precision_weighted': 0.7630092292091304, 'test_recall_weighted': 0.7757100881488737, 'test_recall_micro': 0.7757100881488737, 'test_recall_macro': 0.6632405402295243, 'test_recall_positive': 0.44534412955465585, 'test_recall_negative': 0.8811369509043928, 'test_f1_weighted': 0.7676385525595009, 'test_f1_micro': 0.7757100881488737, 'test_f1_macro': 0.6731119024350688, 'test_f1_positive': 0.4899777282850779, 'test_f1_negative': 0.8562460765850597, 'test_runtime': 1.7098, 'test_samples_per_second': 597.139, 'test_steps_per_second': 37.431}


In [43]:
wandb.finish()

VBox(children=(Label(value='0.029 MB of 0.029 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▇▁▆▆▅▅▇█▅▇█
eval/f1_macro,▃▁▆▆▆▅▇█▅██
eval/f1_micro,▇▁▆▆▅▅▇█▅▇█
eval/f1_negative,█▁▆▆▆▅▇█▅▇█
eval/f1_positive,▁▆▇▇▇▇█▇▇█▇
eval/f1_weighted,▆▁▆▆▆▅▇█▅▇█
eval/loss,▄▄▂▁▁▂▃▇█▇▇
eval/precision_weighted,▁▄▅▆▇▆▇█▇██
eval/recall_macro,▁▃▆▇▇▇█▇▇█▇
eval/recall_micro,▇▁▆▆▅▅▇█▅▇█

0,1
eval/accuracy,0.7904
eval/f1_macro,0.69104
eval/f1_micro,0.7904
eval/f1_negative,0.86625
eval/f1_positive,0.51584
eval/f1_weighted,0.78148
eval/loss,0.76811
eval/precision_weighted,0.77745
eval/recall_macro,0.67844
eval/recall_micro,0.7904


# **Hing BERT Training**

In [44]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [45]:
## dataset pre processing and alignment
# dataset = load_dataset('csv', data_files={'train': "data/final_train.csv",
#                                               'val':"data/final_val.csv",'test':"data/final_test.csv"})

bert_tokenizer = AutoTokenizer.from_pretrained("l3cube-pune/hing-bert")

bert_dataset = dataset.map(
    lambda example: bert_tokenizer(example['transliterated_text'], max_length=97, padding='max_length', truncation=True),
    batched=True,
    batch_size=64
)


bert_dataset = bert_dataset.remove_columns(["clean_text","language_tags","transliterated_text"])
bert_dataset.set_format("torch")

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/3060 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

In [46]:
## model training
bert_model = AutoModelForSequenceClassification.from_pretrained("l3cube-pune/hing-bert")

bert_model.resize_token_embeddings(len(bert_tokenizer))
arguments = TrainingArguments(
    output_dir="sample_Hing_Bert_trainer_5k_hi_dev",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    optim = 'adamw_torch',
    evaluation_strategy="epoch", # run validation at the end of each epoch
    save_strategy="epoch",
    learning_rate=2e-5,
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    load_best_model_at_end=True,
    metric_for_best_model='eval_f1_weighted',  # Define the metric for early stopping
    greater_is_better=True,  # Set to False because we want to minimize the loss
    save_total_limit=1,
    seed=224,
    report_to="wandb"
)

bert_trainer = WeightedLossTrainer(
    model=bert_model,
    args=arguments,
    train_dataset=bert_dataset['train'],
    eval_dataset=bert_dataset['val'], # change to test when you do your final evaluation!
    tokenizer=bert_tokenizer,
    compute_metrics=compute_metrics
)

bert_trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.01))
bert_trainer.add_callback(LoggingCallback("sample_Hing_Bert_trainer_5k_hi_dev/log.jsonl"))

config.json:   0%|          | 0.00/716 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at l3cube-pune/hing-bert and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [47]:
# train the model
wandb.init(project="rh2", name="Hing_BERT_hi_dev")
bert_trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mshanu-dhawale11[0m ([33mshanu-dhawale11-university-of-galway[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,Recall Micro,Recall Macro,Recall Positive,Recall Negative,F1 Weighted,F1 Micro,F1 Macro,F1 Positive,F1 Negative
1,No log,0.692758,0.733595,0.616228,0.733595,0.733595,0.493498,0.02834,0.958656,0.652498,0.733595,0.447027,0.048951,0.845103
2,No log,0.690083,0.542605,0.686787,0.542605,0.542605,0.571517,0.62753,0.515504,0.574739,0.542605,0.5149,0.39897,0.63083
3,No log,0.685069,0.390793,0.729834,0.390793,0.390793,0.563734,0.898785,0.228682,0.375722,0.390793,0.389608,0.41651,0.362705
4,No log,0.656155,0.522037,0.7398,0.522037,0.522037,0.619975,0.809717,0.430233,0.546478,0.522037,0.513787,0.45045,0.577123
5,No log,0.717491,0.387855,0.740603,0.387855,0.387855,0.567309,0.91498,0.219638,0.368626,0.387855,0.386008,0.419684,0.352332


TrainOutput(global_step=240, training_loss=0.6831630071004232, metrics={'train_runtime': 72.2475, 'train_samples_per_second': 423.544, 'train_steps_per_second': 6.644, 'train_loss': 0.6831630071004232, 'epoch': 5.0})

In [48]:
bert_eval_results = bert_trainer.evaluate()
print(bert_eval_results)

{'eval_loss': 0.6927578449249268, 'eval_accuracy': 0.7335945151811949, 'eval_precision_weighted': 0.6162275846064952, 'eval_recall_weighted': 0.7335945151811949, 'eval_recall_micro': 0.7335945151811949, 'eval_recall_macro': 0.49349820586050697, 'eval_recall_positive': 0.02834008097165992, 'eval_recall_negative': 0.958656330749354, 'eval_f1_weighted': 0.6524977948076924, 'eval_f1_micro': 0.7335945151811949, 'eval_f1_macro': 0.44702677732290486, 'eval_f1_positive': 0.04895104895104896, 'eval_f1_negative': 0.8451025056947608, 'eval_runtime': 1.6041, 'eval_samples_per_second': 636.486, 'eval_steps_per_second': 39.897, 'epoch': 5.0}


# **Test results for Hing-BERT**

In [49]:
bert_results = bert_trainer.predict(bert_dataset['test'])
test_df['trans_predicted_labels_5k_hingBert'] = bert_results.predictions.argmax(axis=1)
print(bert_results.metrics)

{'test_loss': 0.6916566491127014, 'test_accuracy': 0.7473065621939275, 'test_precision_weighted': 0.667123604894527, 'test_recall_weighted': 0.7473065621939275, 'test_recall_micro': 0.7473065621939275, 'test_recall_macro': 0.5135685068365606, 'test_recall_positive': 0.06072874493927125, 'test_recall_negative': 0.9664082687338501, 'test_f1_weighted': 0.6717724581582287, 'test_f1_micro': 0.7473065621939277, 'test_f1_macro': 0.47853715317369827, 'test_f1_positive': 0.10416666666666666, 'test_f1_negative': 0.8529076396807299, 'test_runtime': 1.6224, 'test_samples_per_second': 629.317, 'test_steps_per_second': 39.448}


# **Muril cased training**

In [50]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [51]:
muril_tokenizer = AutoTokenizer.from_pretrained("google/muril-base-cased")
print(len(muril_tokenizer))

## Adding special tokens

print(len(muril_tokenizer))
muril_dataset = dataset.map(
    lambda example: muril_tokenizer(example['transliterated_text'], max_length=97, padding='max_length', truncation=True),
    batched=True,
    batch_size=64
)

muril_dataset = muril_dataset.remove_columns(["clean_text","language_tags","transliterated_text"])
muril_dataset.set_format("torch")

muril_model = AutoModelForSequenceClassification.from_pretrained("google/muril-base-cased",num_labels=2)

muril_model.resize_token_embeddings(len(muril_tokenizer))

arguments = TrainingArguments(
    output_dir="sample_Hing_muril_trainer_5k_hi_dev",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    evaluation_strategy="epoch", # run validation at the end of each epoch
    save_strategy= "epoch",
    learning_rate=3e-5,
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.001,               # strength of weight decay
    load_best_model_at_end=True,
    metric_for_best_model='eval_f1_weighted',  # Define the metric for early stopping
    greater_is_better=True,  # Set to False because we want to minimize the loss
    save_total_limit=1,
    seed=224
)

muril_trainer = WeightedLossTrainer(
    model=muril_model,
    args=arguments,
    train_dataset=muril_dataset['train'],
    eval_dataset=muril_dataset['val'], # change to test when you do your final evaluation!
    tokenizer=muril_tokenizer,
    compute_metrics=compute_metrics
)

muril_trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.01))
muril_trainer.add_callback(LoggingCallback("sample_Hing_muril_trainer_5k_hi_dev/log.jsonl"))

tokenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

197285
197285


Map:   0%|          | 0/3060 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/953M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google/muril-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
wandb.init(project="rh2", name="muril_hi_dev")
muril_trainer.train()

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,█▄▁▄▁█
eval/f1_macro,▄█▁█▁▄
eval/f1_micro,█▄▁▄▁█
eval/f1_negative,█▅▁▄▁█
eval/f1_positive,▁▇▇█▇▁
eval/f1_weighted,█▆▁▅▁█
eval/loss,▅▅▄▁█▅
eval/precision_weighted,▁▅▇██▁
eval/recall_macro,▁▅▅█▅▁
eval/recall_micro,█▄▁▄▁█

0,1
eval/accuracy,0.73359
eval/f1_macro,0.44703
eval/f1_micro,0.73359
eval/f1_negative,0.8451
eval/f1_positive,0.04895
eval/f1_weighted,0.6525
eval/loss,0.69276
eval/precision_weighted,0.61623
eval/recall_macro,0.4935
eval/recall_micro,0.73359


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,Recall Micro,Recall Macro,Recall Positive,Recall Negative,F1 Weighted,F1 Micro,F1 Macro,F1 Positive,F1 Negative
1,No log,0.669592,0.727718,0.773549,0.727718,0.727718,0.706015,0.663968,0.748062,0.742261,0.727718,0.67383,0.541254,0.806407
2,No log,0.61493,0.770813,0.78009,0.770813,0.770813,0.706873,0.582996,0.830749,0.774849,0.770813,0.698888,0.551724,0.846053
3,0.664200,0.625973,0.721841,0.78436,0.721841,0.721841,0.721435,0.720648,0.722222,0.739086,0.721841,0.676841,0.55625,0.797432
4,0.664200,0.571208,0.784525,0.796769,0.784525,0.784525,0.732456,0.631579,0.833333,0.789509,0.784525,0.720385,0.586466,0.854305
5,0.664200,0.607046,0.732615,0.773131,0.732615,0.732615,0.70511,0.651822,0.758398,0.745977,0.732615,0.676255,0.541176,0.811334
6,0.526800,0.636325,0.756121,0.784757,0.756121,0.756121,0.720614,0.651822,0.789406,0.766182,0.756121,0.697325,0.563923,0.830727
7,0.526800,0.74959,0.777669,0.778594,0.777669,0.777669,0.69899,0.546559,0.851421,0.778124,0.777669,0.698167,0.54326,0.853074
8,0.376900,0.81128,0.774731,0.774112,0.774731,0.774731,0.691539,0.530364,0.852713,0.774418,0.774731,0.692067,0.53252,0.851613


TrainOutput(global_step=1536, training_loss=0.5181999752918879, metrics={'train_runtime': 202.5762, 'train_samples_per_second': 151.054, 'train_steps_per_second': 9.478, 'train_loss': 0.5181999752918879, 'epoch': 8.0})

In [53]:
muril_eval_results = muril_trainer.evaluate()
print(muril_eval_results)

{'eval_loss': 0.5712080597877502, 'eval_accuracy': 0.7845249755142018, 'eval_precision_weighted': 0.7967694715325981, 'eval_recall_weighted': 0.7845249755142018, 'eval_recall_micro': 0.7845249755142018, 'eval_recall_macro': 0.7324561403508771, 'eval_recall_positive': 0.631578947368421, 'eval_recall_negative': 0.8333333333333334, 'eval_f1_weighted': 0.7895092369604437, 'eval_f1_micro': 0.7845249755142018, 'eval_f1_macro': 0.7203854005875616, 'eval_f1_positive': 0.5864661654135338, 'eval_f1_negative': 0.8543046357615893, 'eval_runtime': 1.5942, 'eval_samples_per_second': 640.432, 'eval_steps_per_second': 40.145, 'epoch': 8.0}


In [54]:
muril_results = muril_trainer.predict(muril_dataset['test'])
test_df['trans_predicted_labels_5k_muril'] = muril_results.predictions.argmax(axis=1)
print(muril_results.metrics)

{'test_loss': 0.6130938529968262, 'test_accuracy': 0.7512242899118511, 'test_precision_weighted': 0.7686185839079841, 'test_recall_weighted': 0.7512242899118511, 'test_recall_micro': 0.7512242899118511, 'test_recall_macro': 0.6939527560702591, 'test_recall_positive': 0.582995951417004, 'test_recall_negative': 0.8049095607235142, 'test_f1_weighted': 0.7582597771521301, 'test_f1_micro': 0.7512242899118511, 'test_f1_macro': 0.6810159901599016, 'test_f1_positive': 0.5313653136531366, 'test_f1_negative': 0.8306666666666667, 'test_runtime': 1.6298, 'test_samples_per_second': 626.461, 'test_steps_per_second': 39.269}


## XLMR training

In [55]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [56]:
xlmr_tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")

xlmr_dataset = dataset.map(
    lambda example: xlmr_tokenizer(example['transliterated_text'], max_length=194, padding='max_length', truncation=True),
    batched=True,
    batch_size=16
)

xlmr_dataset = xlmr_dataset.remove_columns(["clean_text","language_tags","transliterated_text"])
xlmr_dataset.set_format("torch")

xlmr_model = AutoModelForSequenceClassification.from_pretrained("FacebookAI/xlm-roberta-base",num_labels=2)


arguments = TrainingArguments(
    output_dir="sample_Hing_xlmr_trainer_5k_hi_dev",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    evaluation_strategy="epoch", # run validation at the end of each epoch
    save_strategy= "epoch",
    learning_rate=2e-5,
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.001,               # strength of weight decay
    load_best_model_at_end=True,
     metric_for_best_model='eval_f1_weighted',  # Define the metric for early stopping
    greater_is_better=True,  # Set to False because we want to minimize the loss
    save_total_limit=1,
    seed=224,
    report_to="wandb"
)

xlmr_trainer = WeightedLossTrainer(
    model=xlmr_model,
    args=arguments,
    train_dataset=xlmr_dataset['train'],
    eval_dataset=xlmr_dataset['val'], # change to test when you do your final evaluation!
    tokenizer=xlmr_tokenizer,
    compute_metrics=compute_metrics
)

xlmr_trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=4, early_stopping_threshold=0.01))
xlmr_trainer.add_callback(LoggingCallback("sample_Hing_xlmr_trainer_5k_hi_dev/log.jsonl"))

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Map:   0%|          | 0/3060 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

Map:   0%|          | 0/1021 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [57]:
import torch
torch.cuda.empty_cache()

In [58]:
wandb.init(project="rh2", name="xlmr_hi_dev")
xlmr_trainer.train()

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▆▁█▂▅▇▇█
eval/f1_macro,▁▅▁█▁▅▅▄█
eval/f1_micro,▂▆▁█▂▅▇▇█
eval/f1_negative,▂▇▁█▃▅███
eval/f1_positive,▂▃▄█▂▅▂▁█
eval/f1_weighted,▁▆▁█▂▅▆▆█
eval/loss,▄▂▃▁▂▃▆█▁
eval/precision_weighted,▁▃▄█▁▄▃▁█
eval/recall_macro,▃▄▆█▃▆▂▁█
eval/recall_micro,▂▆▁█▂▅▇▇█

0,1
eval/accuracy,0.78452
eval/f1_macro,0.72039
eval/f1_micro,0.78452
eval/f1_negative,0.8543
eval/f1_positive,0.58647
eval/f1_weighted,0.78951
eval/loss,0.57121
eval/precision_weighted,0.79677
eval/recall_macro,0.73246
eval/recall_micro,0.78452


Epoch,Training Loss,Validation Loss,Accuracy,Precision Weighted,Recall Weighted,Recall Micro,Recall Macro,Recall Positive,Recall Negative,F1 Weighted,F1 Micro,F1 Macro,F1 Positive,F1 Negative
1,No log,0.685034,0.423115,0.748457,0.423115,0.423115,0.58643,0.902834,0.270026,0.418922,0.423115,0.423006,0.430918,0.415094
2,No log,0.566247,0.685602,0.791372,0.685602,0.685602,0.723721,0.797571,0.649871,0.708011,0.685602,0.654575,0.551049,0.758101
3,0.652800,0.609234,0.764936,0.784699,0.764936,0.764936,0.718158,0.62753,0.808786,0.772492,0.764936,0.701389,0.563636,0.839142
4,0.652800,0.647093,0.795299,0.795017,0.795299,0.795299,0.720266,0.574899,0.865633,0.795157,0.795299,0.72057,0.576065,0.865074
5,0.652800,0.629918,0.664055,0.79693,0.664055,0.664055,0.723292,0.838057,0.608527,0.688034,0.664055,0.639985,0.546896,0.733074
6,0.511800,0.654369,0.780607,0.782444,0.780607,0.780607,0.705063,0.558704,0.851421,0.781497,0.780607,0.703367,0.552,0.854734
7,0.511800,0.959209,0.796278,0.789043,0.796278,0.796278,0.702994,0.522267,0.883721,0.791967,0.796278,0.710834,0.553648,0.86802
8,0.325300,1.079482,0.777669,0.783054,0.777669,0.777669,0.708638,0.574899,0.842377,0.780133,0.777669,0.703752,0.555773,0.851731


TrainOutput(global_step=1536, training_loss=0.4912841102729241, metrics={'train_runtime': 322.2142, 'train_samples_per_second': 94.968, 'train_steps_per_second': 5.959, 'train_loss': 0.4912841102729241, 'epoch': 8.0})

In [59]:
xlmr_eval_results = xlmr_trainer.evaluate()
print(xlmr_eval_results)

{'eval_loss': 0.6470926403999329, 'eval_accuracy': 0.7952987267384917, 'eval_precision_weighted': 0.795017355184452, 'eval_recall_weighted': 0.7952987267384917, 'eval_recall_micro': 0.7952987267384917, 'eval_recall_macro': 0.7202659301802509, 'eval_recall_positive': 0.5748987854251012, 'eval_recall_negative': 0.8656330749354005, 'eval_f1_weighted': 0.7951571942542977, 'eval_f1_micro': 0.7952987267384917, 'eval_f1_macro': 0.7205695750841018, 'eval_f1_positive': 0.5760649087221095, 'eval_f1_negative': 0.8650742414460941, 'eval_runtime': 2.9353, 'eval_samples_per_second': 347.833, 'eval_steps_per_second': 21.803, 'epoch': 8.0}


In [60]:
xlmr_results = xlmr_trainer.predict(xlmr_dataset['test'])
test_df['trans_predicted_labels_5k_xlmr'] = xlmr_results.predictions.argmax(axis=1)
print(xlmr_results.metrics)

{'test_loss': 0.6418144702911377, 'test_accuracy': 0.7825661116552399, 'test_precision_weighted': 0.7813778042905246, 'test_recall_weighted': 0.7825661116552399, 'test_recall_micro': 0.7825661116552399, 'test_recall_macro': 0.7008416240362385, 'test_recall_positive': 0.5425101214574899, 'test_recall_negative': 0.8591731266149871, 'test_f1_weighted': 0.7819588247064126, 'test_f1_micro': 0.7825661116552399, 'test_f1_macro': 0.7019487691984011, 'test_f1_positive': 0.5469387755102042, 'test_f1_negative': 0.856958762886598, 'test_runtime': 3.0009, 'test_samples_per_second': 340.226, 'test_steps_per_second': 21.327}


In [61]:
wandb.finish()
test_df.to_csv('transliteration_v2.csv',index=False, encoding='utf-8')

VBox(children=(Label(value='0.004 MB of 0.015 MB uploaded\r'), FloatProgress(value=0.26504416343648723, max=1.…

0,1
eval/accuracy,▁▆▇█▆████
eval/f1_macro,▁▆██▆████
eval/f1_micro,▁▆▇█▆████
eval/f1_negative,▁▆██▆████
eval/f1_positive,▁▇▇█▇▇▇▇█
eval/f1_weighted,▁▆██▆████
eval/loss,▃▁▂▂▂▂▆█▂
eval/precision_weighted,▁▇▆██▆▇▆█
eval/recall_macro,▁████▇▇▇█
eval/recall_micro,▁▆▇█▆████

0,1
eval/accuracy,0.7953
eval/f1_macro,0.72057
eval/f1_micro,0.7953
eval/f1_negative,0.86507
eval/f1_positive,0.57606
eval/f1_weighted,0.79516
eval/loss,0.64709
eval/precision_weighted,0.79502
eval/recall_macro,0.72027
eval/recall_micro,0.7953


In [62]:
test_df['transliterate'] = dataset['test']['transliterated_text']
test_df.to_csv('transliteration_v221.csv',index=False, encoding='utf-8')