In [1]:
import pandas as pd
from simpletransformers.classification import ClassificationModel
import pandas as pd
import csv
import torch
import gc
from numba import cuda
from sklearn.metrics import accuracy_score, f1_score

In [2]:
train_path = r"data/train.jsonl"
validation_path = r"data/validation.jsonl"

In [3]:
def label_encoding(text, target=""):
    if target:
        if text == target:
            return 1
        else:
            return 0
    else:
        if text == "phrase":
            return 0
        elif text == "passage":
            return 1
        elif text == "multi":
            return 2
        else:
            return text    
        
def load_data(file):
    df = pd.read_json(file, lines=True)
    df["text"] = df["postText"].explode() + df["targetTitle"].explode()
    df = df.loc[:, ["text", "tags"]]
    df.tags = df.tags.explode()
    df.tags = df.tags.apply(label_encoding, args=("", ))
    return df

In [4]:
train_set = load_data(train_path)
train_set

Unnamed: 0,text,tags
0,"Wes Welker Wanted Dinner With Tom Brady, But P...",1
1,NASA sets date for full recovery of ozone hole...,0
2,This is what makes employees happy -- and it's...,0
3,Passion is overrated — 7 work habits you need ...,2
4,The perfect way to cook rice so that it's perf...,0
...,...,...
3195,Has Facebook's video explosion completely shak...,1
3196,Cop Is Eating At A Chili's When Teen Hands Him...,1
3197,5 popular myths about visible signs of aging t...,2
3198,You need to see this Twitter account that pred...,0


In [5]:
validation_set = load_data(validation_path)
validation_set

Unnamed: 0,text,tags
0,Five Nights at Freddy’s Sequel Delayed for Wei...,1
1,Why Arizona Sheriff Joe Arpaio’s fate could ha...,2
2,Here’s how much you should be tipping your hai...,0
3,"""Harry Potter"" alums reunite for new movieAlan...",2
4,A man swallowed a microSD card and you won't b...,1
...,...,...
795,This is what happens when you leave a hotel cl...,1
796,This Texas GOP elector announces that he won't...,0
797,This beauty queen cured her acne with one diet...,2
798,WikiLeaks' Julian Assange Reported DeadWikiLea...,1


In [6]:
configurations = []

for learning_rate in [4e-6, 1e-5, 4e-5]:
    for warumup_ratio in [0.02, 0.06, 0.1]:
        configurations += [{
            "overwrite_output_dir": True,
            "num_train_epochs": 10,
            "learning_rate": learning_rate,
            "warmup_ratio": warumup_ratio,
            "best_model_dir": r"outputs/deberta/best_model",
            "output_dir" : "outputs/"
        }]


In [7]:
results = []

for config in configurations:
    
    config["output_dir"] = r"ouputs/deberta/deberta_" + str(config["learning_rate"]) + "_" + str(config["warmup_ratio"])
        
    model = ClassificationModel("deberta", "microsoft/deberta-base", num_labels=3, args=config, ignore_mismatched_sizes=True)
    model.train_model(train_set, eval_df=validation_set, acc=accuracy_score)
    train = model.eval_model(train_set, acc=accuracy_score)
    evaluation = model.eval_model(validation_set, acc=accuracy_score)
    
    results += [(config["learning_rate"], config["warmup_ratio"], train[0], evaluation[0])]
    model = None
    gc.collect()
    torch.cuda.empty_cache()

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaForSequenceClassification: ['lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-base and are newly initialized: ['classifier.weight', 'pooler.d

  0%|          | 0/3200 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/400 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/3200 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/400 [00:00<?, ?it/s]



  0%|          | 0/800 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/100 [00:00<?, ?it/s]

In [52]:
results_dict = {"Model name" : [],
                "Learning rate" : [],
                "Warmup Ratio" : [],
                "acc" : [],
                "eval_loss" : []
               }
for config in configurations:
    model_name = str(config["learning_rate"]) + "_" + str(config["warmup_ratio"])
    with open(r"ouputs/deberta/deberta_" + model_name + r"/eval_results.txt", "r+") as file:
        data = file.readlines()
        results_dict["Model name"].append("Deberta " + model_name)
        results_dict["Learning rate"].append(config["learning_rate"])
        results_dict["Warmup Ratio"].append(config["warmup_ratio"])
        results_dict["acc"].append(data[0][6:-1])
        results_dict["eval_loss"].append(data[1][12:-1])
df = pd.DataFrame.from_dict(results_dict).sort_values("acc", ascending=False)
df.to_csv("deberta_validation_results.csv")
df

Unnamed: 0,Model name,Learning rate,Warmup Ratio,acc,eval_loss
4,Deberta 1e-05_0.06,1e-05,0.06,0.73625,1.7841331450027065
2,Deberta 4e-06_0.1,4e-06,0.1,0.725,0.9825311375781892
5,Deberta 1e-05_0.1,1e-05,0.1,0.72375,1.8955837418939336
6,Deberta 4e-05_0.02,4e-05,0.02,0.7175,2.496159424145953
0,Deberta 4e-06_0.02,4e-06,0.02,0.7125,0.9842591169849038
3,Deberta 1e-05_0.02,1e-05,0.02,0.70125,1.9736555156629765
1,Deberta 4e-06_0.06,4e-06,0.06,0.69875,0.974842406734824
8,Deberta 4e-05_0.1,4e-05,0.1,0.68375,2.4623364060716995
7,Deberta 4e-05_0.06,4e-05,0.06,0.6775,2.558174219107168
