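# DeBERTa base

Fine-tunes `microsoft/deberta-v3-base` as a binary sequence classifier for 5 epochs and reports precision, recall, F-score and accuracy on the test set.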

## Libraries

In [1]:
import polars as pl
import numpy as np
from polars import DataFrame, col
from transformers import AutoModelForSequenceClassification
from label_legends.deberta import load_dataset, load_deberta, load_dataset
from label_legends.preprocess import holdout, load_data, load_test, transform
from label_legends.female import predict_female
from label_legends.result import calculate_scores

  from .autonotebook import tqdm as notebook_tqdm


## Running the model

In [2]:
# Build the project's DeBERTa model wrapper. According to the load log shown in
# this cell's output, the checkpoint is microsoft/deberta-v3-base and the
# classifier/pooler weights are newly initialized, so the model has to be
# fine-tuned before its predictions are meaningful.
deberta_base = load_deberta()

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Fine-tune the model. The logged run covers 5 epochs (9190 optimizer steps,
# roughly 2h04m) with an evaluation pass at the end of every epoch.
# NOTE(review): eval_loss rises from 0.331 (epoch 1) to 0.460 (epoch 5) while
# eval_fscore peaks at epoch 3 (0.7306) — confirm which checkpoint train()
# keeps for the later predict() call.
train_out_base = deberta_base.train()
# Last expression of the cell: display the summary training metrics.
train_out_base.metrics

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  1%|          | 100/9190 [01:00<1:26:41,  1.75it/s]

{'loss': 0.5966, 'grad_norm': 1.6980220079421997, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.05}


  2%|▏         | 200/9190 [01:58<1:27:50,  1.71it/s]

{'loss': 0.5607, 'grad_norm': 3.5036559104919434, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.11}


  3%|▎         | 300/9190 [02:56<1:26:15,  1.72it/s]

{'loss': 0.4623, 'grad_norm': 5.668813228607178, 'learning_rate': 1.988814317673378e-05, 'epoch': 0.16}


  4%|▍         | 400/9190 [03:55<1:26:50,  1.69it/s]

{'loss': 0.4686, 'grad_norm': 6.7858757972717285, 'learning_rate': 1.9664429530201344e-05, 'epoch': 0.22}


  5%|▌         | 500/9190 [04:53<1:24:22,  1.72it/s]

{'loss': 0.4049, 'grad_norm': 2.5386195182800293, 'learning_rate': 1.9440715883668907e-05, 'epoch': 0.27}


  7%|▋         | 600/9190 [05:52<1:23:41,  1.71it/s]

{'loss': 0.4065, 'grad_norm': 2.2010629177093506, 'learning_rate': 1.9217002237136467e-05, 'epoch': 0.33}


  8%|▊         | 700/9190 [06:50<1:23:20,  1.70it/s]

{'loss': 0.3923, 'grad_norm': 4.094524383544922, 'learning_rate': 1.899328859060403e-05, 'epoch': 0.38}


  9%|▊         | 800/9190 [07:48<1:20:33,  1.74it/s]

{'loss': 0.4038, 'grad_norm': 2.4061806201934814, 'learning_rate': 1.876957494407159e-05, 'epoch': 0.44}


 10%|▉         | 900/9190 [08:46<1:19:57,  1.73it/s]

{'loss': 0.3985, 'grad_norm': 3.4057297706604004, 'learning_rate': 1.8545861297539152e-05, 'epoch': 0.49}


 11%|█         | 1000/9190 [09:44<1:20:09,  1.70it/s]

{'loss': 0.3699, 'grad_norm': 2.173457145690918, 'learning_rate': 1.832214765100671e-05, 'epoch': 0.54}


 12%|█▏        | 1100/9190 [10:43<1:19:59,  1.69it/s]

{'loss': 0.3498, 'grad_norm': 3.884676456451416, 'learning_rate': 1.8098434004474274e-05, 'epoch': 0.6}


 13%|█▎        | 1200/9190 [11:42<1:16:58,  1.73it/s]

{'loss': 0.394, 'grad_norm': 4.324994087219238, 'learning_rate': 1.7874720357941834e-05, 'epoch': 0.65}


 14%|█▍        | 1300/9190 [12:42<1:19:13,  1.66it/s]

{'loss': 0.3604, 'grad_norm': 4.408195972442627, 'learning_rate': 1.7651006711409397e-05, 'epoch': 0.71}


 15%|█▌        | 1400/9190 [13:43<1:15:19,  1.72it/s]

{'loss': 0.3634, 'grad_norm': 3.120213031768799, 'learning_rate': 1.7427293064876956e-05, 'epoch': 0.76}


 16%|█▋        | 1500/9190 [14:42<1:15:25,  1.70it/s]

{'loss': 0.3453, 'grad_norm': 3.819579601287842, 'learning_rate': 1.720357941834452e-05, 'epoch': 0.82}


 17%|█▋        | 1600/9190 [15:41<1:20:35,  1.57it/s]

{'loss': 0.332, 'grad_norm': 5.457400321960449, 'learning_rate': 1.6979865771812082e-05, 'epoch': 0.87}


 18%|█▊        | 1700/9190 [16:40<1:12:23,  1.72it/s]

{'loss': 0.379, 'grad_norm': 4.060813903808594, 'learning_rate': 1.675615212527964e-05, 'epoch': 0.92}


 20%|█▉        | 1800/9190 [17:39<1:13:28,  1.68it/s]

{'loss': 0.3538, 'grad_norm': 4.248988151550293, 'learning_rate': 1.6532438478747205e-05, 'epoch': 0.98}


                                                     
 20%|██        | 1838/9190 [20:10<1:12:23,  1.69it/s]

{'eval_loss': 0.3308253884315491, 'eval_precision': 0.768288414420721, 'eval_recall': 0.6808312655086849, 'eval_fscore': 0.721920736720934, 'eval_accuracy': 0.8657936507936508, 'eval_tp': 2195, 'eval_tn': 8714, 'eval_fp': 662, 'eval_fn': 1029, 'eval_runtime': 128.4107, 'eval_samples_per_second': 98.123, 'eval_steps_per_second': 6.137, 'epoch': 1.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 21%|██        | 1900/9190 [20:50<1:10:10,  1.73it/s] 

{'loss': 0.3404, 'grad_norm': 1.9112190008163452, 'learning_rate': 1.6308724832214767e-05, 'epoch': 1.03}


 22%|██▏       | 2000/9190 [21:51<1:12:08,  1.66it/s]

{'loss': 0.2903, 'grad_norm': 2.857774496078491, 'learning_rate': 1.608501118568233e-05, 'epoch': 1.09}


 23%|██▎       | 2100/9190 [22:52<1:11:17,  1.66it/s]

{'loss': 0.3013, 'grad_norm': 35.07514190673828, 'learning_rate': 1.586129753914989e-05, 'epoch': 1.14}


 24%|██▍       | 2200/9190 [23:52<1:10:08,  1.66it/s]

{'loss': 0.3018, 'grad_norm': 5.1333441734313965, 'learning_rate': 1.5637583892617453e-05, 'epoch': 1.2}


 25%|██▌       | 2300/9190 [24:52<1:09:03,  1.66it/s]

{'loss': 0.2791, 'grad_norm': 10.539129257202148, 'learning_rate': 1.5413870246085012e-05, 'epoch': 1.25}


 26%|██▌       | 2400/9190 [25:52<1:08:05,  1.66it/s]

{'loss': 0.3311, 'grad_norm': 2.6598334312438965, 'learning_rate': 1.5190156599552575e-05, 'epoch': 1.31}


 27%|██▋       | 2500/9190 [26:53<1:07:05,  1.66it/s]

{'loss': 0.2981, 'grad_norm': 1.9329757690429688, 'learning_rate': 1.4966442953020135e-05, 'epoch': 1.36}


 28%|██▊       | 2600/9190 [27:54<1:06:54,  1.64it/s]

{'loss': 0.3077, 'grad_norm': 3.13865327835083, 'learning_rate': 1.4742729306487698e-05, 'epoch': 1.41}


 29%|██▉       | 2700/9190 [28:54<1:05:25,  1.65it/s]

{'loss': 0.309, 'grad_norm': 6.660081386566162, 'learning_rate': 1.4519015659955257e-05, 'epoch': 1.47}


 30%|███       | 2800/9190 [29:55<1:04:07,  1.66it/s]

{'loss': 0.3036, 'grad_norm': 4.925863742828369, 'learning_rate': 1.429530201342282e-05, 'epoch': 1.52}


 32%|███▏      | 2900/9190 [30:55<1:03:08,  1.66it/s]

{'loss': 0.2962, 'grad_norm': 5.297201633453369, 'learning_rate': 1.4071588366890381e-05, 'epoch': 1.58}


 33%|███▎      | 3000/9190 [31:57<1:01:57,  1.66it/s]

{'loss': 0.3008, 'grad_norm': 2.9381930828094482, 'learning_rate': 1.3847874720357944e-05, 'epoch': 1.63}


 34%|███▎      | 3100/9190 [32:57<1:02:52,  1.61it/s]

{'loss': 0.3135, 'grad_norm': 3.061150312423706, 'learning_rate': 1.3624161073825504e-05, 'epoch': 1.69}


 35%|███▍      | 3200/9190 [33:58<1:00:20,  1.65it/s]

{'loss': 0.3049, 'grad_norm': 1.1323641538619995, 'learning_rate': 1.3400447427293066e-05, 'epoch': 1.74}


 36%|███▌      | 3300/9190 [34:58<59:12,  1.66it/s]  

{'loss': 0.2997, 'grad_norm': 6.205170154571533, 'learning_rate': 1.3176733780760626e-05, 'epoch': 1.8}


 37%|███▋      | 3400/9190 [35:59<58:28,  1.65it/s]

{'loss': 0.3118, 'grad_norm': 3.6217634677886963, 'learning_rate': 1.2953020134228189e-05, 'epoch': 1.85}


 38%|███▊      | 3500/9190 [36:59<57:17,  1.66it/s]

{'loss': 0.3053, 'grad_norm': 1.801722526550293, 'learning_rate': 1.272930648769575e-05, 'epoch': 1.9}


 39%|███▉      | 3600/9190 [37:59<56:16,  1.66it/s]

{'loss': 0.311, 'grad_norm': 5.741366863250732, 'learning_rate': 1.2505592841163311e-05, 'epoch': 1.96}


                                                   
 40%|████      | 3676/9190 [40:58<51:58,  1.77it/s]

{'eval_loss': 0.35349056124687195, 'eval_precision': 0.7815845824411135, 'eval_recall': 0.6792803970223326, 'eval_fscore': 0.726850315300365, 'eval_accuracy': 0.8693650793650793, 'eval_tp': 2190, 'eval_tn': 8764, 'eval_fp': 612, 'eval_fn': 1034, 'eval_runtime': 132.5242, 'eval_samples_per_second': 95.077, 'eval_steps_per_second': 5.946, 'epoch': 2.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 40%|████      | 3700/9190 [41:15<56:22,  1.62it/s]   

{'loss': 0.2973, 'grad_norm': 5.2735748291015625, 'learning_rate': 1.2281879194630872e-05, 'epoch': 2.01}


 41%|████▏     | 3800/9190 [42:15<54:16,  1.65it/s]

{'loss': 0.216, 'grad_norm': 4.954854488372803, 'learning_rate': 1.2058165548098435e-05, 'epoch': 2.07}


 42%|████▏     | 3900/9190 [43:16<53:13,  1.66it/s]

{'loss': 0.2456, 'grad_norm': 8.216095924377441, 'learning_rate': 1.1834451901565998e-05, 'epoch': 2.12}


 44%|████▎     | 4000/9190 [44:16<52:03,  1.66it/s]

{'loss': 0.2299, 'grad_norm': 1.5706144571304321, 'learning_rate': 1.1610738255033558e-05, 'epoch': 2.18}


 45%|████▍     | 4100/9190 [45:17<51:09,  1.66it/s]

{'loss': 0.2592, 'grad_norm': 5.643367767333984, 'learning_rate': 1.138702460850112e-05, 'epoch': 2.23}


 46%|████▌     | 4200/9190 [46:17<50:16,  1.65it/s]

{'loss': 0.2665, 'grad_norm': 4.419868469238281, 'learning_rate': 1.116331096196868e-05, 'epoch': 2.29}


 47%|████▋     | 4300/9190 [47:18<49:10,  1.66it/s]

{'loss': 0.2556, 'grad_norm': 4.2000627517700195, 'learning_rate': 1.0939597315436243e-05, 'epoch': 2.34}


 48%|████▊     | 4400/9190 [48:18<48:10,  1.66it/s]

{'loss': 0.2594, 'grad_norm': 1.668300747871399, 'learning_rate': 1.0715883668903804e-05, 'epoch': 2.39}


 49%|████▉     | 4500/9190 [49:19<47:09,  1.66it/s]

{'loss': 0.2608, 'grad_norm': 4.930062294006348, 'learning_rate': 1.0492170022371365e-05, 'epoch': 2.45}


 50%|█████     | 4600/9190 [50:19<46:11,  1.66it/s]

{'loss': 0.2786, 'grad_norm': 2.399294376373291, 'learning_rate': 1.0268456375838927e-05, 'epoch': 2.5}


 51%|█████     | 4700/9190 [51:20<45:15,  1.65it/s]

{'loss': 0.246, 'grad_norm': 5.369772911071777, 'learning_rate': 1.004474272930649e-05, 'epoch': 2.56}


 52%|█████▏    | 4800/9190 [52:20<44:13,  1.65it/s]

{'loss': 0.2692, 'grad_norm': 5.367950439453125, 'learning_rate': 9.821029082774049e-06, 'epoch': 2.61}


 53%|█████▎    | 4900/9190 [53:21<43:00,  1.66it/s]

{'loss': 0.2663, 'grad_norm': 1.385954737663269, 'learning_rate': 9.59731543624161e-06, 'epoch': 2.67}


 54%|█████▍    | 5000/9190 [54:21<42:07,  1.66it/s]

{'loss': 0.2596, 'grad_norm': 3.2515056133270264, 'learning_rate': 9.373601789709173e-06, 'epoch': 2.72}


 55%|█████▌    | 5100/9190 [55:22<41:27,  1.64it/s]

{'loss': 0.2451, 'grad_norm': 8.137075424194336, 'learning_rate': 9.149888143176734e-06, 'epoch': 2.77}


 57%|█████▋    | 5200/9190 [56:23<40:41,  1.63it/s]

{'loss': 0.2376, 'grad_norm': 2.5697922706604004, 'learning_rate': 8.926174496644297e-06, 'epoch': 2.83}


 58%|█████▊    | 5300/9190 [57:24<39:13,  1.65it/s]

{'loss': 0.2573, 'grad_norm': 4.437542915344238, 'learning_rate': 8.702460850111859e-06, 'epoch': 2.88}


 59%|█████▉    | 5400/9190 [58:25<38:08,  1.66it/s]

{'loss': 0.2698, 'grad_norm': 6.3056230545043945, 'learning_rate': 8.47874720357942e-06, 'epoch': 2.94}


 60%|█████▉    | 5500/9190 [59:25<37:54,  1.62it/s]

{'loss': 0.2842, 'grad_norm': 4.976691722869873, 'learning_rate': 8.255033557046981e-06, 'epoch': 2.99}


                                                   
 60%|██████    | 5514/9190 [1:01:47<35:27,  1.73it/s]

{'eval_loss': 0.3558124601840973, 'eval_precision': 0.7436556376485705, 'eval_recall': 0.7180521091811415, 'eval_fscore': 0.7306296354741991, 'eval_accuracy': 0.8645238095238095, 'eval_tp': 2315, 'eval_tn': 8578, 'eval_fp': 798, 'eval_fn': 909, 'eval_runtime': 132.9688, 'eval_samples_per_second': 94.759, 'eval_steps_per_second': 5.926, 'epoch': 3.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 61%|██████    | 5600/9190 [1:02:41<36:08,  1.66it/s]   

{'loss': 0.212, 'grad_norm': 4.254471778869629, 'learning_rate': 8.031319910514542e-06, 'epoch': 3.05}


 62%|██████▏   | 5700/9190 [1:03:41<34:33,  1.68it/s]

{'loss': 0.2057, 'grad_norm': 6.077566146850586, 'learning_rate': 7.807606263982103e-06, 'epoch': 3.1}


 63%|██████▎   | 5800/9190 [1:04:40<32:49,  1.72it/s]

{'loss': 0.2082, 'grad_norm': 2.4304559230804443, 'learning_rate': 7.583892617449665e-06, 'epoch': 3.16}


 64%|██████▍   | 5900/9190 [1:05:40<34:02,  1.61it/s]

{'loss': 0.21, 'grad_norm': 3.3251307010650635, 'learning_rate': 7.360178970917227e-06, 'epoch': 3.21}


 65%|██████▌   | 6000/9190 [1:06:40<31:34,  1.68it/s]

{'loss': 0.2165, 'grad_norm': 2.673262119293213, 'learning_rate': 7.136465324384788e-06, 'epoch': 3.26}


 66%|██████▋   | 6100/9190 [1:07:37<29:38,  1.74it/s]

{'loss': 0.2179, 'grad_norm': 4.817668437957764, 'learning_rate': 6.91275167785235e-06, 'epoch': 3.32}


 67%|██████▋   | 6200/9190 [1:08:35<29:00,  1.72it/s]

{'loss': 0.2253, 'grad_norm': 3.465660810470581, 'learning_rate': 6.689038031319911e-06, 'epoch': 3.37}


 69%|██████▊   | 6300/9190 [1:09:34<27:55,  1.72it/s]

{'loss': 0.2139, 'grad_norm': 3.2685372829437256, 'learning_rate': 6.465324384787472e-06, 'epoch': 3.43}


 70%|██████▉   | 6400/9190 [1:10:33<27:29,  1.69it/s]

{'loss': 0.235, 'grad_norm': 2.339691638946533, 'learning_rate': 6.241610738255034e-06, 'epoch': 3.48}


 71%|███████   | 6500/9190 [1:11:31<25:49,  1.74it/s]

{'loss': 0.217, 'grad_norm': 5.387319087982178, 'learning_rate': 6.0178970917225955e-06, 'epoch': 3.54}


 72%|███████▏  | 6600/9190 [1:12:29<24:49,  1.74it/s]

{'loss': 0.2313, 'grad_norm': 0.39320874214172363, 'learning_rate': 5.794183445190157e-06, 'epoch': 3.59}


 73%|███████▎  | 6700/9190 [1:13:27<27:34,  1.51it/s]

{'loss': 0.2324, 'grad_norm': 1.961767554283142, 'learning_rate': 5.570469798657718e-06, 'epoch': 3.65}


 74%|███████▍  | 6800/9190 [1:14:26<22:43,  1.75it/s]

{'loss': 0.2339, 'grad_norm': 9.892226219177246, 'learning_rate': 5.34675615212528e-06, 'epoch': 3.7}


 75%|███████▌  | 6900/9190 [1:15:25<22:17,  1.71it/s]

{'loss': 0.2237, 'grad_norm': 2.493884325027466, 'learning_rate': 5.123042505592841e-06, 'epoch': 3.75}


 76%|███████▌  | 7000/9190 [1:16:22<21:10,  1.72it/s]

{'loss': 0.2443, 'grad_norm': 2.364342451095581, 'learning_rate': 4.899328859060403e-06, 'epoch': 3.81}


 77%|███████▋  | 7100/9190 [1:17:20<20:01,  1.74it/s]

{'loss': 0.2274, 'grad_norm': 7.628083229064941, 'learning_rate': 4.675615212527964e-06, 'epoch': 3.86}


 78%|███████▊  | 7200/9190 [1:18:17<19:03,  1.74it/s]

{'loss': 0.2178, 'grad_norm': 0.3319874405860901, 'learning_rate': 4.4519015659955265e-06, 'epoch': 3.92}


 79%|███████▉  | 7300/9190 [1:19:15<18:06,  1.74it/s]

{'loss': 0.2513, 'grad_norm': 3.0379738807678223, 'learning_rate': 4.228187919463088e-06, 'epoch': 3.97}


                                                     
 80%|████████  | 7352/9190 [1:21:52<16:27,  1.86it/s]

{'eval_loss': 0.4256826937198639, 'eval_precision': 0.756838905775076, 'eval_recall': 0.6950992555831266, 'eval_fscore': 0.7246564268391269, 'eval_accuracy': 0.8648412698412699, 'eval_tp': 2241, 'eval_tn': 8656, 'eval_fp': 720, 'eval_fn': 983, 'eval_runtime': 126.9357, 'eval_samples_per_second': 99.263, 'eval_steps_per_second': 6.208, 'epoch': 4.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
 81%|████████  | 7400/9190 [1:22:22<17:10,  1.74it/s]   

{'loss': 0.1866, 'grad_norm': 7.032719135284424, 'learning_rate': 4.004474272930649e-06, 'epoch': 4.03}


 82%|████████▏ | 7500/9190 [1:23:20<16:11,  1.74it/s]

{'loss': 0.1857, 'grad_norm': 6.415452003479004, 'learning_rate': 3.7807606263982105e-06, 'epoch': 4.08}


 83%|████████▎ | 7600/9190 [1:24:17<15:14,  1.74it/s]

{'loss': 0.1726, 'grad_norm': 8.872213363647461, 'learning_rate': 3.557046979865772e-06, 'epoch': 4.13}


 84%|████████▍ | 7700/9190 [1:33:50<14:07,  1.76it/s]    

{'loss': 0.2203, 'grad_norm': 8.969600677490234, 'learning_rate': 3.3333333333333333e-06, 'epoch': 4.19}


 85%|████████▍ | 7800/9190 [1:47:48<12:57,  1.79it/s]    

{'loss': 0.2281, 'grad_norm': 5.398549556732178, 'learning_rate': 3.109619686800895e-06, 'epoch': 4.24}


 86%|████████▌ | 7900/9190 [1:48:48<12:47,  1.68it/s]

{'loss': 0.1903, 'grad_norm': 4.45667028427124, 'learning_rate': 2.885906040268457e-06, 'epoch': 4.3}


 87%|████████▋ | 8000/9190 [1:49:48<11:45,  1.69it/s]

{'loss': 0.1896, 'grad_norm': 8.96572208404541, 'learning_rate': 2.662192393736018e-06, 'epoch': 4.35}


 88%|████████▊ | 8100/9190 [1:50:47<10:46,  1.69it/s]

{'loss': 0.1976, 'grad_norm': 7.991313457489014, 'learning_rate': 2.4384787472035794e-06, 'epoch': 4.41}


 89%|████████▉ | 8200/9190 [1:51:46<09:39,  1.71it/s]

{'loss': 0.2174, 'grad_norm': 4.709577560424805, 'learning_rate': 2.2147651006711415e-06, 'epoch': 4.46}


 90%|█████████ | 8300/9190 [1:52:46<09:17,  1.60it/s]

{'loss': 0.2027, 'grad_norm': 2.1697707176208496, 'learning_rate': 1.9910514541387027e-06, 'epoch': 4.52}


 91%|█████████▏| 8400/9190 [1:53:45<07:44,  1.70it/s]

{'loss': 0.1916, 'grad_norm': 4.706316947937012, 'learning_rate': 1.767337807606264e-06, 'epoch': 4.57}


 92%|█████████▏| 8500/9190 [1:54:44<06:44,  1.70it/s]

{'loss': 0.2063, 'grad_norm': 3.1391196250915527, 'learning_rate': 1.5436241610738257e-06, 'epoch': 4.62}


 94%|█████████▎| 8600/9190 [1:55:43<05:45,  1.71it/s]

{'loss': 0.2026, 'grad_norm': 6.923135757446289, 'learning_rate': 1.3199105145413871e-06, 'epoch': 4.68}


 95%|█████████▍| 8700/9190 [1:56:42<04:47,  1.70it/s]

{'loss': 0.1819, 'grad_norm': 7.960139751434326, 'learning_rate': 1.0961968680089487e-06, 'epoch': 4.73}


 96%|█████████▌| 8800/9190 [1:57:41<03:53,  1.67it/s]

{'loss': 0.2055, 'grad_norm': 0.49467986822128296, 'learning_rate': 8.724832214765102e-07, 'epoch': 4.79}


 97%|█████████▋| 8900/9190 [1:58:41<02:51,  1.69it/s]

{'loss': 0.1823, 'grad_norm': 8.198399543762207, 'learning_rate': 6.487695749440716e-07, 'epoch': 4.84}


 98%|█████████▊| 9000/9190 [1:59:42<01:54,  1.67it/s]

{'loss': 0.2008, 'grad_norm': 7.3534464836120605, 'learning_rate': 4.2505592841163315e-07, 'epoch': 4.9}


 99%|█████████▉| 9100/9190 [2:00:42<00:53,  1.68it/s]

{'loss': 0.1945, 'grad_norm': 6.858604907989502, 'learning_rate': 2.0134228187919465e-07, 'epoch': 4.95}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
                                                     
100%|██████████| 9190/9190 [2:03:54<00:00,  1.83it/s]

{'eval_loss': 0.4600447118282318, 'eval_precision': 0.7226209048361935, 'eval_recall': 0.7183622828784119, 'eval_fscore': 0.7204853009799347, 'eval_accuracy': 0.8573809523809524, 'eval_tp': 2316, 'eval_tn': 8487, 'eval_fp': 889, 'eval_fn': 908, 'eval_runtime': 135.6113, 'eval_samples_per_second': 92.913, 'eval_steps_per_second': 5.811, 'epoch': 5.0}


100%|██████████| 9190/9190 [2:03:57<00:00,  1.83it/s]

{'train_runtime': 7437.9232, 'train_samples_per_second': 19.764, 'train_steps_per_second': 1.236, 'train_loss': 0.2774503419396667, 'epoch': 5.0}


100%|██████████| 9190/9190 [2:03:58<00:00,  1.24it/s]


{'train_runtime': 7437.9232,
 'train_samples_per_second': 19.764,
 'train_steps_per_second': 1.236,
 'total_flos': 7025499503316000.0,
 'train_loss': 0.2774503419396667,
 'epoch': 5.0}

In [4]:
# Load the test split (collected into an in-memory frame) and run it through
# the project preprocessing step.
test_base = load_test().collect()
test_transformed_base = transform(test_base)

# Pull texts and labels out as plain lists, wrap them in a dataset and run
# inference with the fine-tuned model.
test_texts_base = test_transformed_base["text"].to_list()
test_labels_base = test_transformed_base["label"].to_list()
deberta_prediction_base = deberta_base.predict(
    load_dataset(test_texts_base, test_labels_base)
)

# Last expression of the cell: display the raw prediction output.
deberta_prediction_base

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
100%|██████████| 750/750 [02:03<00:00,  6.06it/s]


PredictionOutput(predictions=array([[-0.26294878,  0.41804612],
       [ 0.83503443, -0.4602745 ],
       [ 0.83503443, -0.4602745 ],
       ...,
       [ 0.27707192, -0.04941907],
       [ 0.27707192, -0.04941907],
       [ 0.27707192, -0.04941907]], dtype=float32), label_ids=array([1, 0, 0, ..., 1, 0, 1]), metrics={'test_loss': 0.36330166459083557, 'test_precision': 0.7212765957446808, 'test_recall': 0.6529695024077047, 'test_fscore': 0.6854254422914912, 'test_accuracy': 0.8444166666666667, 'test_tp': 2034, 'test_tn': 8099, 'test_fp': 786, 'test_fn': 1081, 'test_runtime': 124.0368, 'test_samples_per_second': 96.746, 'test_steps_per_second': 6.047})

In [6]:
# Reduce the per-row logits to a predicted class: the index of the largest
# value along axis 1.
predicted_class_base = np.argmax(deberta_prediction_base.predictions, axis=1)

# Collect ids, gold labels and predicted classes side by side.
# NOTE(review): ids come from test_base while predictions were computed on
# test_transformed_base — this assumes transform() keeps row count and order;
# confirm.
predictions_base = DataFrame(
    {
        "id": test_base["id"],
        "label": deberta_prediction_base.label_ids,
        "deberta": predicted_class_base,
    }
)

# Last expression of the cell: preview the first rows.
predictions_base.head()

id,label,deberta
i64,i64,i64
100,1,1
10005,0,0
10006,0,0
10007,0,0
10008,0,0


## Results

In [7]:
# Score the predicted classes against the gold labels on the test set.
gold_labels_base = predictions_base["label"]
deberta_labels_base = predictions_base["deberta"]
scores_deberta_base = calculate_scores(gold_labels_base, deberta_labels_base)

# Last expression of the cell: display the score summary.
scores_deberta_base

precision:	0.7213
recall:		0.6530
fscore:		0.6854
accuracy:	0.8444
tn: 8099	 fp: 786
fn: 1081	 tp: 2034