In [10]:
# import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
# import torch
# from torch.utils.data import Dataset, DataLoader
from datasets import load_dataset

# --- Run configuration -------------------------------------------------
# Checkpoint id shared by the tokenizer here and the model loaded later.
model_path = 'microsoft/deberta-v3-small'
# Token budget per example; the preprocessing step pads/truncates to this.
max_length = 32
# CSV inputs for the training and the held-out split.
train_path, val_path = "./group_train.csv", "./group_test.csv"

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [11]:
# ds = load_dataset('csv', data_files="./30k.csv", split = 'train')
# ds = ds.train_test_split(test_size=0.3)

In [12]:

# The csv loader files every row under a single "train" split, which is why
# both the training file and the held-out file are read with split='train'.
train = load_dataset('csv', data_files=train_path, split='train')
val = load_dataset('csv', data_files=val_path, split='train')

# Keep a random subset of the held-out file for evaluation. The seed pins
# which rows are drawn so that metrics are comparable between runs (without
# it every execution evaluates on a different sample).
val_size = 6314
split_seed = 42
val = val.train_test_split(test_size=val_size, seed=split_seed)['test']

In [13]:

def preprocess(ds):
    """Tokenize one example and attach its two-element float label.

    Args:
        ds: A single row (mapping) exposing the ``argument``, ``topic``,
            ``WA`` and ``stance_WA`` columns. Despite the name this is one
            example, not a dataset: it is applied through ``Dataset.map``
            without batching.

    Returns:
        The tokenizer output for the combined text, padded/truncated to
        ``max_length`` tokens, with an extra ``label`` entry holding
        ``[WA, stance_WA]`` as floats.
    """
    # Argument first, then the topic on its own line.
    text = f"{ds['argument']}.\n{ds['topic']}"
    tokenized_text = tokenizer(text, max_length=max_length, padding='max_length', truncation=True)

    # Cast explicitly instead of trusting CSV type inference: when the model's
    # problem_type is unset, Transformers picks the loss from the label dtype
    # (integer -> cross-entropy, float -> BCE-with-logits). The metric code
    # thresholds a per-logit sigmoid, so the labels have to be floats.
    tokenized_text['label'] = [float(ds['WA']), float(ds['stance_WA'])]

    return tokenized_text

# Tokenize both splits row by row with the same function. The names are
# rebound to the mapped datasets, which carry the tokenizer fields and `label`.
train = train.map(function=preprocess)
val = val.map(function=preprocess)

Map:   0%|          | 0/6314 [00:00<?, ? examples/s]

In [14]:
import evaluate
import numpy as np

# Bundle the four classification metrics so a single compute() call in
# compute_metrics reports all of them.
clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``, computed stably.

    The naive formula evaluates ``exp(-x)``, which overflows for strongly
    negative inputs (around -88 in float32) and emits a RuntimeWarning.
    Here the exponential is only ever taken of a non-positive number, so it
    stays within [0, 1] for any finite input.

    Args:
        x: Scalar or array-like of logits.

    Returns:
        ``numpy.ndarray`` with the same shape as ``x`` and values in [0, 1].
    """
    x = np.asarray(x)
    # z = exp(-|x|) is always in (0, 1], so it can never overflow.
    z = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)); for x < 0 the algebraically equal
    # exp(x) / (1 + exp(x)). Both are expressed through z.
    return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

def compute_metrics(eval_pred):
    """Turn raw evaluation outputs into accuracy / F1 / precision / recall.

    Args:
        eval_pred: A ``(predictions, labels)`` pair of arrays; the first is
            treated as logits.

    Returns:
        Whatever ``clf_metrics.compute`` returns for the flattened inputs.
    """
    logits, targets = eval_pred

    # Squash each logit independently and cut at 0.5 to get hard 0/1 decisions.
    probabilities = sigmoid(logits)
    hard_preds = (probabilities > 0.5).astype(float)

    # Every (example, output) cell is scored as its own binary decision.
    # NOTE(review): this presumes the label values are themselves 0/1 - confirm
    # against the CSV columns.
    return clf_metrics.compute(
        predictions=hard_preds.reshape(-1),
        references=targets.astype(float).reshape(-1),
    )

In [16]:
from transformers import DataCollatorWithPadding

# Batch collator built from the shared tokenizer; it is handed to the Trainer.
# preprocess already pads every example to max_length, so the collator's
# padding should have nothing left to add here.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# Load the pretrained encoder with a fresh classification head.
# The head size and loss are stated explicitly rather than left to defaults:
# preprocessing stores two values per example (WA, stance_WA) and the metric
# code applies an independent sigmoid to each logit, i.e. a two-output
# multi-label setup trained with BCE-with-logits.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=2,
    problem_type="multi_label_classification",
)

# Display the resolved configuration as the cell output.
model.config

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-small and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-small",
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.40.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

[codecarbon INFO @ 11:19:25] Energy consumed for RAM : 0.001850 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 11:19:25] Energy consumed for all CPUs : 0.001542 kWh. Total CPU Power : 5.0 W
[codecarbon INFO @ 11:19:25] 0.003392 kWh of electricity used since the beginning.
[codecarbon INFO @ 11:19:55] Energy consumed for RAM : 0.001900 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 11:19:55] Energy consumed for all CPUs : 0.001584 kWh. Total CPU Power : 5.0 W
[codecarbon INFO @ 11:19:55] 0.003484 kWh of electricity used since the beginning.
[codecarbon INFO @ 11:20:25] Energy consumed for RAM : 0.001950 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 11:20:25] Energy consumed for all CPUs : 0.001625 kWh. Total CPU Power : 5.0 W
[codecarbon INFO @ 11:20:25] 0.003575 kWh of electricity used since the beginning.
[codecarbon INFO @ 11:20:55] Energy consumed for RAM : 0.002000 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 11:20:55] Energy consumed for all CPUs : 0.001667 kWh. Total CPU Power : 5.0 W
[codecarbon

In [None]:
# Hyper-parameters and checkpoint policy for the fine-tuning run.
training_args = TrainingArguments(
    output_dir="./output",
    # Optimisation
    num_train_epochs=3,
    learning_rate=3e-5,
    weight_decay=0.01,
    # Batching
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and checkpoint once per epoch, then restore the best checkpoint
    # when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Wire the model, data, tokenizer/collator and metric function together.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=train,
    eval_dataset=val,
    compute_metrics=compute_metrics,
)

# Run fine-tuning; the returned training summary is the cell output.
trainer.train()