# In this notebook, we load our fine-tuned RoBERTa-based models and evaluate them on the validation and test sets.

# Install

In [None]:
!pip install -U -q datasets transformers torchinfo

[K     |████████████████████████████████| 441 kB 37.4 MB/s 
[K     |████████████████████████████████| 5.3 MB 63.3 MB/s 
[K     |████████████████████████████████| 163 kB 69.4 MB/s 
[K     |████████████████████████████████| 212 kB 73.8 MB/s 
[K     |████████████████████████████████| 115 kB 71.9 MB/s 
[K     |████████████████████████████████| 127 kB 69.7 MB/s 
[K     |████████████████████████████████| 7.6 MB 28.7 MB/s 
[K     |████████████████████████████████| 115 kB 59.0 MB/s 
[?25h

# Imports

In [None]:
import os

import numpy as np
import pandas as pd
from datasets import Dataset, DatasetDict, ClassLabel, Value, load_dataset, load_metric
from huggingface_hub import notebook_login
from sklearn.metrics import classification_report
from torchinfo import summary
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding

# Data Loading

In [1]:
# Log in to the Hugging Face Hub.
# The imports cell must be executed before this one; otherwise `notebook_login`
# is undefined and the call raises a NameError.
notebook_login()

NameError: ignored

In [None]:
# Fetch the train/val/test CSV files from our Hugging Face dataset repository.
ds = load_dataset(
    'sara-nabhani/lfd-proj',
    'csv',
    data_files={
        'train': 'train.csv',
        'val': 'val.csv',
        'test': 'test.csv',
    },
)

# Convert the raw `label` column into a ClassLabel feature whose class names
# are the distinct label values present in the training split.
# NOTE(review): the class ids follow the order in which `unique` returns the
# values; confirm that this order matches the label ids the fine-tuned
# checkpoints were trained with.
cl = ClassLabel(names=[*ds['train'].unique('label')])
ds = ds.cast_column('label', cl)

# Display the resulting splits and their sizes.
ds



Downloading and preparing dataset csv/sara-nabhani--lfd-proj to /root/.cache/huggingface/datasets/sara-nabhani___csv/sara-nabhani--lfd-proj-a35fbd5b9bbbc3d0/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/1.62M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/134k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/132k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

0 tables [00:00, ? tables/s]

0 tables [00:00, ? tables/s]

0 tables [00:00, ? tables/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/sara-nabhani___csv/sara-nabhani--lfd-proj-a35fbd5b9bbbc3d0/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Casting the dataset:   0%|          | 0/2 [00:00<?, ?ba/s]

Casting the dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

Casting the dataset:   0%|          | 0/1 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['tweet', 'label'],
        num_rows: 12240
    })
    val: Dataset({
        features: ['tweet', 'label'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['tweet', 'label'],
        num_rows: 860
    })
})

# roberta-base-finetuned-0 Model Evaluation

In [None]:
# Hub repository of the checkpoint evaluated in this section.
model_id = 'sara-nabhani/roberta-base-fintuned-0'

# The tokenizer is loaded from the same repository as the model; the collator
# uses it to assemble batches.
tokenizer = AutoTokenizer.from_pretrained(model_id)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize the `tweet` column of a batch.

    Every tweet is padded to the tokenizer's maximum length and truncated if
    it is longer than that.
    """
    tweets = examples['tweet']
    encoded = tokenizer(tweets, padding='max_length', truncation=True)
    return encoded


# Tokenize all splits in batches; the original columns are kept alongside
# the new tokenizer outputs.
tokenized_ds = ds.map(tokenize_function, batched=True)

Downloading:   0%|          | 0.00/380 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/280 [00:00<?, ?B/s]

  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
# Metric objects used by `compute_metrics` below.
metric = load_metric("accuracy")
metric1 = load_metric("f1")


def compute_metrics(eval_pred):
    """Return accuracy and macro-averaged F1 for a (logits, labels) pair.

    The predicted class of each example is the index of its largest logit
    along the last axis.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    accuracy = metric.compute(predictions=predicted, references=gold)
    macro_f1 = metric1.compute(predictions=predicted, references=gold, average='macro')
    return {"acc": accuracy, "f1": macro_f1}


# Load the fine-tuned checkpoint with a two-class classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)

# The Trainer is used here only as a convenient prediction/evaluation harness.
trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['val'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

  """Entry point for launching an IPython kernel.


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/835 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

## Evaluation

In [None]:
# Run the model on the validation split and show only the metrics part of
# the prediction output (loss, accuracy, macro F1 and timing figures).
print("Prediction on validation data:")
print(trainer.predict(tokenized_ds['val']).metrics)

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1000
  Batch size = 8
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Prediction on validation data:


{'test_loss': 0.4589715898036957, 'test_acc': {'accuracy': 0.802}, 'test_f1': {'f1': 0.7856734616117356}, 'test_runtime': 30.0922, 'test_samples_per_second': 33.231, 'test_steps_per_second': 4.154}


In [None]:
# Run the model on the test split and show only the metrics part of the
# prediction output (loss, accuracy, macro F1 and timing figures).
print("Prediction on test data:")
print(trainer.predict(tokenized_ds['test']).metrics)

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


Prediction on test data:


{'test_loss': 0.3589843809604645, 'test_acc': {'accuracy': 0.8383720930232558}, 'test_f1': {'f1': 0.7940382392113012}, 'test_runtime': 24.4779, 'test_samples_per_second': 35.134, 'test_steps_per_second': 4.412}


## Saving predictions to a DataFrame

In [None]:
# Predicted class id per example: the index of the largest logit in each row
# of the model's prediction output.
val_predictions = np.argmax(
    trainer.predict(tokenized_ds['val']).predictions,
    axis=-1,
)
test_predictions = np.argmax(
    trainer.predict(tokenized_ds['test']).predictions,
    axis=-1,
)

The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1000
  Batch size = 8


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


In [None]:
# One row per tweet: the text, its gold class id and the predicted class id.
val_data_frame = pd.DataFrame(dict(
    tweet=ds['val']['tweet'],
    true_label=ds['val']['label'],
    pred_label=val_predictions,
))
test_data_frame = pd.DataFrame(dict(
    tweet=ds['test']['tweet'],
    true_label=ds['test']['label'],
    pred_label=test_predictions,
))

In [None]:
# Per-class precision/recall/F1 on the validation split.
print("Validation classification report:")
print(
    classification_report(
        y_true=val_data_frame['true_label'],
        y_pred=val_data_frame['pred_label'],
    )
)

Validation classification report:
              precision    recall  f1-score   support

           0       0.71      0.75      0.73       352
           1       0.86      0.83      0.84       648

    accuracy                           0.80      1000
   macro avg       0.78      0.79      0.79      1000
weighted avg       0.81      0.80      0.80      1000



In [None]:
# Per-class precision/recall/F1 on the test split.
print("Test classification report:")
print(
    classification_report(
        y_true=test_data_frame['true_label'],
        y_pred=test_data_frame['pred_label'],
    )
)

Test classification report:
              precision    recall  f1-score   support

           0       0.73      0.67      0.70       240
           1       0.88      0.90      0.89       620

    accuracy                           0.84       860
   macro avg       0.80      0.79      0.79       860
weighted avg       0.84      0.84      0.84       860



In [None]:
# Mount Google Drive at /content/gdrive (Colab only); the cells that export
# predictions write their CSV files underneath this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Export the per-tweet predictions of this model to Google Drive, one CSV per
# split (test predictions and validation predictions live in separate folders).
test_csv_path = '/content/gdrive/MyDrive/lfd-proj/models-output/roberta-base-fintuned-0.csv'
val_csv_path = '/content/gdrive/MyDrive/lfd-proj/val-models-output/roberta-base-fintuned-0.csv'

# `to_csv` does not create missing folders, so make sure they exist first.
for csv_path in (test_csv_path, val_csv_path):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)

test_data_frame.to_csv(test_csv_path, index=False)
val_data_frame.to_csv(val_csv_path, index=False)

# roberta-base-finetuned-1 Model Evaluation

In [None]:
# Hub repository of the checkpoint evaluated in this section.
model_id = 'sara-nabhani/roberta-base-fintuned-1'

# Load model tokenizer and tokenize the dataset
tokenizer = AutoTokenizer.from_pretrained(model_id)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of tweets, padded/truncated to the tokenizer's maximum length."""
    return tokenizer(examples['tweet'], padding='max_length', truncation=True)


tokenized_ds = ds.map(tokenize_function, batched=True)

# Sanity check: padded length of the first training example. The row is
# indexed first so that the whole `input_ids` column is not materialised
# just to measure a single sequence.
print(len(tokenized_ds['train'][0]['input_ids']))

metric = load_metric("accuracy")
metric1 = load_metric("f1")


# evaluation function
def compute_metrics(eval_pred):
    """Return accuracy and macro-averaged F1 for a (logits, labels) pair."""
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "acc": metric.compute(predictions=predictions, references=labels),
        "f1": metric1.compute(predictions=predictions, references=labels, average='macro'),
    }


# load model
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['val'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Evaluation: each split is run through the model exactly once; the same
# prediction output provides both the metrics and the predicted labels.
print("Prediction on validation data:")
val_output = trainer.predict(tokenized_ds['val'])
print(val_output.metrics)
print("Prediction on test data:")
test_output = trainer.predict(tokenized_ds['test'])
print(test_output.metrics)

# Saving to DF: predicted class id = index of the largest logit per example.
val_predictions = np.argmax(val_output.predictions, axis=-1)
test_predictions = np.argmax(test_output.predictions, axis=-1)
val_data_frame = pd.DataFrame({'tweet': ds['val']['tweet'], 'true_label': ds['val']['label'], 'pred_label': val_predictions})
test_data_frame = pd.DataFrame({'tweet': ds['test']['tweet'], 'true_label': ds['test']['label'], 'pred_label': test_predictions})

print("Validation classification report:")
print(classification_report(val_data_frame['true_label'], val_data_frame['pred_label']))
print("Test classification report:")
print(classification_report(test_data_frame['true_label'], test_data_frame['pred_label']))

# Export the predictions to Google Drive. The file name is derived from
# `model_id` so it always matches the evaluated checkpoint, and the target
# folders are created first because `to_csv` does not create them.
model_name = model_id.split('/')[-1]
test_csv_path = f'/content/gdrive/MyDrive/lfd-proj/models-output/{model_name}.csv'
val_csv_path = f'/content/gdrive/MyDrive/lfd-proj/val-models-output/{model_name}.csv'
for csv_path in (test_csv_path, val_csv_path):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
test_data_frame.to_csv(test_csv_path, index=False)
val_data_frame.to_csv(val_csv_path, index=False)

Downloading:   0%|          | 0.00/380 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/280 [00:00<?, ?B/s]

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/merges.txt
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/tokenizer_config.json


  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

512


Downloading:   0%|          | 0.00/835 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/config.json
Model config RobertaConfig {
  "_name_or_path": "sara-nabhani/roberta-base-fintuned-1",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "OFF",
    "1": "NOT"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NOT": 1,
    "OFF": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4

Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-1/snapshots/b114e5622dc427a4716db260f316dc17a8425aff/pytorch_model.bin
All model checkpoint weights were used when initializing RobertaForSequenceClassification.

All the weights of RobertaForSequenceClassification were initialized from the model checkpoint at sara-nabhani/roberta-base-fintuned-1.
If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaForSequenceClassification for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don

Prediction on validation data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


{'test_loss': 0.44632744789123535, 'test_acc': {'accuracy': 0.813}, 'test_f1': {'f1': 0.7972150138967358}, 'test_runtime': 30.4217, 'test_samples_per_second': 32.871, 'test_steps_per_second': 4.109}
Prediction on test data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1000
  Batch size = 8


{'test_loss': 0.3676394522190094, 'test_acc': {'accuracy': 0.8383720930232558}, 'test_f1': {'f1': 0.7928935135856241}, 'test_runtime': 26.3864, 'test_samples_per_second': 32.593, 'test_steps_per_second': 4.093}


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


Validation classification report:
              precision    recall  f1-score   support

           0       0.72      0.76      0.74       352
           1       0.87      0.84      0.85       648

    accuracy                           0.81      1000
   macro avg       0.79      0.80      0.80      1000
weighted avg       0.82      0.81      0.81      1000

Test classification report:
              precision    recall  f1-score   support

           0       0.73      0.66      0.70       240
           1       0.87      0.91      0.89       620

    accuracy                           0.84       860
   macro avg       0.80      0.78      0.79       860
weighted avg       0.83      0.84      0.84       860



# roberta-base-finetuned-2 Model Evaluation

In [None]:
# Hub repository of the checkpoint evaluated in this section.
model_id = 'sara-nabhani/roberta-base-fintuned-2'

# Load model tokenizer and tokenize the dataset
tokenizer = AutoTokenizer.from_pretrained(model_id)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of tweets, padded/truncated to 100 tokens."""
    return tokenizer(examples['tweet'], padding='max_length', truncation=True, max_length=100)


tokenized_ds = ds.map(tokenize_function, batched=True)

# Sanity check: padded length of the first training example. The row is
# indexed first so that the whole `input_ids` column is not materialised
# just to measure a single sequence.
print(len(tokenized_ds['train'][0]['input_ids']))

metric = load_metric("accuracy")
metric1 = load_metric("f1")


# evaluation function
def compute_metrics(eval_pred):
    """Return accuracy and macro-averaged F1 for a (logits, labels) pair."""
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "acc": metric.compute(predictions=predictions, references=labels),
        "f1": metric1.compute(predictions=predictions, references=labels, average='macro'),
    }


# load model
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['val'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Evaluation: each split is run through the model exactly once; the same
# prediction output provides both the metrics and the predicted labels.
print("Prediction on validation data:")
val_output = trainer.predict(tokenized_ds['val'])
print(val_output.metrics)
print("Prediction on test data:")
test_output = trainer.predict(tokenized_ds['test'])
print(test_output.metrics)

# Saving to DF: predicted class id = index of the largest logit per example.
val_predictions = np.argmax(val_output.predictions, axis=-1)
test_predictions = np.argmax(test_output.predictions, axis=-1)
val_data_frame = pd.DataFrame({'tweet': ds['val']['tweet'], 'true_label': ds['val']['label'], 'pred_label': val_predictions})
test_data_frame = pd.DataFrame({'tweet': ds['test']['tweet'], 'true_label': ds['test']['label'], 'pred_label': test_predictions})

print("Validation classification report:")
print(classification_report(val_data_frame['true_label'], val_data_frame['pred_label']))
print("Test classification report:")
print(classification_report(test_data_frame['true_label'], test_data_frame['pred_label']))

# Export the predictions to Google Drive. The file name is derived from
# `model_id` so it always matches the evaluated checkpoint, and the target
# folders are created first because `to_csv` does not create them.
model_name = model_id.split('/')[-1]
test_csv_path = f'/content/gdrive/MyDrive/lfd-proj/models-output/{model_name}.csv'
val_csv_path = f'/content/gdrive/MyDrive/lfd-proj/val-models-output/{model_name}.csv'
for csv_path in (test_csv_path, val_csv_path):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
test_data_frame.to_csv(test_csv_path, index=False)
val_data_frame.to_csv(val_csv_path, index=False)

Downloading:   0%|          | 0.00/380 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/280 [00:00<?, ?B/s]

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/merges.txt
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/tokenizer_config.json


  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

100


Downloading:   0%|          | 0.00/835 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/config.json
Model config RobertaConfig {
  "_name_or_path": "sara-nabhani/roberta-base-fintuned-2",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "OFF",
    "1": "NOT"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NOT": 1,
    "OFF": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4

Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-2/snapshots/062bf711b8c31a78a59efea9df44921c179f5def/pytorch_model.bin
All model checkpoint weights were used when initializing RobertaForSequenceClassification.

All the weights of RobertaForSequenceClassification were initialized from the model checkpoint at sara-nabhani/roberta-base-fintuned-2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaForSequenceClassification for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don

Prediction on validation data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


{'test_loss': 0.4539887011051178, 'test_acc': {'accuracy': 0.808}, 'test_f1': {'f1': 0.7919186401883136}, 'test_runtime': 5.7659, 'test_samples_per_second': 173.434, 'test_steps_per_second': 21.679}
Prediction on test data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1000
  Batch size = 8


{'test_loss': 0.3565470278263092, 'test_acc': {'accuracy': 0.8511627906976744}, 'test_f1': {'f1': 0.8105974576562811}, 'test_runtime': 4.9766, 'test_samples_per_second': 172.81, 'test_steps_per_second': 21.702}


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


Validation classification report:
              precision    recall  f1-score   support

           0       0.72      0.75      0.73       352
           1       0.86      0.84      0.85       648

    accuracy                           0.81      1000
   macro avg       0.79      0.80      0.79      1000
weighted avg       0.81      0.81      0.81      1000

Test classification report:
              precision    recall  f1-score   support

           0       0.75      0.70      0.72       240
           1       0.89      0.91      0.90       620

    accuracy                           0.85       860
   macro avg       0.82      0.80      0.81       860
weighted avg       0.85      0.85      0.85       860



# roberta-base-finetuned-3 Model Evaluation

In [None]:
# Hub repository of the checkpoint evaluated in this section.
model_id = 'sara-nabhani/roberta-base-fintuned-3'

# Load model tokenizer and tokenize the dataset
tokenizer = AutoTokenizer.from_pretrained(model_id)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


def tokenize_function(examples):
    """Tokenize a batch of tweets, padded/truncated to 200 tokens."""
    return tokenizer(examples['tweet'], padding='max_length', truncation=True, max_length=200)


tokenized_ds = ds.map(tokenize_function, batched=True)

# Sanity check: padded length of the first training example. The row is
# indexed first so that the whole `input_ids` column is not materialised
# just to measure a single sequence.
print(len(tokenized_ds['train'][0]['input_ids']))

metric = load_metric("accuracy")
metric1 = load_metric("f1")


# evaluation function
def compute_metrics(eval_pred):
    """Return accuracy and macro-averaged F1 for a (logits, labels) pair."""
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "acc": metric.compute(predictions=predictions, references=labels),
        "f1": metric1.compute(predictions=predictions, references=labels, average='macro'),
    }


# load model
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
trainer = Trainer(
    model=model,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['val'],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Evaluation: each split is run through the model exactly once; the same
# prediction output provides both the metrics and the predicted labels.
print("Prediction on validation data:")
val_output = trainer.predict(tokenized_ds['val'])
print(val_output.metrics)
print("Prediction on test data:")
test_output = trainer.predict(tokenized_ds['test'])
print(test_output.metrics)

# Saving to DF: predicted class id = index of the largest logit per example.
val_predictions = np.argmax(val_output.predictions, axis=-1)
test_predictions = np.argmax(test_output.predictions, axis=-1)
val_data_frame = pd.DataFrame({'tweet': ds['val']['tweet'], 'true_label': ds['val']['label'], 'pred_label': val_predictions})
test_data_frame = pd.DataFrame({'tweet': ds['test']['tweet'], 'true_label': ds['test']['label'], 'pred_label': test_predictions})

print("Validation classification report:")
print(classification_report(val_data_frame['true_label'], val_data_frame['pred_label']))
print("Test classification report:")
print(classification_report(test_data_frame['true_label'], test_data_frame['pred_label']))

# Export the predictions to Google Drive. The file name is derived from
# `model_id` so it always matches the evaluated checkpoint, and the target
# folders are created first because `to_csv` does not create them.
model_name = model_id.split('/')[-1]
test_csv_path = f'/content/gdrive/MyDrive/lfd-proj/models-output/{model_name}.csv'
val_csv_path = f'/content/gdrive/MyDrive/lfd-proj/val-models-output/{model_name}.csv'
for csv_path in (test_csv_path, val_csv_path):
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
test_data_frame.to_csv(test_csv_path, index=False)
val_data_frame.to_csv(val_csv_path, index=False)

Downloading:   0%|          | 0.00/380 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/280 [00:00<?, ?B/s]

loading file vocab.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/merges.txt
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/tokenizer_config.json


  0%|          | 0/13 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

200


Downloading:   0%|          | 0.00/835 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/config.json
Model config RobertaConfig {
  "_name_or_path": "sara-nabhani/roberta-base-fintuned-3",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "OFF",
    "1": "NOT"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "NOT": 1,
    "OFF": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4

Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--sara-nabhani--roberta-base-fintuned-3/snapshots/5557c84244f32af096da6ac8c333362250b6c1fb/pytorch_model.bin
All model checkpoint weights were used when initializing RobertaForSequenceClassification.

All the weights of RobertaForSequenceClassification were initialized from the model checkpoint at sara-nabhani/roberta-base-fintuned-3.
If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaForSequenceClassification for predictions without further training.
No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don

Prediction on validation data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


{'test_loss': 0.4532244801521301, 'test_acc': {'accuracy': 0.803}, 'test_f1': {'f1': 0.7873853688755925}, 'test_runtime': 11.2794, 'test_samples_per_second': 88.657, 'test_steps_per_second': 11.082}
Prediction on test data:


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1000
  Batch size = 8


{'test_loss': 0.3730872571468353, 'test_acc': {'accuracy': 0.8406976744186047}, 'test_f1': {'f1': 0.7958734630304354}, 'test_runtime': 9.9711, 'test_samples_per_second': 86.249, 'test_steps_per_second': 10.831}


The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: tweet. If tweet are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 860
  Batch size = 8


Validation classification report:
              precision    recall  f1-score   support

           0       0.71      0.76      0.73       352
           1       0.86      0.83      0.85       648

    accuracy                           0.80      1000
   macro avg       0.78      0.79      0.79      1000
weighted avg       0.81      0.80      0.80      1000

Test classification report:
              precision    recall  f1-score   support

           0       0.74      0.67      0.70       240
           1       0.88      0.91      0.89       620

    accuracy                           0.84       860
   macro avg       0.81      0.79      0.80       860
weighted avg       0.84      0.84      0.84       860

