In [1]:
import datasets
from collections import defaultdict, Counter
from datasets import load_dataset
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
import torch

In [2]:
def print_label_dist(dataset, labelname='gold_label', splitnames=('train', 'validation')):
    """Print how often each label occurs in the given splits of `dataset`.

    Each split name is printed on its own line, followed by one
    tab-indented `label: count` line per distinct label, in sorted
    label order.

    Parameters
    ----------
    dataset : mapping
        Indexed as `dataset[splitname][labelname]`, which must yield an
        iterable of hashable, mutually sortable labels.
    labelname : str
        Name of the label column to count.
    splitnames : iterable of str
        Splits to report, in the order given.

    """
    for splitname in splitnames:
        print(splitname)
        label_counts = Counter(dataset[splitname][labelname])
        for label, count in sorted(label_counts.items()):
            # Format through `str` so that non-string labels (e.g. integer
            # class ids) are right-aligned as well; the `s` format code
            # on its own only accepts strings.
            print(f"\t{str(label):>14s}: {count}")

# Load the two DynaSent configurations (r1 and r2) and the SetFit/sst5 dataset
# with `datasets.load_dataset`. Later cells index these objects by split name
# ('train', 'validation', ...) and then by column name.
dynasent_r1 = load_dataset("dynabench/dynasent", 'dynabench.dynasent.r1.all')
dynasent_r2 = load_dataset("dynabench/dynasent", 'dynabench.dynasent.r2.all')
sst = load_dataset("SetFit/sst5")

def convert_sst_label(s):
    """Return the text after the last space in the label string `s`.

    A two-word label such as "very positive" becomes "positive"; a
    label without any space is returned unchanged.
    """
    _, _, final_word = s.rpartition(" ")
    return final_word

# Give every SST split the same `gold_label` / `sentence` columns that the
# DynaSent data is read through, so the datasets can be used interchangeably.
for splitname in ('train', 'validation', 'test'):
    split = sst[splitname]
    # Only add a column that is not there yet: this keeps the cell safe to
    # re-run, since a second run would otherwise try to add duplicate columns.
    if 'gold_label' not in split.column_names:
        gold_labels = [convert_sst_label(s) for s in split['label_text']]
        split = split.add_column('gold_label', gold_labels)
    if 'sentence' not in split.column_names:
        split = split.add_column('sentence', split['text'])
    sst[splitname] = split


print_label_dist(sst)
print_label_dist(dynasent_r1)
print_label_dist(dynasent_r2)





train
	      negative: 3310
	       neutral: 1624
	      positive: 3610
validation
	      negative: 428
	       neutral: 229
	      positive: 444
train
	      negative: 14021
	       neutral: 45076
	      positive: 21391
validation
	      negative: 1200
	       neutral: 1200
	      positive: 1200
train
	      negative: 4579
	       neutral: 2448
	      positive: 6038
validation
	      negative: 240
	       neutral: 240
	      positive: 240


  return cls._concat_blocks(pa_tables_to_concat_vertically, axis=0)


In [3]:
from torch_shallow_neural_classifier import TorchShallowNeuralClassifier
import transformers
from transformers import AutoModel, AutoTokenizer
import torch.nn as nn
   


class BertClassifierModule(nn.Module):
    def __init__(self, 
            n_classes, 
            hidden_activation, 
            weights_name="prajjwal1/bert-mini",
            max_model_length=512):
        """This module loads a Transformer based on `weights_name`,
        puts it in train mode, and stacks a feed-forward classifier
        head on top of it.

        The head reads the flattened `last_hidden_state` of the
        Transformer (all token positions concatenated), so its first
        layer has `hidden_dim * max_model_length` inputs. It consists
        of two dense layers of width `hidden_dim`, each followed by
        `hidden_activation`, and a final layer with `n_classes` outputs.

        Parameters
        ----------
        n_classes : int
            Number of classes for the output layer
        hidden_activation : torch activation function
            e.g., nn.Tanh(). The same instance is used after both
            dense layers.
        weights_name : str
            Name of pretrained model to load from Hugging Face
        max_model_length : int
            Number of token positions in every input sequence. Inputs
            to `forward` must have exactly this sequence length,
            because the first dense layer is sized for it.

        """
        super().__init__()
        self.n_classes = n_classes
        self.weights_name = weights_name
        self.bert = AutoModel.from_pretrained(self.weights_name)
        self.bert.train()
        self.max_model_length = max_model_length
        # All Transformer parameters are left trainable. To train only the
        # classifier head, set `requires_grad = False` on the parameters of
        # `self.bert` at this point.

        self.hidden_activation = hidden_activation
        # Size the head from the width of the Transformer's hidden states.
        # The word-embedding width is only a fallback: models that factorize
        # their embeddings use a narrower embedding matrix than their hidden
        # states, which would make the first dense layer the wrong size.
        hidden_size = getattr(self.bert.config, "hidden_size", None)
        if hidden_size is None:
            hidden_size = self.bert.embeddings.word_embeddings.embedding_dim
        self.hidden_dim = hidden_size

        self.classifier_layer = nn.Sequential(
            nn.Linear(self.hidden_dim*self.max_model_length, self.hidden_dim),
            self.hidden_activation,
            nn.Linear(self.hidden_dim, self.hidden_dim),
            self.hidden_activation,
            nn.Linear(self.hidden_dim, self.n_classes)
        )


    def forward(self, indices, mask):
        """Process `indices` with `mask` by feeding these arguments
        to `self.bert`, flattening the resulting `last_hidden_state`
        over all token positions, and feeding that vector to
        `self.classifier_layer`. Padded positions are part of the
        flattened vector; `mask` is only passed on to `self.bert`.

        Parameters
        ----------
        indices : tensor.LongTensor of shape (n_batch, k)
            Indices into the `self.bert` embedding layer. `n_batch` is
            the number of examples and `k` is the sequence length for
            this batch. `k` must equal `self.max_model_length`.
        mask : tensor.LongTensor of shape (n_batch, k)
            Binary vector indicating which values should be masked.
            `n_batch` is the number of examples and `k` is the
            sequence length for this batch

        Returns
        -------
        tensor.FloatTensor
            Predicted values, shape `(n_batch, self.n_classes)`

        """
        rep = self.bert(input_ids=indices, attention_mask=mask)
        # (n_batch, k, hidden_dim) -> (n_batch, k * hidden_dim)
        flat_states = torch.flatten(rep.last_hidden_state, start_dim=1)
        return self.classifier_layer(flat_states)


class BertClassifier(TorchShallowNeuralClassifier):
    """Classifier that tokenizes raw strings with the tokenizer of
    `weights_name` and trains a `BertClassifierModule` on them.
    """
    def __init__(self, weights_name, *args, max_length=512, **kwargs):
        """
        Parameters
        ----------
        weights_name : str
            Name of the pretrained model and tokenizer to load from
            Hugging Face
        max_length : int
            Upper bound on the number of tokens per example. The value
            actually used, `self.max_length`, is capped by the
            tokenizer's own `model_max_length`.
        *args, **kwargs
            Passed through to the parent class unchanged.

        """
        self.weights_name = weights_name
        self.tokenizer = AutoTokenizer.from_pretrained(self.weights_name)
        # One sequence length shared by `build_dataset` (padding/truncation)
        # and `build_graph` (input size of the classifier head). These two
        # must agree or the head's first layer will not match the batches.
        # Capping at `max_length` also covers tokenizers that report a very
        # large placeholder as their `model_max_length`.
        self.max_length = min(max_length, self.tokenizer.model_max_length)
        super().__init__(*args, **kwargs)
        self.params += ['weights_name', 'max_length']

    def build_graph(self):
        """Return the `BertClassifierModule` to train, sized for
        `self.n_classes_` outputs and `self.max_length` token positions.
        """
        return BertClassifierModule(
            self.n_classes_,
            self.hidden_activation,
            self.weights_name,
            max_model_length=self.max_length)

    def build_dataset(self, X, y=None):
        """Tokenize `X` and wrap the resulting tensors in a dataset.

        Every example is truncated or padded to `self.max_length`
        tokens, so all batches have the same sequence length.

        Parameters
        ----------
        X : list of str
            Raw texts to tokenize
        y : list or None
            Labels for `X`. When given, `self.classes_` and
            `self.n_classes_` are set from the sorted distinct labels
            and each label is encoded as its index in `self.classes_`.

        Returns
        -------
        torch.utils.data.TensorDataset
            Holds `(input_ids, attention_mask)` when `y` is None,
            otherwise `(input_ids, attention_mask, y)`.

        """
        data = self.tokenizer(
            X,
            max_length=self.max_length,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_attention_mask=True,
            return_tensors="pt")
        if y is None:
            dataset = torch.utils.data.TensorDataset(
                data['input_ids'], data['attention_mask'])
        else:
            self.classes_ = sorted(set(y))
            self.n_classes_ = len(self.classes_)
            class2index = dict(zip(self.classes_, range(self.n_classes_)))
            y = [class2index[label] for label in y]
            y = torch.tensor(y)
            dataset = torch.utils.data.TensorDataset(
                data['input_ids'], data['attention_mask'], y)
        return dataset

# The classifier instance that every experiment cell below fits and evaluates.
bert_finetune = BertClassifier(
    weights_name="FacebookAI/roberta-base",
    max_iter=25,
    hidden_activation=nn.ReLU(),
    eta=0.00005,          # Low learning rate for effective fine-tuning.
    batch_size=64,         # Number of examples per batch.
    gradient_accumulation_steps=1,  # Presumably no accumulation, i.e. the effective batch size equals batch_size (64) - confirm against the base class.
    early_stopping=True,  # Early-stopping
    n_iter_no_change=5)   # params.

In [4]:
%%time
# FINAL MODEL
# roberta base cased with flatten tensor instead of first token embedding + one more linear layer epochs to 25
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'] + dynasent_r1['train']['sentence'] + dynasent_r2['train']['sentence'],
    sst['train']['gold_label'] + dynasent_r1['train']['gold_label'] + dynasent_r2['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'] + dynasent_r1['validation']['sentence'] + dynasent_r2['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'] + dynasent_r1['validation']['gold_label'] + dynasent_r2['validation']['gold_label'], preds, digits=3))

Some weights of RobertaModel were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Finished epoch 1 of 25; error is 864.6148504018784Bad pipe message: %s [b'\x8f\xe8a>\xe5\x88%m\x92sCFh\x8b\xd8\xdd,\xc7 T0-L\xef/\xb9P\x9c\x86,\xd9<vjy\xc9\x82\xe3\xeb~\x1f\x7f\x13\x08\xe1\x85\xfd\xad\x82\x91f\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127', b'.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00']
Bad pipe message: %s [b'\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01']
Bad pipe message: %s [b"\x11\x02\x01\xdb\xdb\xac\x0c*:\x08\x9d\x02\xbfol\x91,'\x00\x00|\xc0,\xc00\

              precision    recall  f1-score   support

    negative      0.797     0.746     0.771      1868
     neutral      0.698     0.767     0.731      1669
    positive      0.793     0.773     0.783      1884

    accuracy                          0.762      5421
   macro avg      0.763     0.762     0.762      5421
weighted avg      0.765     0.762     0.763      5421

CPU times: user 5h 4min 49s, sys: 44.4 s, total: 5h 5min 33s
Wall time: 5h 5min 3s


In [5]:
import os

# Label the unlabeled bakeoff sentences with the currently fitted
# `bert_finetune` and write the result to the entry file.
unlabeled_path = os.path.join(
    "data", "sentiment", "cs224u-sentiment-test-unlabeled.csv")
bakeoff_df = pd.read_csv(unlabeled_path)
bakeoff_sentences = bakeoff_df["sentence"].to_list()
bakeoff_df['prediction'] = bert_finetune.predict(bakeoff_sentences)
bakeoff_df.to_csv("data/sentiment/cs224u-sentiment-bakeoff-entry.csv")

In [4]:
%%time
# distill bert base cased with flatten tensor instead of first token embedding + one more linear layer
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

Finished epoch 10 of 10; error is 2.388294771371875

              precision    recall  f1-score   support

    negative      0.776     0.743     0.759       428
     neutral      0.384     0.310     0.343       229
    positive      0.743     0.847     0.792       444

    accuracy                          0.695      1101
   macro avg      0.634     0.633     0.631      1101
weighted avg      0.681     0.695     0.686      1101

CPU times: user 14min 42s, sys: 2.91 s, total: 14min 45s
Wall time: 14min 41s


In [4]:
%%time
# roberta base with flatten tensor instead of first token embedding + one more linear layer
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at FacebookAI/roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Finished epoch 10 of 10; error is 9.714834146201613

              precision    recall  f1-score   support

    negative      0.782     0.811     0.796       428
     neutral      0.405     0.328     0.362       229
    positive      0.807     0.858     0.832       444

    accuracy                          0.729      1101
   macro avg      0.665     0.665     0.663      1101
weighted avg      0.714     0.729     0.720      1101

CPU times: user 28min 28s, sys: 6.14 s, total: 28min 34s
Wall time: 28min 30s


In [7]:
%%time
# original bert mini with flatten tensor instead of first token embedding + one more linear layer
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

Stopping after epoch 10. Validation score did not improve by tol=1e-05 for more than 5 epochs. Final error is 5.969286805018783

              precision    recall  f1-score   support

    negative      0.693     0.706     0.699       428
     neutral      0.335     0.384     0.358       229
    positive      0.749     0.678     0.712       444

    accuracy                          0.628      1101
   macro avg      0.592     0.589     0.589      1101
weighted avg      0.641     0.628     0.633      1101

CPU times: user 1min 56s, sys: 166 ms, total: 1min 56s
Wall time: 1min 54s


In [4]:
%%time
# original bert mini with flatten tensor instead of first token embedding
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

  return self.fget.__get__(instance, owner)()
Finished epoch 10 of 10; error is 8.079548183828592

              precision    recall  f1-score   support

    negative      0.641     0.752     0.692       428
     neutral      0.319     0.227     0.265       229
    positive      0.722     0.709     0.716       444

    accuracy                          0.626      1101
   macro avg      0.561     0.563     0.558      1101
weighted avg      0.607     0.626     0.613      1101

CPU times: user 1min 58s, sys: 1.44 s, total: 1min 59s
Wall time: 1min 57s


In [10]:
%%time
# original bert mini with frozen backbone, lr*10
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

Finished epoch 10 of 10; error is 106.32385301589966

              precision    recall  f1-score   support

    negative      0.551     0.752     0.636       428
     neutral      0.500     0.083     0.142       229
    positive      0.649     0.700     0.674       444

    accuracy                          0.592      1101
   macro avg      0.567     0.512     0.484      1101
weighted avg      0.580     0.592     0.549      1101

CPU times: user 45.3 s, sys: 208 ms, total: 45.6 s
Wall time: 43.9 s


In [4]:
%%time
# original bert mini with frozen backbone
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

  return self.fget.__get__(instance, owner)()
Finished epoch 10 of 10; error is 111.5887838602066

              precision    recall  f1-score   support

    negative      0.549     0.689     0.611       428
     neutral      0.667     0.009     0.017       229
    positive      0.597     0.755     0.667       444

    accuracy                          0.574      1101
   macro avg      0.604     0.484     0.432      1101
weighted avg      0.593     0.574     0.510      1101

CPU times: user 47.4 s, sys: 1.22 s, total: 48.6 s
Wall time: 46.1 s


In [4]:
%%time
# bert mini with 1 more linear layer
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

  return self.fget.__get__(instance, owner)()
Finished epoch 10 of 10; error is 28.556204199790955

              precision    recall  f1-score   support

    negative      0.717     0.682     0.699       428
     neutral      0.313     0.245     0.275       229
    positive      0.703     0.815     0.755       444

    accuracy                          0.645      1101
   macro avg      0.578     0.581     0.576      1101
weighted avg      0.627     0.645     0.633      1101

CPU times: user 1min 55s, sys: 1.33 s, total: 1min 56s
Wall time: 1min 54s


In [4]:
%%time
# distill bert base uncased
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

Finished epoch 10 of 10; error is 4.913262668880634

              precision    recall  f1-score   support

    negative      0.796     0.675     0.731       428
     neutral      0.380     0.380     0.380       229
    positive      0.750     0.860     0.802       444

    accuracy                          0.688      1101
   macro avg      0.642     0.639     0.637      1101
weighted avg      0.691     0.688     0.686      1101

CPU times: user 13min 56s, sys: 3.51 s, total: 13min 59s
Wall time: 13min 55s


In [4]:
%%time
#bert mini 
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    sst['train']['sentence'],
    sst['train']['gold_label'])

preds = bert_finetune.predict(sst['validation']['sentence'])
print(classification_report(sst['validation']['gold_label'], preds, digits=3))

  return self.fget.__get__(instance, owner)()
Finished epoch 10 of 10; error is 27.84134368598461

              precision    recall  f1-score   support

    negative      0.697     0.710     0.704       428
     neutral      0.352     0.297     0.322       229
    positive      0.710     0.755     0.731       444

    accuracy                          0.642      1101
   macro avg      0.586     0.587     0.586      1101
weighted avg      0.631     0.642     0.636      1101

CPU times: user 1min 55s, sys: 1.38 s, total: 1min 56s
Wall time: 1min 54s


In [12]:
%%time
# bert mini
from sklearn.metrics import classification_report

_ = bert_finetune.fit(
    dynasent_r1['train']['sentence'],
    dynasent_r1['train']['gold_label'])

preds = bert_finetune.predict(dynasent_r1['validation']['sentence'])
print(classification_report(dynasent_r1['validation']['gold_label'], preds, digits=3))

Finished epoch 10 of 10; error is 64.46418231725693

              precision    recall  f1-score   support

    negative      0.787     0.578     0.666      1200
     neutral      0.637     0.862     0.733      1200
    positive      0.743     0.679     0.710      1200

    accuracy                          0.706      3600
   macro avg      0.722     0.706     0.703      3600
weighted avg      0.722     0.706     0.703      3600

CPU times: user 24min 41s, sys: 1.79 s, total: 24min 43s
Wall time: 16min 2s


In [8]:
import os

# Label the unlabeled bakeoff sentences with the currently fitted
# `bert_finetune` and write the result to the entry file.
unlabeled_path = os.path.join(
    "data", "sentiment", "cs224u-sentiment-test-unlabeled.csv")
bakeoff_df = pd.read_csv(unlabeled_path)
bakeoff_sentences = bakeoff_df["sentence"].to_list()
bakeoff_df['prediction'] = bert_finetune.predict(bakeoff_sentences)
bakeoff_df.to_csv("data/sentiment/cs224u-sentiment-bakeoff-entry.csv")

In [13]:
# Preview the first rows of the bakeoff frame.
bakeoff_df.head()

Unnamed: 0,example_id,sentence
0,0,This year we were at a restaurant that clearly...
1,1,A long way.
2,2,A friend and I went on a Thursday evening aro...
3,3,You'll love to say I used to be married to tha...
4,4,I feel like any place I move will be a downgra...


In [14]:
# Store one prediction per bakeoff sentence, using the current `bert_finetune`.
bakeoff_df['prediction'] = bert_finetune.predict(bakeoff_df["sentence"].to_list())

In [16]:
# Save the bakeoff frame, now including the `prediction` column, as the entry CSV.
bakeoff_df.to_csv("data/sentiment/cs224u-sentiment-bakeoff-entry.csv")