In [None]:
!pip install -q -U -r scripts/requirements.txt

In [2]:
from scripts.model_factory import initialize_model
from datasets import load_dataset
from scripts.configurations import config
from scripts.finetuner import FineTuner
from scripts.evaluation import evaluate
from scripts.utility import error_analysis, set_all_seeds

In [3]:
# Fix the random seeds once, before any data loading or training, so repeated
# runs are reproducible. The project helper is called with its default arguments.
# NOTE(review): which libraries get seeded is defined in scripts/utility.py — confirm there.
set_all_seeds()

In [None]:
# Read every CSV split into one mapping (`dataset_dict`), keyed by split name.
# Each split lives at datasets/<split name>.csv.
split_names = [
    'general_train', 'general_val', 'general_test',
    'ssc_train', 'ssc_val', 'ssc_test',
    'all_train', 'all_val',
]
dataset_dict = load_dataset(
    'csv',
    data_files={split: f'datasets/{split}.csv' for split in split_names},
)

# Bind each split to its own name for the training / evaluation cells below.
general_train, general_val, general_test = (
    dataset_dict['general_train'],
    dataset_dict['general_val'],
    dataset_dict['general_test'],
)
ssc_train, ssc_val, ssc_test = (
    dataset_dict['ssc_train'],
    dataset_dict['ssc_val'],
    dataset_dict['ssc_test'],
)
all_train, all_val = dataset_dict['all_train'], dataset_dict['all_val']

# fine-tuning BERT model on general-purpose training data

In [5]:
# Build the BERT model and tokenizer, then fine-tune on the general-purpose
# train split, validating on the general-purpose validation split.
model, tokenizer = initialize_model("BERT")
BERT_finetuner = FineTuner(
    model,
    tokenizer,
    config["BERT"]["training_args"],
    # Give this run its own output directory (as the social-science run does
    # with "ssc_bert") instead of relying on FineTuner's default.
    # NOTE(review): assumes runs without model_output_dir all write to the same
    # default location — confirm in scripts/finetuner.py.
    model_output_dir="general_bert",
)
BERT_finetuner.train(general_train, general_val)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/4608 [00:00<?, ? examples/s]

Map:   0%|          | 0/1152 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4205,0.38984,0.831597,0.839934,0.801575,0.882149
2,0.0056,0.617491,0.827257,0.826201,0.832746,0.819757
3,0.0103,0.615826,0.844618,0.847918,0.831667,0.864818
4,0.0098,0.819854,0.846354,0.848847,0.8367,0.861352
5,0.0002,0.939247,0.855903,0.858603,0.844221,0.873484
6,0.0002,1.021968,0.850694,0.850174,0.854641,0.845754
7,0.0001,1.153487,0.859375,0.864322,0.836305,0.894281
8,0.0001,1.17353,0.859375,0.864548,0.835218,0.896014
9,0.0001,1.254664,0.855903,0.858844,0.843072,0.875217
10,0.0,1.244323,0.862847,0.865417,0.850921,0.880416


### evaluation of general-purpose BERT on general-purpose test set

In [6]:
# Score the in-memory `model` (fine-tuned on the general-purpose data in the
# preceding training cell) on the general-purpose test split. The report is
# printed below and the return value is kept in `classification_report`.
classification_report = evaluate(model, tokenizer, general_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.97      0.86      0.91      3070
     Class 1       0.55      0.88      0.68       609

    accuracy                           0.86      3679
   macro avg       0.76      0.87      0.79      3679
weighted avg       0.90      0.86      0.87      3679



### evaluation of general-purpose BERT on social science test set

In [7]:
# Same general-purpose model, now scored on the social-science test split.
# Note: this overwrites the `classification_report` value from the previous cell.
classification_report = evaluate(model, tokenizer, ssc_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.77      0.70      0.73       123
     Class 1       0.72      0.79      0.75       123

    accuracy                           0.74       246
   macro avg       0.75      0.74      0.74       246
weighted avg       0.75      0.74      0.74       246



# fine-tuning BERT model on social science training data

In [5]:
# Re-create the BERT model and tokenizer, so `model` / `tokenizer` are rebound
# before training on the social-science split.
model, tokenizer = initialize_model("BERT")

# Same BERT training arguments as the other runs; outputs go to "ssc_bert".
bert_training_args = config["BERT"]["training_args"]
BERT_finetuner = FineTuner(
    model,
    tokenizer,
    bert_training_args,
    model_output_dir="ssc_bert",
)

# Train on the social-science train split, validate on its validation split.
BERT_finetuner.train(ssc_train, ssc_val)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/702 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6748,0.464164,0.809091,0.783505,0.904762,0.690909
2,0.048,0.404778,0.881818,0.894309,0.808824,1.0
3,0.0107,0.625043,0.854545,0.868852,0.791045,0.963636
4,0.0017,0.752012,0.854545,0.862069,0.819672,0.909091
5,0.0008,0.798817,0.863636,0.876033,0.80303,0.963636
6,0.0005,0.78125,0.890909,0.9,0.830769,0.981818
7,0.0004,0.844746,0.872727,0.881356,0.825397,0.945455
8,0.0005,0.850117,0.881818,0.892562,0.818182,0.981818
9,0.0004,0.846193,0.881818,0.890756,0.828125,0.963636
10,0.0003,0.85947,0.881818,0.890756,0.828125,0.963636


### evaluation of social science BERT on general-purpose test set

In [6]:
# Score the in-memory `model` (fine-tuned on the social-science data in the
# preceding training cell) on the general-purpose test split. The report is
# printed below and the return value is kept in `classification_report`.
classification_report = evaluate(model, tokenizer, general_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.87      0.94      0.91      3070
     Class 1       0.51      0.31      0.39       609

    accuracy                           0.84      3679
   macro avg       0.69      0.63      0.65      3679
weighted avg       0.81      0.84      0.82      3679



### evaluation of social science BERT on social science test set

In [7]:
# Same social-science model, now scored on the social-science test split.
# Note: this overwrites the `classification_report` value from the previous cell.
classification_report = evaluate(model, tokenizer, ssc_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.94      0.83      0.88       123
     Class 1       0.85      0.94      0.89       123

    accuracy                           0.89       246
   macro avg       0.89      0.89      0.89       246
weighted avg       0.89      0.89      0.89       246



# fine-tuning BERT model on merged training data




In [5]:
# Build the BERT model and tokenizer, then fine-tune on the merged
# (general-purpose + social-science) train split, validating on the merged
# validation split.
model, tokenizer = initialize_model("BERT")
BERT_finetuner = FineTuner(
    model,
    tokenizer,
    config["BERT"]["training_args"],
    # Give this run its own output directory (as the social-science run does
    # with "ssc_bert") instead of relying on FineTuner's default.
    # NOTE(review): assumes runs without model_output_dir all write to the same
    # default location — confirm in scripts/finetuner.py.
    model_output_dir="all_bert",
)
BERT_finetuner.train(all_train, all_val)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/5310 [00:00<?, ? examples/s]

Map:   0%|          | 0/1262 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3362,0.391392,0.823296,0.83639,0.779754,0.901899
2,0.0877,0.485505,0.842314,0.847976,0.819793,0.878165
3,0.0019,0.635188,0.8542,0.8528,0.86246,0.843354
4,0.0037,0.895546,0.850238,0.851532,0.845554,0.857595
5,0.0002,1.051674,0.846276,0.851455,0.824926,0.879747
6,0.0002,1.156889,0.846276,0.853695,0.815562,0.89557
7,0.0001,1.236676,0.849445,0.855842,0.822157,0.892405
8,0.0002,1.248466,0.848653,0.856714,0.814551,0.903481
9,0.0,1.274053,0.85103,0.85736,0.823615,0.893987
10,0.0,1.292519,0.848653,0.853864,0.826667,0.882911


### evaluation of merged-data BERT on general-purpose test set

In [6]:
# Score the in-memory `model` (BERT fine-tuned on the merged data in the
# preceding training cell) on the general-purpose test split. The report is
# printed below and the return value is kept in `classification_report`.
classification_report = evaluate(model, tokenizer, general_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.98      0.83      0.90      3070
     Class 1       0.51      0.90      0.65       609

    accuracy                           0.84      3679
   macro avg       0.74      0.86      0.77      3679
weighted avg       0.90      0.84      0.85      3679



### evaluation of merged-data BERT on social science test set

In [7]:
# Same merged-data BERT model, now scored on the social-science test split.
# Note: this overwrites the `classification_report` value from the previous cell.
classification_report = evaluate(model, tokenizer, ssc_test)

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.88      0.80      0.84       123
     Class 1       0.81      0.89      0.85       123

    accuracy                           0.85       246
   macro avg       0.85      0.85      0.85       246
weighted avg       0.85      0.85      0.85       246

