In [1]:
!pip install datasets
!pip install accelerate -U
!pip install transformers



In [2]:
import pandas as pd
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from datasets import Dataset
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device: %s" % device)

Device: cuda


## TASK 1


In [4]:
### Arguments
arguments_training_url = (
    "https://zenodo.org/records/8248658/files/arguments-training.tsv?download=1"
)
arguments_validation_url = (
    "https://zenodo.org/records/8248658/files/arguments-validation.tsv?download=1"
)
arguments_test_url = (
    "https://zenodo.org/records/8248658/files/arguments-test.tsv?download=1"
)

### Human values
labels_training_url = (
    "https://zenodo.org/records/8248658/files/labels-training.tsv?download=1"
)
labels_validation_url = (
    "https://zenodo.org/records/8248658/files/labels-validation.tsv?download=1"
)
labels_test_url = "https://zenodo.org/records/8248658/files/labels-test.tsv?download=1"

In [5]:
### Creating Training dataframe
arguments_tr_df = pd.read_csv(arguments_training_url, sep="\t")
labels_tr_df = pd.read_csv(labels_training_url, sep="\t")

### Merging arguments and labels
train_df_nm = pd.merge(arguments_tr_df, labels_tr_df, on="Argument ID")

### Creating Validation dataframe
arguments_va_df = pd.read_csv(arguments_validation_url, sep="\t")
labels_va_df = pd.read_csv(labels_validation_url, sep="\t")

### Merging arguments and labels
validation_df_nm = pd.merge(arguments_va_df, labels_va_df, on="Argument ID")

### Creating Test dataframe
arguments_te_df = pd.read_csv(arguments_test_url, sep="\t")
labels_te_df = pd.read_csv(labels_test_url, sep="\t")

### Merging arguments and labels
test_df_nm = pd.merge(arguments_te_df, labels_te_df, on="Argument ID")

### Notation
### nm=not merged with logical OR

In [6]:
train_df_nm.describe()

Unnamed: 0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
count,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0
mean,0.1832,0.258669,0.0458,0.031893,0.280363,0.11311,0.115891,0.070833,0.370851,0.320415,0.105322,0.218246,0.038383,0.073243,0.246987,0.149453,0.385871,0.079177,0.123123,0.195439
std,0.386867,0.437944,0.209071,0.175732,0.449218,0.316756,0.320124,0.256569,0.483077,0.466679,0.306996,0.413094,0.192137,0.260559,0.431299,0.356567,0.486845,0.270039,0.328608,0.396575
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
train_df_nm.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,...,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,A01006,We should end the use of economic sanctions,against,sometimes economic sanctions are the only thin...,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,A01007,We should abolish capital punishment,against,capital punishment is sometimes the only optio...,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,A01008,We should ban factory farming,against,factory farming allows for the production of c...,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0


In [8]:
### Considering category ranges (0,3),(3,7),(7,13),(13,19)
### adding +4, considering the first 4 columns which are not categories
column_ranges = [(4, 7), (7, 11), (11, 17), (17, 23)]
level_3_cat = [
    "Openness_to_change",
    "Self_enhancement",
    "Conversation",
    "Self_transcendence",
]
columns_to_keep = ["Argument ID", "Conclusion", "Stance", "Premise"]

### Creating final dataframes
train_df = pd.DataFrame()
validation_df = pd.DataFrame()
test_df = pd.DataFrame()

### Applying OR to the selected columns using .any(axis=1)
for (start, end), cat in zip(column_ranges, level_3_cat):
    train_df[cat] = train_df_nm.iloc[:, start:end].any(axis=1)
    validation_df[cat] = validation_df_nm.iloc[:, start:end].any(axis=1)
    test_df[cat] = test_df_nm.iloc[:, start:end].any(axis=1)

### Reading the columns to keep
train_df = pd.concat([train_df_nm[columns_to_keep], train_df], axis=1)
validation_df = pd.concat([validation_df_nm[columns_to_keep], validation_df], axis=1)
test_df = pd.concat([test_df_nm[columns_to_keep], test_df], axis=1)

In [9]:
train_df.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
0,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,False,False,True,False
1,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,False,False,True,False
2,A01006,We should end the use of economic sanctions,against,sometimes economic sanctions are the only thin...,False,True,True,False
3,A01007,We should abolish capital punishment,against,capital punishment is sometimes the only optio...,False,False,True,True
4,A01008,We should ban factory farming,against,factory farming allows for the production of c...,False,False,True,True


In [10]:
test_df.describe()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
count,1576,1576,1576,1576,1576,1576,1576,1576
unique,1576,106,2,1548,2,2,2,2
top,A26004,We should ban naturopathy,in favor of,We should develop adequate border protection. ...,False,False,True,True
freq,1,183,868,2,1110,994,1082,1091


In [11]:
validation_df.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,False,False,True,False
1,A01012,The use of public defenders should be mandatory,in favor of,the use of public defenders should be mandator...,False,False,False,True
2,A02001,Payday loans should be banned,in favor of,payday loans create a more impoverished societ...,False,False,True,True
3,A02002,Surrogacy should be banned,against,Surrogacy should not be banned as it is the wo...,True,False,False,False
4,A02009,Entrapment should be legalized,against,entrapment is gravely immoral and against huma...,False,False,True,True


In [12]:
test_df["Openness_to_change"]

0       False
1       False
2       False
3       False
4       False
        ...  
1571    False
1572    False
1573     True
1574     True
1575    False
Name: Openness_to_change, Length: 1576, dtype: bool

In [13]:
### Define a mapping for "Stance" column
stance_mapping = {"in favor of": True, "against": False}

### Apply the mapping to convert strings to boolean values
train_df["Stance"] = train_df["Stance"].map(stance_mapping)
validation_df["Stance"] = validation_df["Stance"].map(stance_mapping)
test_df["Stance"] = test_df["Stance"].map(stance_mapping)

In [14]:
train_df.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
0,A01002,We should ban human cloning,True,we should ban human cloning as it will only ca...,False,False,True,False
1,A01005,We should ban fast food,True,fast food should be banned because it is reall...,False,False,True,False
2,A01006,We should end the use of economic sanctions,False,sometimes economic sanctions are the only thin...,False,True,True,False
3,A01007,We should abolish capital punishment,False,capital punishment is sometimes the only optio...,False,False,True,True
4,A01008,We should ban factory farming,False,factory farming allows for the production of c...,False,False,True,True


## TASK 2

### Uniform Baseline

In [15]:
np.random.seed(12345678)

In [16]:
clf_list = [DummyClassifier(strategy="uniform") for _ in level_3_cat]
[
    clf.fit(X=train_df[columns_to_keep[1:]], y=train_df[cat])
    for clf, cat in zip(clf_list, level_3_cat)
]
prediction_uniform = np.array(
    [clf.predict(X=test_df[columns_to_keep[1:]]) for clf in clf_list]
).T

### F1 score for Unifrom Baseline

In [17]:
### Evaluate F1 overall
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_uniform, average="weighted"
)
print(f"Random Classifier F1 overall weighted : {f1_overall:.4f}")

### Evaluate F1 overall
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_uniform, average="macro"
)
print(f"Random Classifier F1 overall macro: {f1_overall:.4f}")


### Evaluate F1 per category
f1_per_cat = [
    f1_score(y_true=test_df[cat], y_pred=prediction_uniform[:, i])
    for i, cat in enumerate(level_3_cat)
]
print(f"Random Classifier F1 per category: {f1_per_cat}")

Random Classifier F1 overall weighted : 0.5144
Random Classifier F1 overall macro: 0.4820
Random Classifier F1 per category: [0.3607342378292099, 0.4246376811594203, 0.5750663129973476, 0.5676109032602886]


### Majority Baseline

In [18]:
clf_list = [DummyClassifier(strategy="most_frequent") for _ in level_3_cat]
[
    clf.fit(X=train_df[columns_to_keep[1:]], y=train_df[cat])
    for clf, cat in zip(clf_list, level_3_cat)
]
prediction_majority = np.array(
    [clf.predict(X=test_df[columns_to_keep[1:]]) for clf in clf_list]
).T

### F1 score for Majority Baseline

In [19]:
### Evaluate over all F1
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_majority, average="weighted"
)
print(f"Majority Classifier F1 weighted : {f1_overall:.4f}")

### Evaluate over all F1
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_majority, average="macro"
)
print(f"Majority Classifier F1 macro: {f1_overall:.4f}")


### Evaluate F1 per category
f1_per_cat = [
    f1_score(y_true=test_df[cat], y_pred=prediction_majority[:, i])
    for i, cat in enumerate(level_3_cat)
]
print(f"Random Classifier F1 per category: {f1_per_cat}")

Majority Classifier F1 weighted : 0.5506
Majority Classifier F1 macro: 0.4081
Random Classifier F1 per category: [0.0, 0.0, 0.8141459744168548, 0.8181477315335584]


### BERT Classifier

In [20]:
### Convert dataframes into datasets
train_dataset = Dataset.from_pandas(train_df)
validation_dataset = Dataset.from_pandas(validation_df)
test_dataset = Dataset.from_pandas(test_df)

In [21]:
id2label = {idx: label for idx, label in enumerate(level_3_cat)}
label2id = {label: idx for idx, label in enumerate(level_3_cat)}

In [22]:
model_card = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_card)
model = AutoModelForSequenceClassification.from_pretrained(
    model_card,
    problem_type="multi_label_classification",
    num_labels=len(level_3_cat),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Conclusion Only Model

In [39]:
### Encoding for Conclusion only model
def tokenize_conclusion(example):
    ### Tokenize text columns
    text_tokens = tokenizer(
        example["Conclusion"],
        truncation=True,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        return_tensors="pt",
    )

    ### Combine text tokens with non-text features
    encoded_example = {
        "input_ids": text_tokens["input_ids"],
        "token_type_ids": text_tokens["token_type_ids"],
        "attention_mask": text_tokens["attention_mask"],
        "Openness_to_change": torch.tensor(
            example["Openness_to_change"], dtype=torch.float
        ),
        "Self_enhancement": torch.tensor(
            example["Self_enhancement"], dtype=torch.float
        ),
        "Conversation": torch.tensor(example["Conversation"], dtype=torch.float),
        "Self_transcendence": torch.tensor(
            example["Self_transcendence"], dtype=torch.float
        ),
    }

    labels_batch = {k: example[k] for k in example.keys() if k in level_3_cat}
    labels_matrix = np.zeros(len(example("Conclusion"), len(level_3_cat)))
    for i, label in enumerate(level_3_cat):
        labels_matrix[:, i] = labels_batch[label]

    encoded_example["labels"] = labels_matrix.tolist()

    # print(encoded_example)
    return encoded_example

In [24]:
### Tokenize train, validation, test datasets
ds_list = [
    d.map(tokenize_conclusion, batched=True)
    for d in (train_dataset, validation_dataset, test_dataset)
]

### Set format for train, validation, test tokenized datasets
columns = [
    "input_ids",
    "token_type_ids",
    "attention_mask",
    "Openness_to_change",
    "Self_enhancement",
    "Conversation",
    "Self_transcendence",
]

for d in ds_list:
    d.set_format(type="torch", columns=columns)
train_tokenized_ds, valid_tokenized_ds, test_tokenized_ds = ds_list

# ### Tokenize training data
# test_tokenized_dataset = test_dataset.map(tokenize_conclusion, batched=True)
# test_tokenized_dataset.set_format(
#     "tensorflow",
#     columns=[
#         "input_ids",
#         "token_type_ids",
#         "attention_mask",
#         "Openness_to_change",
#         "Self_enhancement",
#         "Conversation",
#         "Self_transcendence",
#     ],
# )

Map:   0%|          | 0/5393 [00:00<?, ? examples/s]

Map:   0%|          | 0/1896 [00:00<?, ? examples/s]

Map:   0%|          | 0/1576 [00:00<?, ? examples/s]

In [25]:
### Sanity check
print(train_tokenized_ds["Conclusion"][50])
decoded_text = tokenizer.decode(train_tokenized_ds["input_ids"][50])
print(decoded_text)

We should prohibit school prayer
[CLS] we should prohibit school prayer [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD

In [26]:
### Collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [27]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_card, num_labels=len(level_3_cat), id2label=id2label, label2id=label2id
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [28]:
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

## Metrics

In [29]:
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    metrics = f1_score(y_true=y_true, y_pred=y_pred, average="macro")
    return metrics


def compute_metrics(prediction):
    preds = (
        prediction.predictions[0]
        if isinstance(prediction.predictions, tuple)
        else prediction.predictions
    )
    result = multi_label_metrics(predictions=preds, labels=prediction.label_ids)
    return result

### Training Arguments

In [30]:
training_args = TrainingArguments(
    output_dir="/Models/BertBaseUncased",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    load_best_model_at_end=True,
    report_to="none",
    save_strategy="epoch",  #'no'
    evaluation_strategy="epoch",
    num_train_epochs=1,  ### fine tuning
    weight_decay=0.01,
    metric_for_best_model="f1",
)

In [31]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized_ds,
    eval_dataset=valid_tokenized_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [32]:
trainer.train()

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


ValueError: ignored

In [None]:
trainer.evaluate()

## Conclusion - Premise Model

In [None]:
### Encoding for Conclusion - Premise model
def tokenize_conclusion_premise(example):
    ### Tokenize text columns
    text_tokens = tokenizer(
        example["Conclusion"],
        example["Premise"],
        truncation=True,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        return_tensors="pt",
    )

    ### Combine text tokens with non-text features
    encoded_example = {
        "input_ids": text_tokens["input_ids"],
        "token_type_ids": text_tokens["token_type_ids"],
        "attention_mask": text_tokens["attention_mask"],
        "Openness_to_change": torch.tensor(
            example["Openness_to_change"], dtype=torch.bool
        ),
        "Self_enhancement": torch.tensor(example["Self_enhancement"], dtype=torch.bool),
        "Conversation": torch.tensor(example["Conversation"], dtype=torch.bool),
        "Self_transcendence": torch.tensor(
            example["Self_transcendence"], dtype=torch.bool
        ),
    }

    return encoded_example

## Conclusion - Premise - Stance Model

In [None]:
### Encoding for Conclusion - Premise - Stance model
def tokenize_conclusion_premise_stance(example):
    ### Tokenize text columns
    text_tokens = tokenizer(
        example["Conclusion"],
        example["Premise"],
        truncation=True,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        return_tensors="pt",
    )

    ### Combine text tokens with non-text features
    encoded_example = {
        "input_ids": text_tokens["input_ids"],
        "token_type_ids": text_tokens["token_type_ids"],
        "attention_mask": text_tokens["attention_mask"],
        "Stance": torch.tensor(
            example["Stance"], dtype=torch.bool
        ),  ### Assuming 'Stance' is represented as 0 or 1
        "Openness_to_change": torch.tensor(
            example["Openness_to_change"], dtype=torch.bool
        ),
        "Self_enhancement": torch.tensor(example["Self_enhancement"], dtype=torch.bool),
        "Conversation": torch.tensor(example["Conversation"], dtype=torch.bool),
        "Self_transcendence": torch.tensor(
            example["Self_transcendence"], dtype=torch.bool
        ),
    }

    return encoded_example