In [2]:
import pandas as pd
import numpy as np
from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score
from transformers import BertTokenizer, TFBertModel

## TASK 1


In [3]:
### Arguments
arguments_training_url = (
    "https://zenodo.org/records/8248658/files/arguments-training.tsv?download=1"
)
arguments_validation_url = (
    "https://zenodo.org/records/8248658/files/arguments-validation.tsv?download=1"
)
arguments_test_url = (
    "https://zenodo.org/records/8248658/files/arguments-test.tsv?download=1"
)

### Human values
labels_training_url = (
    "https://zenodo.org/records/8248658/files/labels-training.tsv?download=1"
)
labels_validation_url = (
    "https://zenodo.org/records/8248658/files/labels-validation.tsv?download=1"
)
labels_test_url = "https://zenodo.org/records/8248658/files/labels-test.tsv?download=1"

In [4]:
### Creating Training dataframe
arguments_tr_df = pd.read_csv(arguments_training_url, sep="\t")
labels_tr_df = pd.read_csv(labels_training_url, sep="\t")

### Merging arguments and labels
train_df_nm = pd.merge(arguments_tr_df, labels_tr_df, on="Argument ID")

### Creating Validation dataframe
arguments_va_df = pd.read_csv(arguments_validation_url, sep="\t")
labels_va_df = pd.read_csv(labels_validation_url, sep="\t")

### Merging arguments and labels
validation_df_nm = pd.merge(arguments_va_df, labels_va_df, on="Argument ID")

### Creating Test dataframe
arguments_te_df = pd.read_csv(arguments_test_url, sep="\t")
labels_te_df = pd.read_csv(labels_test_url, sep="\t")

### Merging arguments and labels
test_df_nm = pd.merge(arguments_te_df, labels_te_df, on="Argument ID")

### Notation
### nm=not merged with logical OR

In [5]:
train_df_nm.describe()

Unnamed: 0,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,Power: resources,Face,Security: personal,Security: societal,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
count,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0,5393.0
mean,0.1832,0.258669,0.0458,0.031893,0.280363,0.11311,0.115891,0.070833,0.370851,0.320415,0.105322,0.218246,0.038383,0.073243,0.246987,0.149453,0.385871,0.079177,0.123123,0.195439
std,0.386867,0.437944,0.209071,0.175732,0.449218,0.316756,0.320124,0.256569,0.483077,0.466679,0.306996,0.413094,0.192137,0.260559,0.431299,0.356567,0.486845,0.270039,0.328608,0.396575
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [6]:
train_df_nm.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Self-direction: thought,Self-direction: action,Stimulation,Hedonism,Achievement,Power: dominance,...,Tradition,Conformity: rules,Conformity: interpersonal,Humility,Benevolence: caring,Benevolence: dependability,Universalism: concern,Universalism: nature,Universalism: tolerance,Universalism: objectivity
0,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,A01006,We should end the use of economic sanctions,against,sometimes economic sanctions are the only thin...,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,A01007,We should abolish capital punishment,against,capital punishment is sometimes the only optio...,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,A01008,We should ban factory farming,against,factory farming allows for the production of c...,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,0


In [7]:
### Considering category ranges (0,3),(3,7),(7,13),(13,19)
### adding +4, considering the first 4 columns which are not categories
column_ranges = [(4, 7), (7, 11), (11, 17), (17, 23)]
level_3_cat = [
    "Openness_to_change",
    "Self_enhancement",
    "Conversation",
    "Self_transcendence",
]
columns_to_keep = ["Argument ID", "Conclusion", "Stance", "Premise"]

### Creating final dataframes
train_df = pd.DataFrame()
validation_df = pd.DataFrame()
test_df = pd.DataFrame()

### Applying OR to the selected columns using .any(axis=1)
for (start, end), cat in zip(column_ranges, level_3_cat):
    train_df[cat] = train_df_nm.iloc[:, start:end].any(axis=1)
    validation_df[cat] = validation_df_nm.iloc[:, start:end].any(axis=1)
    test_df[cat] = test_df_nm.iloc[:, start:end].any(axis=1)

###Readding the columns to keep
train_df = pd.concat([train_df_nm[columns_to_keep], train_df], axis=1)
validation_df = pd.concat([validation_df_nm[columns_to_keep], validation_df], axis=1)
test_df = pd.concat([test_df_nm[columns_to_keep], test_df], axis=1)

In [8]:
train_df.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
0,A01002,We should ban human cloning,in favor of,we should ban human cloning as it will only ca...,False,False,True,False
1,A01005,We should ban fast food,in favor of,fast food should be banned because it is reall...,False,False,True,False
2,A01006,We should end the use of economic sanctions,against,sometimes economic sanctions are the only thin...,False,True,True,False
3,A01007,We should abolish capital punishment,against,capital punishment is sometimes the only optio...,False,False,True,True
4,A01008,We should ban factory farming,against,factory farming allows for the production of c...,False,False,True,True


In [9]:
train_df.describe()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
count,5393,5393,5393,5393,5393,5393,5393,5393
unique,5393,332,2,5313,2,2,2,2
top,A01002,We should abolish the three-strikes laws,in favor of,Migrants sell drugs.,False,False,True,True
freq,1,114,2898,2,3451,3089,4018,3820


In [10]:
validation_df.head()

Unnamed: 0,Argument ID,Conclusion,Stance,Premise,Openness_to_change,Self_enhancement,Conversation,Self_transcendence
0,A01001,Entrapment should be legalized,in favor of,if entrapment can serve to more easily capture...,False,False,True,False
1,A01012,The use of public defenders should be mandatory,in favor of,the use of public defenders should be mandator...,False,False,False,True
2,A02001,Payday loans should be banned,in favor of,payday loans create a more impoverished societ...,False,False,True,True
3,A02002,Surrogacy should be banned,against,Surrogacy should not be banned as it is the wo...,True,False,False,False
4,A02009,Entrapment should be legalized,against,entrapment is gravely immoral and against huma...,False,False,True,True


In [11]:
test_df["Openness_to_change"]

0       False
1       False
2       False
3       False
4       False
        ...  
1571    False
1572    False
1573     True
1574     True
1575    False
Name: Openness_to_change, Length: 1576, dtype: bool

## TASK 2

### Uniform Baseline

In [12]:
np.random.seed(12345678)

In [13]:
clf_list = [DummyClassifier(strategy="uniform") for _ in level_3_cat]
[
    clf.fit(X=train_df[columns_to_keep[1:]], y=train_df[cat])
    for clf, cat in zip(clf_list, level_3_cat)
]
prediction_uniform = np.array(
    [clf.predict(X=test_df[columns_to_keep[1:]]) for clf in clf_list]
).T

### Accuracy

In [14]:
### Evaluate F1 overall
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_uniform, average="weighted"
)
print(f"Random Classifier Accuracy overall weighted : {f1_overall:.4f}")

### Evaluate F1 overall
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_uniform, average="macro"
)
print(f"Random Classifier Accuracy overall macro: {f1_overall:.4f}")


### Evaluate F1 per category
f1_per_cat = [
    f1_score(y_true=test_df[cat], y_pred=prediction_uniform[:, i])
    for i, cat in enumerate(level_3_cat)
]
print(f"Random Classifier Accuracy per category: {f1_per_cat}")

Random Classifier Accuracy overall weighted : 0.5144
Random Classifier Accuracy overall macro: 0.4820
Random Classifier Accuracy per category: [0.3607342378292099, 0.4246376811594203, 0.5750663129973476, 0.5676109032602886]


### Majority Baseline

In [15]:
clf_list = [DummyClassifier(strategy="most_frequent") for _ in level_3_cat]
[
    clf.fit(X=train_df[columns_to_keep[1:]], y=train_df[cat])
    for clf, cat in zip(clf_list, level_3_cat)
]
prediction_majority = np.array(
    [clf.predict(X=test_df[columns_to_keep[1:]]) for clf in clf_list]
).T

In [16]:
np.sum(
    np.array(
        [0.4018691588785046, 0.3937823834196891, 0.5768194070080863, 0.564516129032]
    )
) / 4

0.48424676958457

### Accuracy

In [17]:
### Evaluate over all F1
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_majority, average="weighted"
)
print(f"Majority Classifier Accuracy weighted : {f1_overall:.4f}")

### Evaluate over all F1
f1_overall = f1_score(
    y_true=test_df[level_3_cat], y_pred=prediction_majority, average="macro"
)
print(f"Majority Classifier Accuracy macro: {f1_overall:.4f}")


### Evaluate F1 per category
f1_per_cat = [
    f1_score(y_true=test_df[cat], y_pred=prediction_majority[:, i])
    for i, cat in enumerate(level_3_cat)
]
print(f"Random Classifier Accuracy per category: {f1_per_cat}")

Majority Classifier Accuracy weighted : 0.5506
Majority Classifier Accuracy macro: 0.4081
Random Classifier Accuracy per category: [0.0, 0.0, 0.8141459744168548, 0.8181477315335584]


### BERT Classifier

In [18]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = TFBertModel.from_pretrained("bert-base-uncased")
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors="tf")
output = model(encoded_input)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]




Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [28]:
print(type(output[0]), output[0].shape)
print(type(output[0]), output[1].shape)

<class 'tensorflow.python.framework.ops.EagerTensor'> (1, 12, 768)
<class 'tensorflow.python.framework.ops.EagerTensor'> (1, 768)
