In [None]:
# Install simple transformers
!pip install simpletransformers
from simpletransformers.classification import ClassificationModel
import pandas as pd
from sklearn.metrics import classification_report
import logging
import csv
import numpy as np

In [None]:
def clf_report(labels, predictions):
    """Build a classification report for the given labels and predictions.

    Both arguments are forwarded unchanged to sklearn's
    ``classification_report`` with ``output_dict=True``, and whatever that
    call returns is handed back to the caller.
    """
    report = classification_report(labels, predictions, output_dict=True)
    return report

## Train RoBERTa

In [None]:

# Root logging at INFO; the "transformers" logger is raised to WARNING.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Load training data (tab-separated, no header row, quote characters kept literal)
train_dataframe = pd.read_csv('train.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
train_dataframe.columns = ['claim_labels', 'topic_sentences', 'claim_sentences', 'id', 'labels']
train_dataframe = train_dataframe[['topic_sentences', 'claim_sentences', 'labels']]
train_positive = train_dataframe[train_dataframe['labels'] != 0]
train_negative = train_dataframe[train_dataframe['labels'] == 0]
num_pos = train_positive.shape[0]
# adopt negative sampling from IAM dataset: keep up to 5 label-0 rows per non-zero row.
# The request is capped at the number of available negatives because sampling
# without replacement raises when n exceeds the population size.
num_neg = min(5 * num_pos, train_negative.shape[0])
# Fixed random_state so a fresh "Restart & Run All" draws the same negatives.
train_negative = train_negative.sample(n=num_neg, replace=False, random_state=42)
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row
# order and original index values.
train_dataframe = pd.concat([train_positive, train_negative])

# Load dev set (same file layout as the training file)
dev_dataframe = pd.read_csv('dev.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
dev_dataframe.columns = ['claim_labels', 'topic_sentences', 'claim_sentences', 'id', 'labels']
dev_dataframe = dev_dataframe[['topic_sentences', 'claim_sentences', 'labels']]

In [None]:
# Set training arguments - we use the same training arguments from the IAM dataset
train_args = {
    'evaluate_during_training': True,
    'evaluate_during_training_verbose': True,
    'max_seq_length': 128,
    'num_train_epochs': 10,
    'train_batch_size': 32,
    # Order matters: the evaluation cells map output index 0 -> 0, 1 -> 1, 2 -> -1.
    'labels_list': [0, 1, -1],
    'use_multiprocessing': False,
    'use_multiprocessing_for_evaluation': False,
    'overwrite_output_dir': True,
    'evaluate_during_training_steps': 100000
}

# load model
# ClassificationModel requires both a model type and a model name; the name was
# missing, which raises a TypeError before any training starts.
# NOTE(review): 'roberta-base' is assumed to be the intended checkpoint - confirm.
model = ClassificationModel('roberta', 'roberta-base', num_labels=3, args=train_args, use_cuda=False)

# Train model; the dev set is used for evaluation during training and
# clf_report is passed as an extra metric.
model.train_model(train_dataframe, eval_df=dev_dataframe, clf_report=clf_report)

## Test

In [None]:
# Read the test file: tab-separated, no header row, quote characters kept literal
test_df = pd.read_csv('test.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = [
    'claim_labels',
    'topic_sentences',
    'claim_sentences',
    'id',
    'labels',
]
# Keep only the two text columns and the label column
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on the test set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/claims_stance_result.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")

## Synonym

In [None]:
# Read the synonym verb-replacement file (tab-separated, no header row).
# NOTE(review): the file name is spelled 'synoym' - confirm it matches the file on disk.
test_df = pd.read_csv('verb-replacement-synoym.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
# This file is given a fourth column name, 'none', which is dropped right after
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels', 'none']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/synonym_predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")

Running Evaluation:   0%|          | 0/884 [00:00<?, ?it/s]

## Antonym

In [None]:
# Read the antonym verb-replacement file (tab-separated, no header row)
test_df = pd.read_csv('verb-replacement-antonym.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/antonym-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


Running Evaluation:   0%|          | 0/884 [00:00<?, ?it/s]

## Location


In [None]:
# Read the location file (tab-separated, no header row)
test_df = pd.read_csv('location.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/location_predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")



Running Evaluation:   0%|          | 0/883 [00:00<?, ?it/s]

## Contraction/Expansion

In [None]:
# Read the contraction/expansion file (tab-separated, no header row)
test_df = pd.read_csv('contraction-expansion.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/contraction-expansion-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Number

In [None]:
# Read the number-change file (tab-separated, no header row)
test_df = pd.read_csv('number.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on the number-change set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/number_predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Character Insertion

In [None]:
# Read the character-insertion file (tab-separated, no header row)
test_df = pd.read_csv('character-insertion.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/character-insertion-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Character Deletion

In [None]:
# Read the character-deletion file (tab-separated, no header row)
test_df = pd.read_csv('character-deletion.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/character-deletion-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Character Swapping

In [None]:
# Upload character-swapping data (tab-separated, no header row, quotes kept literal)
test_df = pd.read_csv('character-swapping.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]
# Evaluate on character-swapping set with the model stored under outputs/best_model/
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
predictions = list(np.argmax(model_outputs, axis=-1))
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[x] for x in predictions]

# Add predictions to file.
# This cell previously wrote to 'outputs/location_predictions.txt', silently
# overwriting the Location cell's results; it now has its own file, named like
# the other character-* prediction files.
with open('outputs/character-swapping-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(str(x)+'\n')


## Character Repetition

In [None]:
# Read the character-repetition file (tab-separated, no header row)
test_df = pd.read_csv('character-repetition.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/character-repetition-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Letter Case Change

In [None]:
# Read the letter-case-change file (tab-separated, no header row)
test_df = pd.read_csv('letter-case-change.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on the letter-case-change set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/letter-case-change-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Back Translation French

In [None]:
# Read the French back-translation file (tab-separated, no header row)
test_df = pd.read_csv('back-translation-french.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/back-translation-french-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Back Translation Spanish

In [None]:
# Read the Spanish back-translation file (tab-separated, no header row)
test_df = pd.read_csv('back-translation-spanish.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line (same base name as the input, under outputs/)
with open('outputs/back-translation-spanish.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Paraphrase

In [None]:
# Read the paraphrase file (tab-separated, no header row)
test_df = pd.read_csv('paraphrase.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line (same base name as the input, under outputs/)
with open('outputs/paraphrase.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Synonym and Character Repetition

In [None]:
# Read the combined synonym + character-repetition file (tab-separated, no header row)
test_df = pd.read_csv('synonym-repetition.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line (same base name as the input, under outputs/)
with open('outputs/synonym-repetition.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Back Translation (French) and Character Insertion

In [None]:
# Read the combined back-translation + character-insertion file (tab-separated, no header row)
test_df = pd.read_csv('back-translation-insertion.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/back-translation-insertion-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")


## Verb Replacement (antonym) and Letter Case Change

In [None]:
# Read the combined antonym + letter-case-change file (tab-separated, no header row)
test_df = pd.read_csv('antonym-lcc.txt', sep='\t', header=None, quoting=csv.QUOTE_NONE)
test_df.columns = ['topic_sentences', 'claim_sentences', 'labels']
test_df = test_df[['topic_sentences', 'claim_sentences', 'labels']]

# Load the model stored under outputs/best_model/ and evaluate it on this set
model = ClassificationModel('roberta', 'outputs/best_model/')
result, model_outputs, wrong_predictions = model.eval_model(test_df, clf_report=clf_report)

# Argmax over the last axis gives an output index; index 2 is written as label -1
label_map = {0: 0, 1: 1, 2: -1}
predictions = [label_map[idx] for idx in np.argmax(model_outputs, axis=-1)]

# Write one predicted label per line
with open('outputs/antonym-lcc-predictions.txt', 'w') as f:
    for x in predictions:
        f.write(f"{x}\n")
