### Read training, dev and unlabeled test data

In [1]:
# !pip install datasets

The following provides a starting code (Python 3) of how to read the labeled training and dev sentence pairs, and unlabeled test sentence pairs, into lists.

In [2]:
# !pip install transformers

In [3]:
import csv

In [4]:
train, dev, test = [], [], []

In [5]:
with open('./data/pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        train.append(x)
print(len(train))
print(train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [6]:
with open('./data/pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [7]:
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[0] and x[1] will be the sentence pairs.
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes at here. We will use your prediction on the unlabeled test data to grade, while checking this part to understand how your method has produced the predictions.

In [8]:
from datasets import load_dataset
import datasets
import pandas as pd

dataset = {'train':[], 'test':[], 'acc_test':[]}

for x in train:
    dataset['train'].append({'label':int(x[2]), 'text':x[0] + ' ' + x[1]})

for x in dev:
    dataset['test'].append({'label':int(x[2]), 'text':x[0] + ' ' + x[1]})
  
for x in test:
    dataset['acc_test'].append({'text':x[0] + ' ' + x[1]})

train_dataset = datasets.Dataset.from_pandas(pd.DataFrame(data=dataset["train"]))
eval_dataset = datasets.Dataset.from_pandas(pd.DataFrame(data=dataset["test"]))
test_dataset = datasets.Dataset.from_pandas(pd.DataFrame(data=dataset["acc_test"]))

In [21]:
model_name = 'bert-base-uncased'

In [23]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name,model_max_length=512)

from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)


def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets_train = train_dataset.map(tokenize_function, batched=True)
tokenized_datasets_eval = eval_dataset.map(tokenize_function, batched=True)
tokenized_datasets_test = test_dataset.map(tokenize_function, batched=True)

small_train_dataset = tokenized_datasets_train
small_eval_dataset = tokenized_datasets_eval
small_test_dataset = tokenized_datasets_test

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.18.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

  0%|          | 0/6 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/5 [00:00<?, ?ba/s]

In [36]:
import transformers

In [38]:
from transformers import TrainingArguments

training_args = TrainingArguments(output_dir="test_trainer")

import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

from transformers import TrainingArguments, Trainer
grouped_params = model.parameters()
optimizer = transformers.AdamW(grouped_params,lr=0.00025)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=2000, num_training_steps=60000)
optimizers = optimizer, scheduler

training_args = TrainingArguments(
      output_dir="test_trainer", 
      evaluation_strategy="epoch",
      num_train_epochs=5,

    )

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [39]:
import torch
torch.cuda.empty_cache()

In [40]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    optimizers=optimizers
    
)



In [None]:
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 5983
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3740


Epoch,Training Loss,Validation Loss


In [28]:
acc = trainer.evaluate(small_eval_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1055
  Batch size = 8


In [29]:
print(acc)

{'eval_loss': 0.6392191052436829, 'eval_accuracy': 0.8454976303317535, 'eval_runtime': 37.8649, 'eval_samples_per_second': 27.862, 'eval_steps_per_second': 3.486, 'epoch': 3.0}


In [30]:
preds = trainer.predict(small_test_dataset)

The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 4850
  Batch size = 8


In [32]:
print(preds)

PredictionOutput(predictions=array([[-1.3778298,  1.5011126],
       [-1.36724  ,  1.4937118],
       [ 1.5546438, -1.6784567],
       ...,
       [ 1.5496392, -1.6674788],
       [-1.0782232,  1.2156904],
       [ 1.4729838, -1.5388457]], dtype=float32), label_ids=None, metrics={'test_runtime': 76.0149, 'test_samples_per_second': 63.803, 'test_steps_per_second': 7.985})


In [35]:
print(preds.predictions)

[[-1.3778298  1.5011126]
 [-1.36724    1.4937118]
 [ 1.5546438 -1.6784567]
 ...
 [ 1.5496392 -1.6674788]
 [-1.0782232  1.2156904]
 [ 1.4729838 -1.5388457]]


In [31]:
import numpy as np

In [36]:
# print(len(preds))

3


In [32]:
# Eventually, results need to be a list of 2028 0 or 1's
results = [np.argmax(x) for x in preds.predictions]

In [39]:
print(results)

[1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [33]:
# suppose you had your model's predictions on the 2028 test cases read from test_enc_unlabeled.tsv, and 
#those results are in the list called 'results'
assert (len(results) == 4850)

In [41]:
# # make sure the results are not float numbers, but intergers 0 and 1
# results = [int(x) for x in results]

In [34]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open(f'upload_predictions_8454.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
      print(x)
      fp.write(str(x) + '\n')

1
1
0
0
0
0
0
1
0
0
1
1
1
0
1
0
1
0
1
0
1
0
0
0
1
1
1
0
0
0
1
1
0
1
1
0
0
1
1
1
0
1
0
1
1
1
0
0
0
0
0
1
0
1
0
0
0
0
0
0
1
1
0
0
1
0
0
0
1
0
1
0
1
0
1
0
0
1
1
0
0
0
1
0
1
1
0
0
1
1
1
1
1
1
0
1
1
0
0
0
0
0
1
0
1
1
0
0
1
0
0
1
0
0
1
0
0
1
0
0
0
0
0
0
1
0
1
0
0
0
0
0
0
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
0
0
0
1
1
0
1
1
0
1
0
1
0
1
1
1
0
0
0
0
0
0
1
0
0
0
1
1
0
0
1
1
1
0
1
1
0
0
1
0
0
0
0
0
1
1
1
1
1
0
1
0
0
1
0
0
1
1
0
1
1
1
0
0
1
1
1
1
0
1
1
1
1
1
1
1
1
0
0
0
0
0
0
1
1
0
0
1
1
0
1
0
0
0
0
1
0
1
1
0
0
1
0
0
1
1
1
0
0
1
1
0
0
0
0
1
1
0
1
0
0
0
1
0
0
1
0
1
0
0
0
1
0
1
0
1
1
0
0
0
1
0
0
0
0
0
1
1
1
1
0
1
0
1
1
0
0
0
0
0
1
1
1
1
1
0
0
0
1
0
0
0
1
0
0
1
0
1
0
1
1
0
0
1
1
0
1
0
1
0
1
0
0
1
0
0
1
1
0
1
1
1
1
1
1
0
1
1
0
1
0
0
0
1
1
0
0
1
0
1
1
0
0
0
1
0
1
0
1
1
1
1
1
1
1
1
1
0
1
1
1
1
0
0
0
1
1
1
0
0
1
1
0
1
1
1
1
0
0
1
0
0
0
0
1
1
1
0
1
0
1
1
1
0
1
1
0
0
0
0
1
1
0
0
1
1
0
1
1
1
1
1
1
0
1
1
0
1
1
1
1
1
0
1
1
1
0
0
1
0
0
0
1
0
1
1
1
1
1
0
1
0
1
1
1
1
0
0
0
0
1
1
0
1
1
0
1
0
1
0
1
1
0
0
0
0
0
1
1
0
0
1
