### Read training, dev and unlabeled test data

The following starter code (Python 3) shows how to read the labeled training and dev sentence pairs, and the unlabeled test sentence pairs, into lists.

In [None]:
import csv
import numpy as np
#!pip install transformers
#!pip install datasets
#!pip install torch
#!pip install scikit-learn
%pip install hf_xet
%pip install transformers[torch]
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
from sklearn.metrics import accuracy_score
import torch


In [29]:
# One independent, initially empty list of rows per data split.
train = []
dev = []
test = []

In [30]:
# Load the labeled training pairs. Every row is [x[0], x[1], x[2]]:
# x[0] and x[1] are the sentence pair and x[2] is the label ('0' or '1',
# still a string at this point).
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are parsed correctly.
# The list is rebuilt rather than appended to, so re-running this cell does
# not duplicate the training rows.
with open('./data/pnli_train.csv', encoding='utf-8', newline='') as fp:
    train = list(csv.reader(fp))
print(len(train))
print(train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [31]:
# Load the labeled dev pairs. Every row is [x[0], x[1], x[2]]:
# x[0] and x[1] are the sentence pair and x[2] is the label ('0' or '1',
# still a string at this point).
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are parsed correctly.
# The list is rebuilt rather than appended to, so re-running this cell does
# not duplicate the dev rows.
with open('./data/pnli_dev.csv', encoding='utf-8', newline='') as fp:
    dev = list(csv.reader(fp))
print(len(dev))
print(dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [32]:
# Load the unlabeled test pairs. Every row is [x[0], x[1]], the sentence
# pair; there is no label column.
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are parsed correctly.
# The list is rebuilt rather than appended to, so re-running this cell does
# not duplicate the test rows.
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8', newline='') as fp:
    test = list(csv.reader(fp))
print(len(test))
print(test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may experiment with different methods in your program. However, you need to embed the training and inference processes here. We will grade using your predictions on the unlabeled test data, and will check this part to understand how your method produced those predictions.

In [33]:
# Turn the row-oriented lists (train / dev / test) into column-oriented dicts
# with the keys 'precondition', 'statement' and, for the labeled splits,
# 'label' (converted from string to int).

def _column(rows, position):
    """Return the value stored at `position` in every row of `rows`."""
    return [row[position] for row in rows]


## Train
preconditions = _column(train, 0)
statements = _column(train, 1)
labelsTR = [int(raw_label) for raw_label in _column(train, 2)]
train_data = dict(precondition=preconditions, statement=statements, label=labelsTR)

## Dev
preconditions = _column(dev, 0)
statements = _column(dev, 1)
labelsD = [int(raw_label) for raw_label in _column(dev, 2)]
dev_data = dict(precondition=preconditions, statement=statements, label=labelsD)

## Test (unlabeled, so there is no 'label' column)
preconditions = _column(test, 0)
statements = _column(test, 1)
test_data = dict(precondition=preconditions, statement=statements)


In [34]:
# Wrap the training columns in a Hugging Face Dataset.
dataset = Dataset.from_dict(train_data)

# The tokenizer and the classifier are both loaded from the same pretrained
# checkpoint; the classifier is configured with two output labels (0 and 1).
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
)

def preprocess(examples):
    """Tokenize the 'precondition' / 'statement' columns as sentence pairs.

    Sequences are truncated and padded to a fixed length of 128 so every
    encoded example has the same size.
    """
    first_sentences = examples['precondition']
    second_sentences = examples['statement']
    return tokenizer(
        first_sentences,
        second_sentences,
        truncation=True,
        padding='max_length',
        max_length=128,
    )

# Tokenize the training split; batched=True hands preprocess() whole batches
# of examples instead of one example at a time.
tokenized_train = dataset.map(function=preprocess, batched=True)
#tokenized_val = val_dataset.map(preprocess, batched=True)


# Evaluation metric
def compute_metrics(eval_pred):
    """Return {'accuracy': ...} for a (logits, labels) pair.

    The predicted class of each example is the index of its largest logit
    along the last axis.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)
    return dict(accuracy=accuracy_score(gold, predicted))


# Training arguments
# Three epochs with a per-device batch size of 16; a checkpoint is saved and
# metrics are logged at the end of every epoch.
# Evaluation during training stays disabled. "no" is the default evaluation
# strategy, so it is deliberately not passed: the old `evaluation_strategy`
# keyword was deprecated in favour of `eval_strategy` and is not accepted by
# recent transformers releases, while omitting it works on every version.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    logging_dir='./logs',
    save_strategy='epoch',
    logging_strategy='epoch',
)
# Build the Trainer. No eval dataset or metric function is attached, so this
# run only fine-tunes on the tokenized training split.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_train,
)

# Fine-tune the model.
trainer.train()

# Save the fine-tuned model and the tokenizer into the same directory.
trainer.save_model("./reasoning-model")
tokenizer.save_pretrained("./reasoning-model")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/5983 [00:00<?, ? examples/s]



ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>={ACCELERATE_MIN_VERSION}'`

In [None]:

# Tokenize the dev split the same way as the training data.
dataset_val = Dataset.from_dict(dev_data)
tokenized_predictv = dataset_val.map(preprocess, batched=True)

# Run the trained model on the dev split and take the highest-scoring class
# for every example.
predictions0 = trainer.predict(tokenized_predictv)
dev_logits = predictions0.predictions
resultsv = np.argmax(dev_logits, axis=-1).tolist()

# Compare the predictions with the gold dev labels.
accuracy = accuracy_score(labelsD, resultsv)
print(f"Validation Accuracy: {accuracy:.4f}")


In [None]:
# Dev accuracy again, printed at full precision.
accv = accuracy_score(y_true=labelsD, y_pred=resultsv)
print(accv)

In [None]:
# Tokenize the unlabeled test split the same way as the training data.
dataset_test = Dataset.from_dict(test_data)
tokenized_test = dataset_test.map(preprocess, batched=True)

# Predict, then keep the index of the largest logit per example as its label.
predictionst = trainer.predict(tokenized_test)
logits = predictionst.predictions
predicted_classes = np.argmax(logits, axis=-1)
results = predicted_classes.tolist()


In [None]:
# Eventually, `results` needs to be a list of 0s and 1s, one per test instance (the template says 2028; the test file loaded above has 4850 rows)
#results = []

### Output Prediction Result File

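You will need to submit a prediction result file. It should have one line per test instance, and every line should be either 0 or 1: your model's prediction for the respective test-set instance. (The template states 2028 lines; the test file loaded above, `pnli_test_unlabeled.csv`, contains 4850 pairs.)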

In [None]:
# `results` must hold exactly one prediction per row read from
# ./data/pnli_test_unlabeled.csv into `test`.
# The template's hard-coded 2028 does not match this test set (4850 rows), so
# the expected count is taken from the data that was actually loaded.
assert len(results) == len(test), (
    f"expected {len(test)} predictions, got {len(results)}"
)

In [None]:
# Coerce every prediction to a plain Python int so the output file contains
# the integers 0 and 1 rather than floats.
results = list(map(int, results))

In [None]:
# Write the predictions to 'upload_predictions.txt', one per line, for upload.
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    fp.writelines(str(x) + '\n' for x in results)