### Read training, dev and unlabeled test data

The following provides a starting code (Python 3) of how to read the labeled training and dev sentence pairs, and unlabeled test sentence pairs, into lists.

In [31]:
import csv

In [32]:
train, dev, test = [], [], []

In [33]:
with open('./data/pnli_train.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        train.append(x)
print (len(train))
print (train[:3])

5983
[['Sometimes do exercise.', 'A person typically desire healthy life.', '1'], ['Who eats junk foods.', 'A person typically desire healthy life.', '0'], ['A person is sick.', 'A person typically desire healthy life.', '1']]


In [34]:
with open('./data/pnli_dev.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[2] will be the label (0 or 1). x[0] and x[1] will be the sentence pairs.
        dev.append(x)
print (len(dev))
print (dev[:3])

1055
[['A person is looking for accuracy.', 'A person typically desires accurate results.', '1'], ['A person does not care for accuracy.', 'A person typically desires accurate results.', '0'], ['The person double checks their data.', 'A person typically desires accurate results.', '1']]


In [35]:
with open('./data/pnli_test_unlabeled.csv', encoding='utf-8') as fp:
    csvreader = csv.reader(fp)
    for x in csvreader:
        # x[0] and x[1] will be the sentence pairs.
        test.append(x)
print (len(test))
print (test[:3])

4850
[['The people want to have a romantic and pleasant feel.', 'People typically does desire to smell violets.'], ['The contract is to buy products from you.', 'Getting contract typically cause to make money or spend money.'], ['Train station is closed.', 'Line can typically be used to move train along tracks.']]


### Main Code Body

You may choose to experiment with different methods using your program. However, you need to embed the training and inference processes at here. We will use your prediction on the unlabeled test data to grade, while checking this part to understand how your method has produced the predictions.

In [36]:
import pandas as pd
train_data=pd.DataFrame(train,columns=["precondition","statement","labels"])

In [37]:
train_data.head()

Unnamed: 0,precondition,statement,labels
0,Sometimes do exercise.,A person typically desire healthy life.,1
1,Who eats junk foods.,A person typically desire healthy life.,0
2,A person is sick.,A person typically desire healthy life.,1
3,A person is dead.,A person typically desire healthy life.,0
4,A person eats properly and do exercise regularly.,A person typically desire healthy life.,1


In [38]:
train_data["labels"] = train_data["labels"].astype(str).astype(int)

In [39]:
train_data.dtypes

precondition    object
statement       object
labels           int64
dtype: object

In [40]:
dev_data=pd.DataFrame(dev,columns=["precondition","statement","labels"])

In [41]:
dev_data["labels"] = dev_data["labels"].astype(str).astype(int)

In [42]:
test_data=pd.DataFrame(test,columns=["precondition","statement"])
test_data.head()

Unnamed: 0,precondition,statement
0,The people want to have a romantic and pleasan...,People typically does desire to smell violets.
1,The contract is to buy products from you.,Getting contract typically cause to make money...
2,Train station is closed.,Line can typically be used to move train along...
3,There is no water for driving the boats.,People typically desires drive boats for fun.
4,The poet is busy.,Poet can typically be used for creating poetry.


In [43]:
!pip install --no-cache-dir transformers sentencepiece



In [44]:
from transformers import BertTokenizer,TFBertModel,TFAutoModel,AutoTokenizer
tokenizer=AutoTokenizer.from_pretrained("ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli",from_pt=True)#use_fast=False)

In [45]:
def encoding_precondition(data):
    tokens_list=list()
    tokens_list.append('[CLS]')
    tokens_list+=list(tokenizer.tokenize(data))
    return(tokenizer.convert_tokens_to_ids(tokens_list))
def encode_statement(data):
    tokens_list=list()
    tokens_list.append('[sep]')
    tokens_list+=list(tokenizer.tokenize(data))
    tokens_list.append('[sep]')
    return(tokenizer.convert_tokens_to_ids(tokens_list))
tokenized=[]
for i in range(len(train_data)):
    pre=encoding_precondition(train_data['precondition'][i])
    st=encode_statement(train_data['statement'][i])
    tokenized.append(pre+st)
train_data['tokenized']=tokenized
train_data.head()

Unnamed: 0,precondition,statement,labels,tokenized
0,Sometimes do exercise.,A person typically desire healthy life.,1,"[3, 13624, 109, 3325, 4, 3, 250, 621, 3700, 47..."
1,Who eats junk foods.,A person typically desire healthy life.,0,"[3, 12375, 24923, 15163, 6592, 4, 3, 250, 621,..."
2,A person is sick.,A person typically desire healthy life.,1,"[3, 250, 621, 16, 4736, 4, 3, 250, 621, 3700, ..."
3,A person is dead.,A person typically desire healthy life.,0,"[3, 250, 621, 16, 1462, 4, 3, 250, 621, 3700, ..."
4,A person eats properly and do exercise regularly.,A person typically desire healthy life.,1,"[3, 250, 621, 24923, 5083, 8, 109, 3325, 4595,..."


In [46]:
tokenized_dev=[]
for i in range(len(dev_data)):
    pre=encoding_precondition(dev_data['precondition'][i])
    st=encode_statement(dev_data['statement'][i])
    tokenized_dev.append(pre+st)
dev_data['tokenized']=tokenized_dev
dev_data.head()

Unnamed: 0,precondition,statement,labels,tokenized
0,A person is looking for accuracy.,A person typically desires accurate results.,1,"[3, 250, 621, 16, 546, 13, 8611, 4, 3, 250, 62..."
1,A person does not care for accuracy.,A person typically desires accurate results.,0,"[3, 250, 621, 473, 45, 575, 13, 8611, 4, 3, 25..."
2,The person double checks their data.,A person typically desires accurate results.,1,"[3, 133, 621, 1457, 6240, 49, 414, 4, 3, 250, ..."
3,The person speeds through the experiment.,A person typically desires accurate results.,0,"[3, 133, 621, 9706, 149, 5, 9280, 4, 3, 250, 6..."
4,A person is studying well.,A person typically desires accurate results.,1,"[3, 250, 621, 16, 7739, 157, 4, 3, 250, 621, 3..."


In [47]:
mask_data_train=[]
mask_data_dev=[]
for i in range(len(train_data)):
    padded_seq=tokenizer(train_data['precondition'][i],train_data['statement'][i],padding=True,add_special_tokens=True)
    mask_data_train.append(padded_seq)
for i in range(len(dev_data)):
    padded_seq_dev=tokenizer(dev_data['precondition'][i],dev_data['statement'][i],padding=True,add_special_tokens=True)
    mask_data_dev.append(padded_seq_dev)

In [48]:
df_train=train_data
df_train['masked']=mask_data_train
df_train.head(5)

Unnamed: 0,precondition,statement,labels,tokenized,masked
0,Sometimes do exercise.,A person typically desire healthy life.,1,"[3, 13624, 109, 3325, 4, 3, 250, 621, 3700, 47...","[input_ids, attention_mask]"
1,Who eats junk foods.,A person typically desire healthy life.,0,"[3, 12375, 24923, 15163, 6592, 4, 3, 250, 621,...","[input_ids, attention_mask]"
2,A person is sick.,A person typically desire healthy life.,1,"[3, 250, 621, 16, 4736, 4, 3, 250, 621, 3700, ...","[input_ids, attention_mask]"
3,A person is dead.,A person typically desire healthy life.,0,"[3, 250, 621, 16, 1462, 4, 3, 250, 621, 3700, ...","[input_ids, attention_mask]"
4,A person eats properly and do exercise regularly.,A person typically desire healthy life.,1,"[3, 250, 621, 24923, 5083, 8, 109, 3325, 4595,...","[input_ids, attention_mask]"


In [49]:
df_dev=dev_data
df_dev['masked']=mask_data_dev
df_dev.head(5)

Unnamed: 0,precondition,statement,labels,tokenized,masked
0,A person is looking for accuracy.,A person typically desires accurate results.,1,"[3, 250, 621, 16, 546, 13, 8611, 4, 3, 250, 62...","[input_ids, attention_mask]"
1,A person does not care for accuracy.,A person typically desires accurate results.,0,"[3, 250, 621, 473, 45, 575, 13, 8611, 4, 3, 25...","[input_ids, attention_mask]"
2,The person double checks their data.,A person typically desires accurate results.,1,"[3, 133, 621, 1457, 6240, 49, 414, 4, 3, 250, ...","[input_ids, attention_mask]"
3,The person speeds through the experiment.,A person typically desires accurate results.,0,"[3, 133, 621, 9706, 149, 5, 9280, 4, 3, 250, 6...","[input_ids, attention_mask]"
4,A person is studying well.,A person typically desires accurate results.,1,"[3, 250, 621, 16, 7739, 157, 4, 3, 250, 621, 3...","[input_ids, attention_mask]"


In [50]:
import tensorflow as tf
from tensorflow import keras

In [69]:
max_len=31
def build_model():
    encoder=TFAutoModel.from_pretrained('ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli',from_pt=True)
    input_word_ids=tf.keras.Input(shape=(max_len,),dtype=tf.int32,name="input_word_ids")  
    input_mask=tf.keras.Input(shape=(max_len,),dtype=tf.int32,name="input_mask")
    embedding=encoder([input_word_ids,input_mask])[0] 
    output=tf.keras.layers.Dense(2,activation='softmax')(embedding[:,0,:])
    model=tf.keras.Model(inputs=[input_word_ids,input_mask],outputs=output) 
    loss_fn=keras.losses.SparseCategoricalCrossentropy() 
    model.compile(tf.keras.optimizers.Adam(lr=1e-5),loss=loss_fn,metrics=['accuracy'])
    return(model)

In [70]:
def input_data_con(data):
    inputs={'input_word_ids':list(),'input_mask':list()}
    for i in data:
        inputs['input_word_ids'].append(i['input_ids'])
        inputs['input_mask'].append(i['attention_mask'])
    inputs['input_word_ids']=tf.ragged.constant(inputs['input_word_ids']).to_tensor()
    inputs['input_mask']=tf.ragged.constant(inputs['input_mask']).to_tensor()
    return(inputs)

In [71]:
df_train_input=input_data_con(df_train['masked'].values)
for key in df_train_input.keys():
    df_train_input[key] =df_train_input[key][:,:max_len]

In [72]:
df_dev_input=input_data_con(df_dev['masked'].values)
for key in df_dev_input.keys():
    df_dev_input[key]=df_dev_input[key][:,:max_len]

In [73]:
model=build_model()
model.summary()

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_word_ids (InputLayer)    [(None, 31)]         0           []                               
                                                                                                  
 input_mask (InputLayer)        [(None, 31)]         0           []                               
                                                                                                  
 tf_roberta_model_2 (TFRobertaM  TFBaseModelOutputWi  355359744  ['input_word_ids[0][0]',         
 odel)                          thPoolingAndCrossAt               'input_mask[0][0]']             
                                tentions(last_hidde                                               
                                n_state=(None, 31,                                          

  super(Adam, self).__init__(name, **kwargs)


In [74]:
model.fit(df_train_input,df_train['labels'],verbose=2,validation_data=(df_dev_input,df_dev['labels']),epochs=8) 

Epoch 1/8
187/187 - 181s - loss: 0.3399 - accuracy: 0.8641 - val_loss: 0.2552 - val_accuracy: 0.9062 - 181s/epoch - 965ms/step
Epoch 2/8
187/187 - 141s - loss: 0.1960 - accuracy: 0.9340 - val_loss: 0.2353 - val_accuracy: 0.9175 - 141s/epoch - 756ms/step
Epoch 3/8
187/187 - 141s - loss: 0.1335 - accuracy: 0.9555 - val_loss: 0.2952 - val_accuracy: 0.9137 - 141s/epoch - 755ms/step
Epoch 4/8
187/187 - 141s - loss: 0.0840 - accuracy: 0.9708 - val_loss: 0.2986 - val_accuracy: 0.9118 - 141s/epoch - 753ms/step
Epoch 5/8
187/187 - 141s - loss: 0.0573 - accuracy: 0.9808 - val_loss: 0.3949 - val_accuracy: 0.9118 - 141s/epoch - 755ms/step
Epoch 6/8
187/187 - 141s - loss: 0.0449 - accuracy: 0.9840 - val_loss: 0.3271 - val_accuracy: 0.9071 - 141s/epoch - 755ms/step
Epoch 7/8
187/187 - 141s - loss: 0.0380 - accuracy: 0.9873 - val_loss: 0.4731 - val_accuracy: 0.9118 - 141s/epoch - 754ms/step
Epoch 8/8
187/187 - 141s - loss: 0.0314 - accuracy: 0.9891 - val_loss: 0.4496 - val_accuracy: 0.9185 - 141s/epo

<keras.callbacks.History at 0x7fed06081a50>

In [75]:
tokenized_test=[]
for i in range(len(test_data)):
    pre=encoding_precondition(test_data['precondition'][i])
    st=encode_statement(test_data['statement'][i])
    tokenized_test.append(pre+st)
test_data['tokenized']=tokenized_test
test_data.head()

Unnamed: 0,precondition,statement,tokenized
0,The people want to have a romantic and pleasan...,People typically does desire to smell violets.,"[3, 133, 82, 236, 7, 33, 10, 8728, 8, 16219, 6..."
1,The contract is to buy products from you.,Getting contract typically cause to make money...,"[3, 133, 1355, 16, 7, 907, 785, 31, 47, 4, 3, ..."
2,Train station is closed.,Line can typically be used to move train along...,"[3, 40249, 1992, 16, 1367, 4, 3, 18997, 64, 37..."
3,There is no water for driving the boats.,People typically desires drive boats for fun.,"[3, 970, 16, 117, 514, 13, 1428, 5, 8934, 4, 3..."
4,The poet is busy.,Poet can typically be used for creating poetry.,"[3, 133, 16893, 16, 3610, 4, 3, 26170, 594, 64..."


In [76]:
mask_test=[]
for i in range(len(test_data)):
    padded_seq=tokenizer(test_data['precondition'][i],test_data['statement'][i],padding=True,add_special_tokens=True)
    mask_test.append(padded_seq)
test_data['masked'] =mask_test
df_test_input=input_data_con(test_data['masked'].values)
for key in df_test_input.keys():
    df_test_input[key]=df_test_input[key][:,:max_len]

### Output Prediction Result File

You will need to submit a prediction result file. It should have 2028 lines, every line should be either 0 or 1, which is your model's prediction on the respective test set instance.

In [77]:
results = []

In [78]:
import numpy as np

In [79]:
results=[np.argmax(i) for i in model.predict(df_test_input)] 

In [80]:
# suppose you had your model's predictions on the 2028 test cases read from test_enc_unlabeled.tsv, and 
#those results are in the list called 'results'
assert (len(results) == 4850)

In [81]:
# make sure the results are not float numbers, but intergers 0 and 1
results = [int(x) for x in results]

In [82]:
# write your prediction results to 'upload_predictions.txt' and upload that later
with open('upload_predictions.txt', 'w', encoding = 'utf-8') as fp:
    for x in results:
        fp.write(str(x) + '\n')