In [1]:
import pandas as pd
import numpy as np
import statistics
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
gpu = gpus[0]

tf.config.experimental.set_memory_growth(gpu, True)
import transformers
from sklearn.metrics import confusion_matrix, classification_report



In [2]:
transformers.logging.set_verbosity_error()

In [3]:
data=pd.read_csv('../input/circa-dataset/circa-data.tsv',sep='\t')
data.head()

Unnamed: 0,id,context,question-X,canquestion-X,answer-Y,judgements,goldstandard1,goldstandard2
0,0,Y has just travelled from a different city to ...,Are you employed?,I am employed .,I'm a veterinary technician.,Yes#Yes#Yes#Yes#Yes,Yes,Yes
1,1,X wants to know about Y's food preferences.,Are you a fan of Korean food?,I am a fan of Korean food .,I wouldn't say so,Probably no#No#No#No#Probably yes / sometimes yes,No,No
2,2,Y has just told X that he/she is thinking of b...,Are you bringing any pets into the flat?,I am bringing pets into the flat .,I do not own any pets,No#No#No#No#No,No,No
3,3,X wants to know what activities Y likes to do ...,Would you like to get some fresh air in your f...,I would like to get fresh air in my free time .,I am desperate to get out of the city.,"Yes#Yes, subject to some conditions#Probably y...",Yes,Yes
4,4,X and Y are childhood neighbours who unexpecte...,Is your family still living in the neighborhood?,My family is living in the neighborhood .,My parents are snowbirds now.,"No#In the middle, neither yes nor no#Probably ...","In the middle, neither yes nor no","In the middle, neither yes nor no"


In [4]:
print(data.shape)
data['context'].value_counts()

(34268, 8)


context
Y has just told X that he/she is thinking of buying a flat in New York.             3500
Y has just travelled from a different city to meet X.                               3487
X wants to know about Y's music preferences.                                        3483
Y has just told X that he/she is considering switching his/her job.                 3479
X wants to know what activities Y likes to do during weekends.                      3465
X and Y are colleagues who are leaving work on a Friday at the same time.           3452
X wants to know what sorts of books Y likes to read.                                3445
X and Y are childhood neighbours who unexpectedly run into each other at a cafe.    3391
Y has just moved into a neighbourhood and meets his/her new neighbour X.            3356
X wants to know about Y's food preferences.                                         3210
Name: count, dtype: int64

In [5]:
data['goldstandard1'].value_counts()

goldstandard1
Yes                                              14504
No                                               10829
Yes, subject to some conditions                   2583
Probably yes / sometimes yes                      1244
Probably no                                       1160
In the middle, neither yes nor no                  638
Other                                              504
I am not sure how X will interpret Y’s answer       63
Name: count, dtype: int64

In [6]:
data['goldstandard2'].value_counts()

goldstandard2
Yes                                  16628
No                                   12833
Yes, subject to some conditions       2583
In the middle, neither yes nor no      949
Other                                  504
Name: count, dtype: int64

In [7]:
#data = data.dropna()
print(data.isnull().sum())
# data['goldstandard1'].value_counts()

id                  0
context             0
question-X          0
canquestion-X      10
answer-Y            0
judgements          0
goldstandard1    2743
goldstandard2     771
dtype: int64


In [8]:
data = data.dropna()
data['goldstandard2'].value_counts()

goldstandard2
Yes                                  15745
No                                   11985
Yes, subject to some conditions       2580
In the middle, neither yes nor no      701
Other                                  504
Name: count, dtype: int64

In [9]:
# data['label'] = data['goldstandard1'].copy()

# data['label'] = data['label'].map({'Yes': 0, 'No': 1, 'Yes, subject to some conditions': 2,
#                                   'Probably yes / sometimes yes': 3, 'Probably no': 4,
#                                   'In the middle, neither yes nor no': 5, 'Other': 6,
#                                   'I am not sure how X will interpret Y’s answer': 7})

In [9]:
data['label_2'] = data['goldstandard2'].copy()

data['label_2'] = data['label_2'].map({'Yes': 0, 'No': 1, 'Yes, subject to some conditions': 2,
                                  'In the middle, neither yes nor no': 3, 'Other': 4})

In [10]:
data['label_2'].value_counts()

label_2
0    15745
1    11985
2     2580
3      701
4      504
Name: count, dtype: int64

In [11]:
data.reset_index(drop=True, inplace=True)

In [12]:
#Length of avg question
q = []
for x in range(data.shape[0]):
    q.append(len(data['question-X'][x].split()))
    
#Length of avg answer
a = []
for x in range(data.shape[0]):
    a.append(len(data['answer-Y'][x].split()))
    
print(statistics.mean(q))
print(statistics.mean(a))

6.602189433603046
5.699095668729177


In [13]:
from sklearn.model_selection import train_test_split

train , test = train_test_split(data, test_size = 0.20)

# train['sep_token'] = '[SEP]'
# train['cls_token'] = '[CLS]'
# train['text'] = train['cls_token'] + \
#                     train['context'] + train['sep_token']+ train['question-X'] + \
#                     train['sep_token'] + train['answer-Y'] + \
#                 train['sep_token']


X = train[['question-X', 'answer-Y']]
y = tf.keras.utils.to_categorical(train.label_2, num_classes=5)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.15)

print(train.shape, test.shape)
print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)

(25212, 9) (6303, 9)
(21430, 2)
(3782, 2)
(21430, 5)
(3782, 5)


In [14]:
max_length = None
epochs = 20
batch_size = 32

In [77]:
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=batch_size,
        shuffle=True,
        include_targets=True,
    ):
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load BERT tokenizer to encode the text
        # base-base-uncased pretrained model
        self.tokenizer = transformers.BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True
        )
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch
        return len(self.sentence_pairs) // self.batch_size

    def __getitem__(self, idx):
        # Retrieves the batch of index
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus, a batch of both the sentences are
        # encoded together and separated by [SEP] token
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            pad_to_max_length=True,
            return_tensors="tf",
        )

        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Set to True if data generator is used for training/validation
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Shuffle indices after each epoch, if shuffle is set to True
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)

In [16]:
input_ids = tf.keras.layers.Input(
    shape=(max_length,), dtype=tf.int32, name="input_ids"
)
attention_masks = tf.keras.layers.Input(
    shape=(max_length,), dtype=tf.int32, name="attention_masks"
)
token_type_ids = tf.keras.layers.Input(
    shape=(max_length,), dtype=tf.int32, name="token_type_ids"
)
# Loading pretrained BERT model
bert_model = transformers.TFBertModel.from_pretrained("bert-base-uncased")
# Freeze the BERT model to reuse the pretrained features without modifying them
bert_model.trainable = False

bert_output = bert_model(
    input_ids, attention_mask=attention_masks, token_type_ids=token_type_ids
)
sequence_output = bert_output.last_hidden_state
pooled_output = bert_output.pooler_output
#dropout = tf.keras.layers.Dropout(0.1)(pooled_output)
clf_output = sequence_output[:, 0, :]
output = tf.keras.layers.Dense(5, activation="softmax")(clf_output)
model = tf.keras.models.Model(
    inputs=[input_ids, attention_masks, token_type_ids], outputs=output
)

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=["acc"],
)

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [17]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_masks (InputLayer)   [(None, None)]       0           []                               
                                                                                                  
 token_type_ids (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  109482240   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_masks[0][0]',    

In [18]:
train_data = BertSemanticDataGenerator(
    X_train[['question-X', 'answer-Y']].values.astype("str"),
    y_train,
    batch_size=batch_size,
    shuffle=True,
)
valid_data = BertSemanticDataGenerator(
    X_val[['question-X', 'answer-Y']].values.astype("str"),
    y_val,
    batch_size=batch_size,
    shuffle=False,
)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [19]:
cp_callback = tf.keras.callbacks.ModelCheckpoint('circa_qa_bert_trial.h5',
                                                monitor='val_loss',
                                                mode='min',
                                                save_best_only=True,
                                                save_weights_only=True,
                                                verbose=1)



early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                          mode='min',
                                                          patience=3,
                                                          verbose=1)

callbacks_list = [early_stopping, cp_callback]

In [20]:
y_test = tf.keras.utils.to_categorical(test.label_2, num_classes=5)

In [21]:
test_data = BertSemanticDataGenerator(
    test[["question-X", "answer-Y"]].values.astype("str"),
    y_test,
    batch_size=batch_size,
    shuffle=False,
)

In [22]:
train.head()

Unnamed: 0,id,context,question-X,canquestion-X,answer-Y,judgements,goldstandard1,goldstandard2,label_2
16622,18238,Y has just told X that he/she is thinking of b...,Are you moving to New York?,I am moving to New York .,I'll be living on 14th Street.,Yes#Yes#Yes#Yes#Yes,Yes,Yes,0
1349,1503,X wants to know about Y's music preferences.,Do you like to attend concerts?,I like to attend concerts .,I go to one a week,Yes#Yes#Yes#Probably yes / sometimes yes#Yes,Yes,Yes,0
29744,32326,Y has just moved into a neighbourhood and meet...,Do you play any sports?,I play sports .,"Soccer, football, and tennis.",Yes#Yes#Yes#Yes#Yes,Yes,Yes,0
4261,4702,X and Y are colleagues who are leaving work on...,Is today pay day?,Today is pay day .,I hope so,"In the middle, neither yes nor no#Probably yes...","In the middle, neither yes nor no","In the middle, neither yes nor no",3
29424,31972,X wants to know about Y's food preferences.,Would you prefer a vegetarian restaurant?,I would prefer a vegetarian restaurant .,I would prefer a place where I can order steak.,No#Probably no#No#No#No,No,No,1


In [23]:
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=epochs,
    workers=-1, callbacks=callbacks_list,
    verbose=1)

Epoch 1/20




Epoch 1: val_loss improved from inf to 0.68664, saving model to circa_qa_bert_trial.h5
Epoch 2/20
Epoch 2: val_loss improved from 0.68664 to 0.64145, saving model to circa_qa_bert_trial.h5
Epoch 3/20
Epoch 3: val_loss improved from 0.64145 to 0.62393, saving model to circa_qa_bert_trial.h5
Epoch 4/20
Epoch 4: val_loss did not improve from 0.62393
Epoch 5/20
Epoch 5: val_loss improved from 0.62393 to 0.60449, saving model to circa_qa_bert_trial.h5
Epoch 6/20
Epoch 6: val_loss improved from 0.60449 to 0.58968, saving model to circa_qa_bert_trial.h5
Epoch 7/20
Epoch 7: val_loss improved from 0.58968 to 0.58735, saving model to circa_qa_bert_trial.h5
Epoch 8/20
Epoch 8: val_loss did not improve from 0.58735
Epoch 9/20
Epoch 9: val_loss did not improve from 0.58735
Epoch 10/20
Epoch 10: val_loss improved from 0.58735 to 0.58693, saving model to circa_qa_bert_trial.h5
Epoch 11/20
Epoch 11: val_loss improved from 0.58693 to 0.57561, saving model to circa_qa_bert_trial.h5
Epoch 12/20
Epoch 12:

In [24]:
# Unfreeze the bert_model
bert_model.trainable = True
# Recompile and fit again with smaller learning rate for fine-tuning
model.compile(
    optimizer=tf.keras.optimizers.Adam(3e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_masks (InputLayer)   [(None, None)]       0           []                               
                                                                                                  
 token_type_ids (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_bert_model (TFBertModel)    TFBaseModelOutputWi  109482240   ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_masks[0][0]',    

In [25]:
history = model.fit(
    train_data,
    validation_data=valid_data,
    epochs=3,
    workers=-1,verbose=1,callbacks=callbacks_list
)

Epoch 1/3
Epoch 1: val_loss improved from 0.56739 to 0.35719, saving model to circa_qa_bert_trial.h5
Epoch 2/3
Epoch 2: val_loss improved from 0.35719 to 0.33621, saving model to circa_qa_bert_trial.h5
Epoch 3/3
Epoch 3: val_loss did not improve from 0.33621


In [26]:
model.evaluate(test_data, verbose=1)



[0.44557836651802063, 0.8899872303009033]

In [79]:
# test_data_pred = BertSemanticDataGenerator(
#     test[["question-X", "answer-Y"]].values.astype("str"),
#     labels=None,
#     batch_size=test.shape[0],
#     include_targets=False,
#     shuffle=False
# )

# predicted = model.predict([test_data_pred])
# predicted.shape

# predicted_vector = predicted.copy()

# predicted_vector[predicted_vector > 0.5] = 1
# predicted_vector[predicted_vector <= 0.5] = 0
# predicted_vector


# print(classification_report(y_test, predicted_vector))

In [41]:
data['goldstandard2'].value_counts(), data['label_2'].value_counts()

(goldstandard2
 Yes                                  15745
 No                                   11985
 Yes, subject to some conditions       2580
 In the middle, neither yes nor no      701
 Other                                  504
 Name: count, dtype: int64,
 label_2
 0    15745
 1    11985
 2     2580
 3      701
 4      504
 Name: count, dtype: int64)

In [42]:
labels = list(set(data['label_2']))
labels_text = list(data['goldstandard2'].value_counts().index)

labels, labels_text

([0, 1, 2, 3, 4],
 ['Yes',
  'No',
  'Yes, subject to some conditions',
  'In the middle, neither yes nor no',
  'Other'])

In [43]:
def check_indirect(question, answer):
    sentence_pairs = np.array([[str(question), str(answer)]])
    test_data = BertSemanticDataGenerator(
        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )

    proba = model.predict(test_data[0])[0]
    print(proba)
    idx = np.argmax(proba)
    print(idx)
    proba = f"{proba[idx]: .2f}%"
    pred = labels_text[idx]
    return pred, proba

In [44]:
question = "Do you like Italian food?"
answer = "I just had an awesome pasta yesterday for dinner!"
check_indirect(question, answer)



[9.99886632e-01 1.90073831e-06 7.91925231e-06 1.03301245e-04
 2.42448152e-07]
0


('Yes', ' 1.00%')

### Completely unseen examples; BERT

In [80]:
question = "Did you eat the last piece of cake?"
answer = "I really enjoyed it. It was delicious!"
check_indirect(question, answer)

[9.9981278e-01 3.2126896e-05 1.4676831e-07 1.5190245e-04 3.0277970e-06]
0


('Yes', ' 1.00%')

In [81]:
question =  "Are you coming to the meeting tomorrow?"
answer = "I'll need to check my schedule and see what's going on."
check_indirect(question, answer)

[2.5426280e-02 8.5577679e-01 6.7016357e-03 1.1209398e-01 1.3051811e-06]
1


('No', ' 0.86%')

In [82]:
question =  "Did you finish the report I asked you to complete?"
answer = "I worked on it for a while, but I got busy with other things."
check_indirect(question, answer)

[2.0725550e-03 9.9427915e-01 1.5510622e-06 3.6467698e-03 2.9226713e-08]
1


('No', ' 0.99%')

In [83]:
question =   "Have you decided on your vacation destination?"
answer = "I'm thinking about a few places, but I need to check my budget first."
check_indirect(question, answer)

[4.5017678e-05 9.9720675e-01 1.7755976e-05 2.7304005e-03 4.9983829e-08]
1


('No', ' 1.00%')

In [84]:
question = "Did you enjoy the service that we provided?"
answer = "I would have to say yes to disappointment I'm afraid."
check_indirect(question, answer)

[7.3138118e-04 9.7705680e-01 4.2767674e-04 2.1784218e-02 5.7693395e-08]
1


('No', ' 0.98%')

In [87]:
question = "Did you like the book you just finished?"
answer = "The ending was unexpected, but I had mixed feelings about the characters."
check_indirect(question, answer)

[7.0891581e-03 1.1050723e-01 4.0677926e-03 8.7833494e-01 8.5300246e-07]
3


('In the middle, neither yes nor no', ' 0.88%')

In [88]:
question = "Are you willing to contribute to the charity?"
answer = "I'm open to it, provided it's a cause I believe in."
check_indirect(question, answer)

[2.2333125e-02 4.9651385e-06 9.7673029e-01 9.3160651e-04 1.3586288e-08]
2


('Yes, subject to some conditions', ' 0.98%')

In [90]:
question = "Want to go to the waterpark this weekend?"
answer = "Let's get soaked!"
check_indirect(question, answer)

[9.9999332e-01 1.8697001e-06 2.2875035e-06 2.4247577e-06 1.4637591e-10]
0


('Yes', ' 1.00%')

In [91]:
question = "Will you support the new policy?"
answer = "Haven't made up my mind yet."
check_indirect(question, answer)

[6.6272798e-05 2.2034863e-02 4.8152498e-07 9.7789842e-01 1.1180090e-09]
3


('In the middle, neither yes nor no', ' 0.98%')