# Baseline Models

In [1]:
##############################################################################################################################
# Load Data
##############################################################################################################################

#==================================================================
# Import Standard Libraries
#==================================================================

from theory_of_mind import *

# Set global parameters
TEMPERATURE = 1
EPOCHS = 20

#==================================================================
# Set Up Data Manager
#==================================================================

dm = DataManager()

bert = dm.load_all_data("bert", True)
print("bert embeddings loaded...")

use = dm.load_all_data("use", False)
print("use embeddings loaded...")

#==================================================================
# Load Labels
#==================================================================

labels = dm.load_all_labels()

#==================================================================
# Format Data
#==================================================================

def concatenate_inputs(df):
    '''Creates concatenated input for word embeddings.'''
    sentences = cc([df["sentence1"], df["sentence2"]])
    difference = df["sentence1"] - df["sentence2"]
    product = np.multiply(df["sentence1"], df["sentence2"])
    return cc([sentences, difference, product])

def format_data_for_training(data, labels):
    '''Returns data and labels for training.'''

    # Transform data
    trD = concatenate_inputs(data["train"])
    evD = concatenate_inputs(data["dev"])

    # Transform labels
    trL = labels["train"]
    evL = labels["dev"]

    print("Training Label Shape: {}".format(trL.shape))
    print("Eval Label Shape: {}".format(evL.shape))
    print("Training Data Shape: {}".format(trD.shape))
    print("Eval Data Shape: {}".format(evL.shape))

    return trD, trL, evD, evL

# Format data
trD_use, trL, evD_use, evL = format_data_for_training(use, labels)
trD_bert, trL, evD_bert, evL = format_data_for_training(bert, labels)

Using TensorFlow backend.


bert embeddings loaded...
use embeddings loaded...
Training Label Shape: (78734, 3)
Eval Label Shape: (9842, 3)
Training Data Shape: (78734, 2048)
Eval Data Shape: (9842, 3)
Training Label Shape: (78734, 3)
Eval Label Shape: (9842, 3)
Training Data Shape: (78734, 3072)
Eval Data Shape: (9842, 3)


In [2]:
def build_default_model(name, trD, trL, evD, evL):
    '''Builds and compiles a default model.'''
    
    # Initial layers
    inputs = layers.Input(shape= trD.shape[1])
    outputs = add_layer_for_dense(inputs, name, trD.shape[1])
    
    # Add "partition layer" for easy splitting
    outputs = layers.Lambda(lambda x: x, name = "partition")(outputs)
    
    # Add final layer then compile
    outputs = add_layer_for_dense(outputs, name, 256)
    outputs = layers.Dense(trL.shape[1], name = "logits")(outputs)
    outputs = layers.Activation('softmax', name='softmax')(outputs)
    model = compile_model(name, inputs, outputs)
    
    # Add data as attributes
    model.trD_ = trD
    model.evD_ = evD
    model.trL_ = trL
    model.evL_ = evL
    model.status_ = "..."
    model.save_name_ = model.name_
    
    return model


def fit_model(model, epochs = 5):
    '''Fits model with training and validation data.'''
    
    banner("Running model {}...".format(model.name_), symbol = "=")
    
    history = model.fit(model.trD_, 
                        model.trL_, 
                        validation_data=[model.evD_, model.evL_], 
                        batch_size = 32,
                        epochs=epochs, 
                        verbose=1)
    return history
    
def compile_model(name, inputs, outputs, 
                  loss = 'categorical_crossentropy', 
                  optimizer = 'adam',  
                  metrics = ['accuracy']):
    '''Compiles model. Set as function to preserve defaults.'''
    
    model = keras.models.Model(inputs = inputs, outputs = outputs, name = name)
    model.name_ = model.name
    
    model.compile(loss = loss,  
                  optimizer = optimizer, 
                  metrics = metrics)
    return model

# Add data as attributes
def add_attributes(model, name, trD, trL, evD, evL):
    model.name_ = name
    model.trD_ = trD
    model.evD_ = evD
    model.trL_ = trL
    model.evL_ = evL
    model.status_ = "..."
    model.save_name_ = model.name_
    return model

def save_model_and_history(model, fp = "Final Project/models/"):
    '''Saves model and history.'''
    
    banner("Saving model {}...".format(model.name_))
    
    # Save model
    model.save("{}{}".format(fp, model.save_name_))
    
    # Save history
    history = pd.DataFrame(model.history.history)
    history.to_pickle("{}{}_history.pkl".format(fp, model.save_name_))
    
def load_model_and_history(model_name):
    '''Loads model and its history'''
    model = tf.keras.models.load_model("Final Project/models/{}".format(model_name))
    history = pd.read_pickle("Final Project/models/{}_history.pkl".format(model_name))
    return model, history

# Baseline Models (Zero-Order Theory of Mind)

Here we run and save models that try to capture the true state of the world.

In [None]:
#==================================================================
# Universal Sentence Encoder Baseline
#==================================================================

use0 = build_default_model("use_baseline", trD_use, trL, evD_use, evL)
history = fit_model(use0, epochs = 20)
save_model_and_history(use0)

# Baseline (DistilBert)

In [8]:
#==================================================================
# Format Data
#==================================================================

bert0 = build_default_model("bert_baseline", trD_bert, trL, evD_bert, evL)
fit_model(bert0, epochs = 20)
save_model_and_history(bert0)


Running model bert_baseline...

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Saving model bert_baseline...



In [None]:
#======================================================================
# Run Model Diagnostics
#======================================================================

use_report = run_model_diagnostics(use0, evD_use, evL, dvSen)
bert_report = run_model_diagnostics(bert0, evD_bert, evL, dvSen)

use_report.to_pickle("scratch/use_report.pkl")
bert_report.to_pickle("scratch/bert_report.pkl")

# First Order Theory of Mind

In [6]:
#======================================================================
# Load Models
#======================================================================

# Load model weights
use0, _ = load_model_and_history("use_baseline")
bert0, _ = load_model_and_history("bert_baseline")

In [7]:
#======================================================================
# Use predictions from other model as labels
#======================================================================


# Get temperature softened training and evaulation labels
trL_bert_softened = get_soft_predictions(bert0, trD_bert, TEMPERATURE)
evL_bert_softened = get_soft_predictions(bert0, evD_bert, TEMPERATURE)

trL_use_softened = get_soft_predictions(use0, trD_use, TEMPERATURE)
evL_use_softened = get_soft_predictions(use0, evD_use, TEMPERATURE)

In [6]:
#======================================================================
# Use predictions from other model as labels
#======================================================================

def get_soft_predictions(teacher, data, temperature, col = "logits"):
    '''Computes softened predictions from teacher model on input data.'''
    temp = tf.keras.models.Model(inputs = teacher.input, outputs = teacher.get_layer(col).output)
    preds = temp.predict(data) / temperature
    return np.apply_along_axis(lambda preds: np.exp(preds) / np.sum(np.exp(preds)), 1, preds)


def adjust_for_temperature(model, name, temperature, input_size = 512):
    # Build and compile model using loss weights 
    
    model.name_ = name
    model = add_distillation_arm(model, input_size)

    model.compile(loss = "categorical_crossentropy", 
                  optimizer = "adam",
                  loss_weights = {"distill_softmax": temperature ** 2, "softmax": 1},
                  metrics = ["acc"])
    
    return model

def add_distillation_arm(model, temperature = 1, input_size = 512):
    '''Builds and compiles a default model.'''
    
    # Get model name, inputs, outputs, and output size
    name = model.name_
    input_col_size = 512
    output_col_size = 3
    
    # Get outputs of model "partition" layer, then add distillation layer
    partition = model.get_layer("partition")
    outputs = add_layer_for_dense(partition.output, name, 128)
    distill = add_distillation_layers(name, 
                                      outputs, 
                                      temperature, 
                                      output_col_size)
    
    return tf.keras.models.Model(name = name, 
                                 inputs =  model.input, 
                                 outputs = [model.output, distill])
    
    
def add_distillation_layers(name, outputs, temperature, output_col_size):
    '''Returns a set of layers for distillation learning.'''
    distill = layers.Dense(output_col_size, name = "distill_logits")(outputs)
    distill = layers.Activation('softmax',name = "distill_softmax")(distill)
    return distill   

In [None]:
#======================================================================
# Use Predicts BERT
#======================================================================

# Add labels alongside other model meta-data
use1, _ = load_model_and_history("use_baseline")
use1 = adjust_for_temperature(use1, "use_order_1_temp_1", TEMPERATURE)
use1 = add_attributes(use1, "use_order_1_temp_1", trD_use, [trL, trL_bert_softened], evD_use, [evL, evL_bert_softened])

fit_model(use1, epochs = 20)
save_model_and_history(use1)

In [9]:
#======================================================================
# BERT Predicts USE
#======================================================================

# Add labels alongside other model meta-data
bert1, _ = load_model_and_history("bert_baseline")
bert1 = adjust_for_temperature(bert1, "bert_order_1_temp_1", TEMPERATURE, 768)
bert1 = add_attributes(bert1, "bert_order_1", trD_bert, [trL, trL_use_softened], evD_bert, [evL, evL_use_softened])

fit_model(bert1, epochs = 20)
save_model_and_history(bert1)


Running model bert_order_1...

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Saving model bert_order_1...



In [19]:
#======================================================================
# Run Model Diagnostics
#======================================================================

use1, _ = load_model_and_history("use_order_1_temp_1")
use1.name_ = "use_order_1_temp_1"

use_report = run_model_diagnostics(use1, evD_use, evL, dvSen)
bert_report = run_model_diagnostics(bert1, evD_bert, evL, dvSen)

use_report.to_pickle("scratch/use_tom_1_report.pkl")
bert_report.to_pickle("scratch/bert_tom1_report.pkl")


use_order_1_temp_1

               precision    recall  f1-score   support

contradiction       0.84      0.74      0.79      3278
   entailment       0.78      0.84      0.81      3329
      neutral       0.72      0.75      0.74      3235

     accuracy                           0.78      9842
    macro avg       0.78      0.78      0.78      9842
 weighted avg       0.78      0.78      0.78      9842

Index(['neutral', 'entailment', 'contradiction'], dtype='object')
[[0.24730746 0.03078643 0.0549685 ]
 [0.01442796 0.28449502 0.03932128]
 [0.03322495 0.0473481  0.2481203 ]]

bert_order_1

               precision    recall  f1-score   support

contradiction       0.77      0.59      0.67      3278
   entailment       0.63      0.83      0.72      3329
      neutral       0.69      0.63      0.66      3235

     accuracy                           0.69      9842
    macro avg       0.70      0.68      0.68      9842
 weighted avg       0.70      0.69      0.68      9842

Index(['neutr

# Second-Order Theory of Mind

In [11]:
# Get temperature softened training and evaulation labels

bert1, _ = load_model_and_history("bert_order_1")
bert1.name_ = "bert_order_1"

use1, _ = load_model_and_history("use_order_1_temp_1")
use1.name_ = "use_order_1_temp_1"

trL_bert_softened = get_soft_predictions(bert1, trD_bert, TEMPERATURE)
evL_bert_softened = get_soft_predictions(bert1, evD_bert, TEMPERATURE)

trL_use_softened = get_soft_predictions(use1, trD_use, TEMPERATURE)
evL_use_softened = get_soft_predictions(use1, evD_use, TEMPERATURE)

In [27]:
#======================================================================
# Use Predicts BERT
#======================================================================

# Add labels alongside other model meta-data
use2, _ = load_model_and_history("use_baseline")
use2 = adjust_for_temperature(use2, "use_order_2_temp_1", TEMPERATURE, 512)
use2 = add_attributes(use2, "use_order_2_temp_1", trD_use, [trL, trL_bert_softened], evD_use, [evL, evL_bert_softened])

fit_model(use2, epochs = 20)
save_model_and_history(use2)


Running model use_order_2_temp_1...

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Saving model use_order_2_temp_1...



In [15]:
#======================================================================
# BERT Predicts USE
#======================================================================

def add_layer_for_dense(inputs, name, nodes, activation = "relu", dropout = 0.2):
    '''Add a dense layer with a size specified by nodes.'''
    
    init = initializers.glorot_normal()
    k_regs = regularizers.l1_l2(l1=1e-5, l2=1e-4)
    
    x = layers.Dense(nodes, 
                     activation = activation, 
                     kernel_initializer = init,
                     kernel_regularizer= k_regs, name = "dense_layer_500")(inputs)
    x = layers.BatchNormalization(name = "tosst")(x)
    x = layers.Dropout(dropout, name = "temp")(x)
    return x

def fit_model(model, epochs = 5):
    '''Fits model with training and validation data.'''
    
    banner("Running model {}...".format(model.name_), symbol = "=")
    
    history = model.fit(model.trD_, 
                        model.trL_, 
                        validation_data=[model.evD_, model.evL_], 
                        batch_size = 128,
                        epochs=epochs, 
                        verbose=1)
    return history

# Add labels alongside other model meta-data
bert2, _ = load_model_and_history("bert_baseline")
bert2 = adjust_for_temperature(bert2, "bert_order_2_temp_1", TEMPERATURE, 768)
bert2 = add_attributes(bert2, "bert_order_2", trD_bert, [trL, trL_use_softened], evD_bert, [evL, evL_use_softened])

fit_model(bert2, epochs = 20)
save_model_and_history(bert2)


Running model bert_order_2...

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Saving model bert_order_2...



In [28]:
#======================================================================
# Run Model Diagnostics
#======================================================================

use2, _ = load_model_and_history("use_order_2_temp_1")
use2.name_ = use2.name

use2_report = run_model_diagnostics(use2, evD_use, evL, dvSen)
bert2_report = run_model_diagnostics(bert2, evD_bert, evL, dvSen)

use_report.to_pickle("scratch/use_tom_2_report.pkl")
bert_report.to_pickle("scratch/bert_tom_2_report.pkl")


use_order_2_temp_1

               precision    recall  f1-score   support

contradiction       0.78      0.82      0.80      3278
   entailment       0.83      0.78      0.81      3329
      neutral       0.74      0.74      0.74      3235

     accuracy                           0.78      9842
    macro avg       0.78      0.78      0.78      9842
 weighted avg       0.78      0.78      0.78      9842

Index(['neutral', 'entailment', 'contradiction'], dtype='object')
[[0.27474091 0.01869539 0.03962609]
 [0.02631579 0.26427555 0.04765292]
 [0.05151392 0.03454582 0.24263361]]

bert_order_2

               precision    recall  f1-score   support

contradiction       0.81      0.44      0.57      3278
   entailment       0.64      0.77      0.70      3329
      neutral       0.59      0.74      0.66      3235

     accuracy                           0.65      9842
    macro avg       0.68      0.65      0.64      9842
 weighted avg       0.68      0.65      0.64      9842

Index(['neutr

# Model Analysis and Diagnostics

In [82]:
def run_model_diagnostics(model, data, labels, sentences, theory_of_mind_level):

    banner(model.name_)
    
    cols = labels.columns
    preds = model.predict(data)
    if isinstance(preds, list):
        preds = preds[0]
    pred_labels = cols[preds.argmax(axis=-1)]
    true = labels.idxmax(axis=1)

    # Print reports
    cr = classification_report(true, pred_labels, output_dict = True)
    cm = confusion_matrix(true, pred_labels, normalize = "all")
    print(classification_report(true, pred_labels))
    print(cols)
    print(cm)
    
    cr = pd.DataFrame(cr)
    cr["name"] = model.name_
    cr["tom_level"] = theory_of_mind_level
    cr["metric"] = cr.index
    cr = cr[cr["metric"] != "support"]
    
    # Build report dataframe
    report = pd.DataFrame({"true": true, "predicted": pred_labels}) 
    report["correct"] = (report["true"] == report["predicted"])
    
    
    # Add sentences
    report = pd.concat([sentences.reset_index() , report.reset_index() ], axis = 1)
    
    report["sentence1_length"] = report["sentence1"].apply(lambda row: len(str(row).split()))
    report["sentence2_length"] = report["sentence2"].apply(lambda row: len(str(row).split()))
    report["sentence_difference"] = report["sentence1_length"] - report["sentence2_length"]
    
    report["name"] = model.name_
    report["tom_level"] = theory_of_mind_level
    
    return report, cr, cm

In [None]:
# Load models
use0, _ = load_model_and_history("use_baseline")
bert0, _ = load_model_and_history("bert_baseline")

In [83]:
#======================================================================
# Zero-Order
#======================================================================

use0.name_ = "use"
bert0.name_ = "bert"

use0_report, use0_cr, use0_cm = run_model_diagnostics(use0, evD_use, evL, dvSen, 0)
bert0_report, bert0_cr, bert0_cm  = run_model_diagnostics(bert0, evD_bert, evL, dvSen, 0)


use

               precision    recall  f1-score   support

contradiction       0.79      0.80      0.80      3278
   entailment       0.79      0.82      0.81      3329
      neutral       0.76      0.72      0.74      3235

     accuracy                           0.78      9842
    macro avg       0.78      0.78      0.78      9842
 weighted avg       0.78      0.78      0.78      9842

Index(['neutral', 'entailment', 'contradiction'], dtype='object')
[[0.26722211 0.02529974 0.04054054]
 [0.02540134 0.27829709 0.03454582]
 [0.04582402 0.04775452 0.23511481]]

bert

               precision    recall  f1-score   support

contradiction       0.62      0.68      0.65      3278
   entailment       0.65      0.74      0.69      3329
      neutral       0.71      0.54      0.61      3235

     accuracy                           0.65      9842
    macro avg       0.66      0.65      0.65      9842
 weighted avg       0.66      0.65      0.65      9842

Index(['neutral', 'entailment', 'con

In [84]:
use1.name_ = "use"
bert1.name_ = "bert"

use1_report, use1_cr, use1_cm = run_model_diagnostics(use1, evD_use, evL, dvSen, 1)
bert1_report, bert1_cr, bert1_cm = run_model_diagnostics(bert1, evD_bert, evL, dvSen, 1)


use

               precision    recall  f1-score   support

contradiction       0.84      0.74      0.79      3278
   entailment       0.78      0.84      0.81      3329
      neutral       0.72      0.75      0.74      3235

     accuracy                           0.78      9842
    macro avg       0.78      0.78      0.78      9842
 weighted avg       0.78      0.78      0.78      9842

Index(['neutral', 'entailment', 'contradiction'], dtype='object')
[[0.24730746 0.03078643 0.0549685 ]
 [0.01442796 0.28449502 0.03932128]
 [0.03322495 0.0473481  0.2481203 ]]

bert

               precision    recall  f1-score   support

contradiction       0.77      0.59      0.67      3278
   entailment       0.63      0.83      0.72      3329
      neutral       0.69      0.63      0.66      3235

     accuracy                           0.69      9842
    macro avg       0.70      0.68      0.68      9842
 weighted avg       0.70      0.69      0.68      9842

Index(['neutral', 'entailment', 'con

In [85]:
use2.name_ = "use"
bert2.name_ = "bert"

use2_report, use2_cr, use2_cm = run_model_diagnostics(use2, evD_use, evL, dvSen, 2)
bert2_report, bert2_cr, bert2_cm = run_model_diagnostics(bert2, evD_bert, evL, dvSen, 2)


use

               precision    recall  f1-score   support

contradiction       0.78      0.82      0.80      3278
   entailment       0.83      0.78      0.81      3329
      neutral       0.74      0.74      0.74      3235

     accuracy                           0.78      9842
    macro avg       0.78      0.78      0.78      9842
 weighted avg       0.78      0.78      0.78      9842

Index(['neutral', 'entailment', 'contradiction'], dtype='object')
[[0.27474091 0.01869539 0.03962609]
 [0.02631579 0.26427555 0.04765292]
 [0.05151392 0.03454582 0.24263361]]

bert

               precision    recall  f1-score   support

contradiction       0.81      0.44      0.57      3278
   entailment       0.64      0.77      0.70      3329
      neutral       0.59      0.74      0.66      3235

     accuracy                           0.65      9842
    macro avg       0.68      0.65      0.64      9842
 weighted avg       0.68      0.65      0.64      9842

Index(['neutral', 'entailment', 'con

In [95]:
#======================================================================
# Assemble into one dataframe
#======================================================================

reports = pd.concat([use0_report, bert0_report, use1_report, bert1_report, use2_report, bert2_report], axis = 1)
reports.head(50)

Unnamed: 0,index,sentence1,sentence2,label,index.1,true,predicted,correct,sentence1_length,sentence2_length,...,label.1,index.2,true.1,predicted.1,correct.1,sentence1_length.1,sentence2_length.1,sentence_difference,name,tom_level
0,0,Two women are embracing while holding to go pa...,The sisters are hugging goodbye while holding ...,neutral,0,neutral,neutral,True,9,14,...,neutral,0,neutral,neutral,True,9,14,-5,bert,2
1,1,Two women are embracing while holding to go pa...,Two woman are holding packages.,entailment,1,entailment,entailment,True,9,5,...,entailment,1,entailment,entailment,True,9,5,4,bert,2
2,2,Two women are embracing while holding to go pa...,The men are fighting outside a deli.,contradiction,2,contradiction,contradiction,True,9,7,...,contradiction,2,contradiction,neutral,False,9,7,2,bert,2
3,3,"Two young children in blue jerseys, one with t...",Two kids in numbered jerseys wash their hands.,entailment,3,entailment,neutral,False,32,8,...,entailment,3,entailment,neutral,False,32,8,24,bert,2
4,4,"Two young children in blue jerseys, one with t...",Two kids at a ballgame wash their hands.,neutral,4,neutral,entailment,False,32,8,...,neutral,4,neutral,neutral,True,32,8,24,bert,2
5,5,"Two young children in blue jerseys, one with t...",Two kids in jackets walk to school.,contradiction,5,contradiction,neutral,False,32,7,...,contradiction,5,contradiction,contradiction,True,32,7,25,bert,2
6,6,A man selling donuts to a customer during a wo...,A woman drinks her coffee in a small cafe.,contradiction,6,contradiction,contradiction,True,18,9,...,contradiction,6,contradiction,contradiction,True,18,9,9,bert,2
7,7,A man selling donuts to a customer during a wo...,A man selling donuts to a customer during a wo...,neutral,7,neutral,entailment,False,18,19,...,neutral,7,neutral,neutral,True,18,19,-1,bert,2
8,8,A man selling donuts to a customer during a wo...,A man selling donuts to a customer.,entailment,8,entailment,entailment,True,18,7,...,entailment,8,entailment,neutral,False,18,7,11,bert,2
9,9,Two young boys of opposing teams play football...,boys play football,entailment,9,entailment,entailment,True,15,3,...,entailment,9,entailment,entailment,True,15,3,12,bert,2


In [107]:
cr = pd.concat([use0_cr, bert0_cr, use1_cr, bert1_cr, use2_cr, bert2_cr])

# cr = cr[cr["metric"] == "precision"]
cols = ["name", "tom_level", "metric", "entailment", "neutral", "contradiction"]
cr = cr[cols].sort_values(["name", "tom_level"])
cr.head(50)

Unnamed: 0,name,tom_level,metric,entailment,neutral,contradiction
precision,bert,0,precision,0.653887,0.709364,0.616469
recall,bert,0,recall,0.737759,0.540958,0.680598
f1-score,bert,0,f1-score,0.693296,0.61382,0.646948
precision,bert,1,precision,0.632952,0.692515,0.768955
recall,bert,1,recall,0.83088,0.63493,0.587858
f1-score,bert,1,f1-score,0.718535,0.662474,0.666321
precision,bert,2,precision,0.635778,0.594899,0.805755
recall,bert,2,recall,0.770802,0.735394,0.444173
f1-score,bert,2,f1-score,0.696809,0.657727,0.572665
precision,use,0,precision,0.792076,0.757943,0.789553
