## Import Data

In [1]:
import numpy as np 
import pandas as pd 
import json

from google.colab import drive
drive.mount('/content/gdrive')
data_path = '/content/gdrive/MyDrive/MIDS/W266: NLP'

Mounted at /content/gdrive


In [2]:
!pip install transformers --quiet
!pip install -U tensorflow==2.11 --quiet
!pip install sentencepiece --quiet
!pip install nltk --quiet
!pip install Datasets --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m105.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m588.3/588.3 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.2/439.2 kB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m78.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m80.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m74.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
from tensorflow import keras
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from datasets import load_dataset, concatenate_datasets, Dataset
from sklearn.metrics import classification_report
print(tf.__version__)

2.11.0


## Data Pipeline

In [4]:
sentfin_df = pd.read_csv(data_path + "/SEntFiN-v1.1.csv")
entities_df = pd.read_csv(data_path + "/entity_list_comprehensive.csv")
fiqa_df = pd.read_csv(data_path + "/fiqa_cleaned.csv")
semval_df = pd.read_csv(data_path + "/semval_cleaned.csv")

In [5]:
fiqa_df.rename(columns = {'title':'Title'}, 
               inplace = True)
fiqa_df = fiqa_df[["Title", "sentiment", "company", "Decisions"]]
semval_df.rename(columns = {'title':'Title'}, 
               inplace = True)
semval_df = semval_df [["Title", "sentiment", "company", "Decisions"]]

In [6]:
fiqa_df = pd.concat([fiqa_df, semval_df])

In [7]:
def num_entities(x):
  n = x.Decisions.count(":")
  return n

def get_sent_dict(x):
  sent_dict = json.loads(x.Decisions)
  return sent_dict

def get_entities(x):
  sent_dict = json.loads(x.Decisions)
  entities = list(sent_dict.keys())
  return entities

def get_sents(x):
  sent_dict = json.loads(x.Decisions)
  sent_vals = list(sent_dict.values())
  return sent_vals

def get_sentence_target(x):
  sentences = []
  for t in x.entities:
    s = x.Title
    s = s.replace(t, "TGT")

    sentences.append(s)
  return sentences

def get_entities_str(x):
  ent_str = ', '.join(x.entities)
  return ent_str

sentfin_df["num_entities"] = sentfin_df.apply(num_entities, axis=1)
sentfin_df["sent_dict"] = sentfin_df.apply(get_sent_dict, axis=1)
sentfin_df["entities"] = sentfin_df.apply(get_entities, axis=1)
sentfin_df["entities_str"] = sentfin_df.apply(get_entities_str, axis=1)
sentfin_df["sent_vals"] = sentfin_df.apply(get_sents, axis=1)
sentfin_df['sentences_input'] = sentfin_df.apply(get_sentence_target, axis=1)


In [8]:
def get_conflicting_sents(df):
  count = 0
  for index, row in df.iterrows():
    if "negative" in row.sent_vals:
      if "positive" in row.sent_vals:
        print(row.Title)
        print(row.Decisions)
        count += 1
  return count

num_conflict = get_conflicting_sents(sentfin_df)
print(num_conflict)

Gold shines on seasonal demand; Silver dull
{"Gold": "positive", "Silver": "negative"}
Larger companies reverse trend, beat SMEs in Q3 results
{"Larger companies": "positive", "SMEs": "negative"}
Rice bran oil set to smoke out olive oil
{"Rice bran oil": "positive", "olive oil": "negative"}
War clouds over Crimea: Sensex ends below 21K, gold glitters
{"gold": "positive", "Sensex": "negative"}
Dollar hits 6-year high vs yen on Fed study, sterling woes continue
{"Dollar": "positive", "sterling": "negative", "yen": "neutral"}
State-run oil retailers gain on weak crude prices
{"oil retailers": "positive", "crude": "negative"}
Nikkei rises as yen plumbs 6-year lows
{"Nikkei": "positive", "yen": "negative"}
Large-caps better than mid-caps IT cos: Prateek Agarwal
{"Large-caps": "positive", "mid-caps IT cos": "negative"}
Export-oriented cos to outdo domestic cyclicals: Hemindra Hazari
{"Export-oriented cos": "positive", "domestic cyclicals": "negative"}
Lacklustre factory data hits world stock

In [9]:
def get_fiqa_entities(x):
  l = x.company[1:-1].replace("'", '').split(", ")
  return l

def get_fiqa_sents(x):
  #print(x.Decisions.replace("'",'"'))
  try:
    sent_dict = json.loads(x.Decisions.replace("'",'"'))
    sent_vals = list(sent_dict.values())
    if len(sent_vals) != len(x.entities):
      sent_vals = " "

  except:
    sent_vals = " "
  return sent_vals


fiqa_df["entities"] = fiqa_df.apply(get_fiqa_entities, axis=1)
fiqa_df['sentences_input'] = fiqa_df.apply(get_sentence_target, axis=1)
fiqa_df["sent_vals"] = fiqa_df.apply(get_fiqa_sents, axis=1)
fiqa_df = fiqa_df[fiqa_df.sent_vals != " "]

In [10]:
## Train, Validation, Test Data
train_df, valid_test_df = train_test_split(sentfin_df, test_size=0.3,  random_state=8)
test_df, valid_df = train_test_split(valid_test_df, test_size=0.666, random_state=8)

fiqa_train_df, fiqa_valid_df = train_test_split(fiqa_df, test_size=0.3,  random_state=8)


In [11]:
## Bert data

bert_train_input = []
for index, row in train_df.iterrows():
  bert_train_input.extend(row.sentences_input)

bert_valid_input = []
for index, row in valid_df.iterrows():
  bert_valid_input.extend(row.sentences_input)

bert_test_input = []
for index, row in test_df.iterrows():
  bert_test_input.extend(row.sentences_input)

bert_train_labels = []
for index, row in train_df.iterrows():
  bert_train_labels.extend(row.sent_vals)

bert_valid_labels = []
for index, row in valid_df.iterrows():
  bert_valid_labels.extend(row.sent_vals)

bert_test_labels = []
for index, row in test_df.iterrows():
  bert_test_labels.extend(row.sent_vals)

from sklearn.preprocessing import LabelEncoder  

le = LabelEncoder()
bert_train_labels = le.fit_transform(bert_train_labels)
bert_valid_labels = le.fit_transform(bert_valid_labels)
bert_test_labels = le.fit_transform(bert_test_labels)

In [12]:
## FiQA

fiqa_bert_train_input = []
for index, row in fiqa_train_df.iterrows():
  fiqa_bert_train_input.extend(row.sentences_input)

fiqa_bert_valid_input = []
for index, row in fiqa_valid_df.iterrows():
  fiqa_bert_valid_input.extend(row.sentences_input)

fiqa_bert_train_labels = []
for index, row in fiqa_train_df.iterrows():
  fiqa_bert_train_labels.extend(row.sent_vals)

fiqa_bert_valid_labels = []
for index, row in fiqa_valid_df.iterrows():
  fiqa_bert_valid_labels.extend(row.sent_vals)

from sklearn.preprocessing import LabelEncoder  

le = LabelEncoder()
fiqa_bert_train_labels = le.fit_transform(fiqa_bert_train_labels)
fiqa_bert_valid_labels = le.fit_transform(fiqa_bert_valid_labels)


In [13]:
print(len(fiqa_bert_train_input),
      len(fiqa_bert_train_labels),
      len(fiqa_bert_valid_input),
      len(fiqa_bert_valid_labels))

1131 1131 477 477


In [14]:
print(len(bert_train_input), 
      len(bert_valid_input), 
      len(bert_test_input))

10121 2832 1456


# Models


## BERT


In [None]:
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
bert_model = TFBertModel.from_pretrained('bert-base-cased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/527M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
max_length = 30

bert_train_tokenized = bert_tokenizer(bert_train_input,       
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

bert_train_labels = np.array(bert_train_labels)

bert_valid_tokenized = bert_tokenizer(bert_valid_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

bert_valid_labels = np.array(bert_valid_labels)

bert_test_tokenized = bert_tokenizer(bert_test_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

bert_test_labels = np.array(bert_test_labels)


model_checkpoint = 'bert-base-cased'

In [None]:
def create_bert_multiclass_model(checkpoint = model_checkpoint,
                                 num_classes = 3,
                                 hidden_size = 30, 
                                 dropout=0.2,
                                 learning_rate=0.00002):
   
    bert_model = TFBertModel.from_pretrained(checkpoint)                                     

    
    bert_model.trainable = True
    
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}      

    bert_out = bert_model(bert_inputs)

    pooler_token = bert_out[1]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(pooler_token)

    hidden = tf.keras.layers.Dropout(dropout)(hidden)  

    classification = tf.keras.layers.Dense(num_classes, activation='softmax',name='classification_layer')(hidden)
    
    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])
    
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate,weight_decay=0.01),
                                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), 
                                 metrics='accuracy')

    ### END YOUR CODE
    return classification_model

In [None]:
pooler_bert_model = create_bert_multiclass_model(checkpoint=model_checkpoint, num_classes=3)

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
pooler_bert_model_history = pooler_bert_model.fit([bert_train_tokenized.input_ids, bert_train_tokenized.token_type_ids, bert_train_tokenized.attention_mask], 
                                                  bert_train_labels,   
                                                  validation_data=([bert_valid_tokenized.input_ids, bert_valid_tokenized.token_type_ids, bert_valid_tokenized.attention_mask],
                                                  bert_valid_labels),    
                                                  batch_size=32, 
                                                  epochs=5)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
bert_only_results = pd.DataFrame(pooler_bert_model_history.history)
bert_only_results.to_csv(data_path + '/bert_only_results_3.csv')

In [None]:
bert_uncompiled_results = pooler_bert_model([bert_test_tokenized.input_ids, 
                                             bert_test_tokenized.token_type_ids, 
                                             bert_test_tokenized.attention_mask])

bert_uncompiled_results = le.inverse_transform(tf.argmax(bert_uncompiled_results, axis=-1))


In [None]:
def run_bert(df, model, model_tokenizer):
  output = []
  for index, row in df.iterrows():
    input_str = row.Title
    tgt_entities = row.entities
    row_output = {}
    for e in tgt_entities:
      single_output = run_single_ent(input_str, e, model, model_tokenizer)
      row_output[e] = single_output
    output.append(row_output)
  return output

def run_single_ent(input_str, tgt_ent, model, model_tokenizer):
  input_str = input_str.replace(tgt_ent, 'TGT')
  input = model_tokenizer(input_str,
                         max_length=max_length,
                         truncation=True,
                         padding='max_length',
                         return_tensors='tf')
  single_output = model([input.input_ids, input.token_type_ids, input.attention_mask])
  single_output = le.inverse_transform(tf.argmax(single_output, axis=-1))[0]
  return single_output

In [None]:
bert_only_test_results = run_bert(test_df, pooler_bert_model, bert_tokenizer)

In [29]:
def count_correct(df,results):
  num_correct = 0
  num_incorrect = 0
  i = 0
  for index, row in df.iterrows():
    decision = row.Decisions.replace('"',"'")
    if decision == str(results[i]):
      num_correct += 1
    else:
      num_incorrect += 1
    i += 1
  return num_correct, num_incorrect, num_correct/len(results)

In [None]:
print("BERT only test results: ", count_correct(test_df, bert_only_test_results))


BERT only test results:  (912, 165, 0.8467966573816156)


In [None]:
bert_only_classification_report = pd.DataFrame(classification_report(le.inverse_transform(bert_test_labels), bert_uncompiled_results,output_dict=True))
bert_only_classification_report.to_csv(data_path + "/bert_only_classification_report_3.csv")
print(classification_report(le.inverse_transform(bert_test_labels), bert_uncompiled_results))

              precision    recall  f1-score   support

    negative       0.88      0.88      0.88       402
     neutral       0.83      0.87      0.85       580
    positive       0.90      0.84      0.87       474

    accuracy                           0.86      1456
   macro avg       0.87      0.86      0.86      1456
weighted avg       0.86      0.86      0.86      1456



## FinBERT

In [None]:
#finbert = TFBertModel.from_pretrained("ProsusAI/finbert" )
finbert_tokenizer = BertTokenizer.from_pretrained("ProsusAI/finbert" )

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

In [None]:
max_length = 30

finbert_train_tokenized = finbert_tokenizer(bert_train_input,       
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

finbert_train_labels = np.array(bert_train_labels)

finbert_valid_tokenized = finbert_tokenizer(bert_valid_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

finbert_valid_labels = np.array(bert_valid_labels)

finbert_test_tokenized = finbert_tokenizer(bert_test_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

finbert_test_labels = np.array(bert_test_labels)

In [None]:
finbert_model = create_bert_multiclass_model(checkpoint="ProsusAI/finbert", num_classes=3)

All model checkpoint layers were used when initializing TFBertModel.

All the layers of TFBertModel were initialized from the model checkpoint at ProsusAI/finbert.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [None]:
finbert_model_history = finbert_model.fit([finbert_train_tokenized.input_ids, finbert_train_tokenized.token_type_ids, finbert_train_tokenized.attention_mask], 
                                           finbert_train_labels,   
                                           validation_data=([finbert_valid_tokenized.input_ids, finbert_valid_tokenized.token_type_ids, finbert_valid_tokenized.attention_mask],
                                           finbert_valid_labels),    
                                           batch_size=32, 
                                           epochs=5)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
finbert_only_results = pd.DataFrame(finbert_model_history.history)
finbert_only_results.to_csv(data_path + '/finbert_only_results_3.csv')

In [None]:
finbert_only_test_results = run_bert(test_df, finbert_model, finbert_tokenizer)

In [None]:
print("FinBERT only test results: ", count_correct(test_df, finbert_only_test_results))

FinBERT only test results:  (920, 157, 0.8542246982358404)


In [None]:
finbert_uncompiled_results = finbert_model([finbert_test_tokenized.input_ids, 
                                             finbert_test_tokenized.token_type_ids, 
                                             finbert_test_tokenized.attention_mask])

finbert_uncompiled_results = le.inverse_transform(tf.argmax(finbert_uncompiled_results, axis=-1))


In [None]:
finbert_only_classification_report = pd.DataFrame(classification_report(le.inverse_transform(finbert_test_labels), finbert_uncompiled_results, output_dict=True))
finbert_only_classification_report.to_csv(data_path + "/finbert_only_classification_report_3.csv")
print(classification_report(le.inverse_transform(finbert_test_labels), finbert_uncompiled_results))

              precision    recall  f1-score   support

    negative       0.89      0.87      0.88       402
     neutral       0.86      0.84      0.85       580
    positive       0.87      0.90      0.88       474

    accuracy                           0.87      1456
   macro avg       0.87      0.87      0.87      1456
weighted avg       0.87      0.87      0.87      1456



## RoBERTa

In [15]:
from transformers import RobertaTokenizer, TFRobertaModel
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = TFRobertaModel.from_pretrained('roberta-base')

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/657M [00:00<?, ?B/s]

Some layers from the model checkpoint at roberta-base were not used when initializing TFRobertaModel: ['lm_head']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


### FiQA

In [17]:
max_length = 30
fiqa_roberta_train_tokenized = roberta_tokenizer(fiqa_bert_train_input,       
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')


fiqa_roberta_train_labels = np.array(fiqa_bert_train_labels)

fiqa_roberta_valid_tokenized = roberta_tokenizer(fiqa_bert_valid_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

fiqa_roberta_valid_labels = np.array(fiqa_bert_valid_labels)
model_checkpoint = 'roberta-base'

In [18]:
def create_roberta_fiqa_model(checkpoint = model_checkpoint,
                              num_classes = 3,
                              hidden_size = 30, 
                              dropout=0.2,
                              learning_rate=0.00002):
   
    roberta_model = TFRobertaModel.from_pretrained(checkpoint)                                     

    
    roberta_model.trainable = True
    
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='input_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask_layer')

    roberta_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    roberta_out = roberta_model(roberta_inputs)

    pooler_token = roberta_out[1]
    print(pooler_token.shape)

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(pooler_token)

    hidden = tf.keras.layers.Dropout(dropout)(hidden)  

    classification = tf.keras.layers.Dense(num_classes, activation='softmax',name='classification_layer')(hidden)
    
    classification_model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=[classification])
    
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, weight_decay=0.02),
                                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), 
                                 metrics='accuracy')

    ### END YOUR CODE
    return classification_model

In [19]:
fiqa_roberta_model = create_roberta_fiqa_model(checkpoint=model_checkpoint, 
                                               num_classes=3,
                                               dropout=0.15,
                                               hidden_size=30,
                                               learning_rate=0.000005)

Some layers from the model checkpoint at roberta-base were not used when initializing TFRobertaModel: ['lm_head']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


(None, 768)


In [20]:
fiqa_roberta_model_history = fiqa_roberta_model.fit([fiqa_roberta_train_tokenized.input_ids, fiqa_roberta_train_tokenized.attention_mask], 
                                                     fiqa_roberta_train_labels,   
                                                     validation_data=([fiqa_roberta_valid_tokenized.input_ids, fiqa_roberta_valid_tokenized.attention_mask],
                                                     fiqa_roberta_valid_labels),    
                                                     batch_size=4, 
                                                     epochs=5) 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Sentfin

In [21]:
max_length = 30
roberta_train_tokenized = roberta_tokenizer(bert_train_input,       
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')


roberta_train_labels = np.array(bert_train_labels)

roberta_valid_tokenized = roberta_tokenizer(bert_valid_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

roberta_valid_labels = np.array(bert_valid_labels)

roberta_test_tokenized = roberta_tokenizer(bert_test_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

roberta_test_labels = np.array(bert_test_labels)

model_checkpoint = 'roberta-base'

In [119]:
def create_roberta_multiclass_model(#checkpoint = model_checkpoint,
                                    model=fiqa_roberta_model,
                              num_classes = 3,
                              hidden_size = 30, 
                              dropout=0.2,
                              learning_rate=0.00002):

    roberta_model = model.layers[2] 
    #roberta_model = TFRobertaModel.from_pretrained(checkpoint)                               
    
    roberta_model.trainable = True
    
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='input_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int64, name='attention_mask_layer')

    roberta_inputs = {'input_ids': input_ids,
                   'attention_mask': attention_mask}      

    roberta_out = roberta_model(roberta_inputs)

    pooler_token = roberta_out[1]
    print(pooler_token.shape)

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(pooler_token)

    hidden = tf.keras.layers.Dropout(dropout)(hidden)  

    classification = tf.keras.layers.Dense(num_classes, activation='softmax',name='classification_layer')(hidden)
    
    classification_model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=[classification])
    
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, weight_decay=0.02),
                                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), 
                                 metrics='accuracy')

    ### END YOUR CODE
    return classification_model

In [23]:
roberta_model = create_roberta_multiclass_model(num_classes=3,
                                               dropout=0.2,
                                               hidden_size=30,
                                               learning_rate=0.000005)

Some layers from the model checkpoint at roberta-base were not used when initializing TFRobertaModel: ['lm_head']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at roberta-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


(None, 768)


In [24]:
roberta_model_history = roberta_model.fit([roberta_train_tokenized.input_ids, roberta_train_tokenized.attention_mask], 
                                           roberta_train_labels,   
                                           validation_data=([roberta_valid_tokenized.input_ids, roberta_valid_tokenized.attention_mask],
                                           roberta_valid_labels),    
                                           batch_size=32, 
                                           epochs=5)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [120]:
pretrain_roberta_model = create_roberta_multiclass_model(num_classes=3,
                                               dropout=0.2,
                                               hidden_size=30,
                                               learning_rate=0.000005)

(None, 768)


In [121]:
pretrain_roberta_model_history = pretrain_roberta_model.fit([roberta_train_tokenized.input_ids, roberta_train_tokenized.attention_mask], 
                                           roberta_train_labels,   
                                           validation_data=([roberta_valid_tokenized.input_ids, roberta_valid_tokenized.attention_mask],
                                           roberta_valid_labels),    
                                           batch_size=32, 
                                           epochs=5)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [122]:
fiqa_roberta_only_results = pd.DataFrame(pretrain_roberta_model_history.history)
fiqa_roberta_only_results.to_csv(data_path + '/fiqa_roberta_only_results_final.csv')

In [25]:
roberta_only_results = pd.DataFrame(roberta_model_history.history)
roberta_only_results.to_csv(data_path + '/roberta_only_results_final.csv')

In [123]:
def run_roberta(df, model, model_tokenizer):
  output = []
  for index, row in df.iterrows():
    input_str = row.Title
    tgt_entities = row.entities
    row_output = {}
    for e in tgt_entities:
      single_output = run_single_ent_roberta(input_str, e, model, model_tokenizer)
      row_output[e] = single_output
    output.append(row_output)
  return output

def run_single_ent_roberta(input_str, tgt_ent, model, model_tokenizer):
  input_str = input_str.replace(tgt_ent, 'TGT')
  input = model_tokenizer(input_str,
                         max_length=max_length,
                         truncation=True,
                         padding='max_length',
                         return_tensors='tf')
  single_output = model([input.input_ids, input.attention_mask])
  single_output = le.inverse_transform(tf.argmax(single_output, axis=-1))[0]
  return single_output

In [27]:
roberta_only_test_results = run_roberta(test_df, roberta_model, roberta_tokenizer)

In [30]:
print("RoBERTa only test results: ", count_correct(test_df, roberta_only_test_results))

RoBERTa only test results:  (942, 135, 0.8746518105849582)


In [124]:
roberta_fiqa_only_test_results = run_roberta(test_df, pretrain_roberta_model, roberta_tokenizer)

In [125]:
print("RoBERTa+FiQA only test results: ", count_correct(test_df, roberta_fiqa_only_test_results))

RoBERTa+FiQA only test results:  (930, 147, 0.8635097493036211)


In [126]:
fiqa_roberta_uncompiled_results = pretrain_roberta_model([roberta_test_tokenized.input_ids,
                                            roberta_test_tokenized.attention_mask])

fiqa_roberta_uncompiled_results = le.inverse_transform(tf.argmax(fiqa_roberta_uncompiled_results, axis=-1))


In [127]:
fiqa_roberta_only_classification_report = pd.DataFrame(classification_report(le.inverse_transform(roberta_test_labels), fiqa_roberta_uncompiled_results, output_dict=True))
fiqa_roberta_only_classification_report.to_csv(data_path + "/fiqa_roberta_only_classification_report.csv")
print(classification_report(le.inverse_transform(roberta_test_labels), fiqa_roberta_uncompiled_results))

              precision    recall  f1-score   support

    negative       0.88      0.92      0.90       402
     neutral       0.90      0.83      0.86       580
    positive       0.86      0.92      0.89       474

    accuracy                           0.88      1456
   macro avg       0.88      0.89      0.89      1456
weighted avg       0.88      0.88      0.88      1456



In [33]:
roberta_uncompiled_results = roberta_model([roberta_test_tokenized.input_ids,
                                            roberta_test_tokenized.attention_mask])

roberta_uncompiled_results = le.inverse_transform(tf.argmax(roberta_uncompiled_results, axis=-1))


In [34]:
roberta_only_classification_report = pd.DataFrame(classification_report(le.inverse_transform(roberta_test_labels), roberta_uncompiled_results, output_dict=True))
roberta_only_classification_report.to_csv(data_path + "/roberta_only_classification_report_final.csv")
print(classification_report(le.inverse_transform(roberta_test_labels), roberta_uncompiled_results))

              precision    recall  f1-score   support

    negative       0.90      0.90      0.90       402
     neutral       0.88      0.87      0.87       580
    positive       0.89      0.91      0.90       474

    accuracy                           0.89      1456
   macro avg       0.89      0.89      0.89      1456
weighted avg       0.89      0.89      0.89      1456



## T5 - NER

In [35]:
from transformers import T5Tokenizer, TFT5Model, TFT5ForConditionalGeneration
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, create_optimizer

In [36]:
from transformers import modeling_tf_utils

In [37]:
t5_model = TFT5ForConditionalGeneration.from_pretrained('t5-base')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/892M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [38]:
max_length = 30

ner_train_input = train_df.Title.to_list()
ner_train_output = train_df.entities_str.to_list()


ner_valid_input = valid_df.Title.to_list()
ner_valid_output = valid_df.entities_str.to_list()

ner_train_input_tokenized = t5_tokenizer(ner_train_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

ner_train_output_tokenized = t5_tokenizer(ner_train_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

ner_valid_input_tokenized = t5_tokenizer(ner_valid_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

ner_valid_output_tokenized = t5_tokenizer(ner_valid_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

ner_train_encodings = {
        'input_ids': ner_train_input_tokenized['input_ids'], 
        'attention_mask': ner_train_input_tokenized['attention_mask'],
        'labels': ner_train_output_tokenized['input_ids']}


ner_valid_encodings = {
        'input_ids': ner_valid_input_tokenized['input_ids'], 
        'attention_mask': ner_valid_input_tokenized['attention_mask'],
        'labels': ner_valid_output_tokenized['input_ids']}


ner_train_labels = np.array(ner_train_output_tokenized.input_ids)
for x in ner_train_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
ner_train_labels = tf.convert_to_tensor(ner_train_labels)
ner_train_encodings['labels'] = ner_train_labels

ner_valid_labels = np.array(ner_valid_output_tokenized.input_ids)
for x in ner_valid_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
ner_valid_labels = tf.convert_to_tensor(ner_valid_labels)
ner_valid_encodings['labels'] = ner_valid_labels

In [39]:
ner_train_encodings = {
        'input_ids': list(np.array(ner_train_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(ner_train_input_tokenized['attention_mask'])),
        'labels': list(np.array(ner_train_encodings['labels']))}
ner_valid_encodings = {
        'input_ids': list(np.array(ner_valid_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(ner_valid_input_tokenized['attention_mask'])),
        'labels': list(np.array(ner_valid_encodings['labels']))}

ner_train_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(train_df)
ner_valid_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(valid_df)

In [40]:
ner_train_encodings_df =  Dataset.from_pandas(pd.DataFrame(ner_train_encodings))
ner_valid_encodings_df =  Dataset.from_pandas(pd.DataFrame(ner_valid_encodings))

In [70]:
ner_model = TFT5ForConditionalGeneration.from_pretrained('t5-base')

batch_size = 32
num_epochs = 3

data_collator = DataCollatorForSeq2Seq(tokenizer=t5_tokenizer, model=ner_model, return_tensors="tf")


tf_train = ner_train_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)

tf_valid = ner_valid_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)

num_train_steps = len(tf_train) * num_epochs

# optimizer, schedule = create_optimizer(
#     init_lr=3e-4,
#     num_warmup_steps=0,
#     num_train_steps=num_train_steps,
#     weight_decay_rate=0.02)

ner_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001,weight_decay=0.02))



All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


In [71]:
ner_model.fit(
  tf_train,
  validation_data = tf_valid,
  epochs=3,
  batch_size=batch_size
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fcb2d9a9eb0>

In [72]:
ner_test_input = test_df.Title.to_list()
ner_test_output = test_df.entities_str.to_list()

ner_test_input_tokenized = t5_tokenizer(ner_test_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

ner_test_output_tokenized = t5_tokenizer(ner_test_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

In [73]:
ner_test_encodings = {
        'input_ids': ner_test_input_tokenized['input_ids'], 
        'attention_mask': ner_test_input_tokenized['attention_mask'],
        'labels': ner_test_output_tokenized['input_ids']}


ner_test_labels = np.array(ner_test_output_tokenized.input_ids)
for x in ner_test_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
ner_test_labels = tf.convert_to_tensor(ner_test_labels)
ner_test_encodings['labels'] = ner_test_labels

In [74]:
ner_test_ids = ner_model.generate(ner_test_encodings['input_ids'])
ner_test_results = [t5_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in ner_test_ids]



In [75]:
ner_results_df = pd.DataFrame(ner_test_results, ner_test_output).reset_index()
ner_results_df.columns= ["Model_entities", "True_entities"]
ner_results_df.to_csv(data_path + "/t5_ner_final.csv")

In [76]:
def count_correct_ents(df,results):
  num_correct = 0
  num_incorrect = 0
  i = 0
  for index, row in df.iterrows():
    ents = row.entities_str
    if ents == results[i]:
      num_correct += 1
    else:
      num_incorrect += 1
    i += 1
  return num_correct, num_incorrect, num_correct/len(results)

In [77]:
count_correct_ents(test_df, ner_test_results)

(870, 207, 0.807799442896936)

## T5 + BERT

In [78]:
test_df['entities_modeled'] = np.array(ner_test_results)

In [79]:
def get_ner_list(x):
  ner_lst = x.entities_modeled.split(", ")
  return ner_lst

test_df['entities_modeled_lst'] = test_df.apply(get_ner_list, axis=1)

In [80]:
def get_test_target(x):
  sentences = []
  for t in x.entities_modeled_lst:
    s = x.Title
    s = s.replace(t, "TGT")
    sentences.append(s)
  return sentences

test_df['sentences_input_modeled'] = test_df.apply(get_test_target, axis=1)


In [81]:
t5_bert_test_input = []
for index, row in test_df.iterrows():
  t5_bert_test_input.extend(row.sentences_input_modeled)

t5_bert_test_tokenized = roberta_tokenizer(t5_bert_test_input,     
              truncation=True,
              padding='max_length',
              max_length=max_length,
              return_tensors='tf')

In [128]:
def run_roberta(df):
  output = []
  for index, row in df.iterrows():
    input_str = row.Title
    tgt_entities = row.entities_modeled_lst
    row_output = {}
    for e in tgt_entities:
      single_output = run_single_ent_roberta(input_str, e)
      row_output[e] = single_output
    output.append(row_output)
  return output

def run_single_ent_roberta(input_str, tgt_ent):
  input_str = input_str.replace(tgt_ent, 'TGT')
  input = roberta_tokenizer(input_str,
                         max_length=max_length,
                         truncation=True,
                         padding='max_length',
                         return_tensors='tf')
  single_output = pretrain_roberta_model([input.input_ids, input.attention_mask])
  single_output = le.inverse_transform(tf.argmax(single_output, axis=-1))[0]
  return single_output

In [83]:
t5_bert_results = run_roberta(test_df)

In [129]:
t5_pretrain_bert_results = run_roberta(test_df)

In [84]:
def calc_result(df, results):
  num_correct = 0
  num_incorrect = 0
  for y_pred, y_label in zip(df.sent_dict, results):
    if y_pred == y_label:
      num_correct += 1
    else:
      num_incorrect += 1
  return num_correct, num_incorrect, num_correct/len(results)

In [85]:
## t5+roberta
calc_result(test_df, t5_bert_results)

(784, 293, 0.7279480037140205)

In [101]:
def clean_label(labels):
  cleaned_labels = []
  for x in labels:
    cleaned_x = x[1:-1].replace('"','')
    cleaned_labels.append(cleaned_x)
  return cleaned_labels

In [110]:
t5_roberta_results_df = pd.DataFrame([test_df.Title.to_list(), 
                                     t5_bert_results, 
                                     clean_label(test_df.Decisions.to_list())]).T
t5_roberta_results_df.columns = ["Title", "Model_output", "True_output"]

In [112]:
t5_roberta_results_df.to_csv(data_path + "/t5_roberta_outputs_final.csv")

In [179]:
t5_pretrain_results_df = pd.DataFrame([test_df.Title.to_list(), 
                                     t5_pretrain_bert_results, 
                                     clean_label(test_df.Decisions.to_list())]).T
t5_pretrain_results_df.columns = ["Title", "Model_output", "True_output"]

In [180]:
t5_pretrain_results_df.to_csv(data_path + "/t5_pretrain_outputs_final.csv")

In [113]:
def get_wrong_t5_roberta(df, results):
  title_lst = []
  correct_decision_lst = []
  wrong_decision_lst = []
  entities_lst = []
  for y_label, y_pred, title, entities in zip(df.sent_dict, results, df.Title, df.entities_modeled_lst):
    if y_pred != y_label:
      title_lst.append(title)
      correct_decision_lst.append(y_label)
      wrong_decision_lst.append(y_pred)
      entities_lst.append(entities)
  return title_lst, wrong_decision_lst, correct_decision_lst, entities_lst

In [114]:
wrong_title_lst, wrong_decision_lst, correct_decision_lst, wrong_entities_lst = get_wrong_t5_roberta(test_df, t5_bert_results)

In [115]:
wrong_df = pd.DataFrame([wrong_title_lst, wrong_decision_lst, correct_decision_lst, wrong_entities_lst]).T

In [116]:
wrong_df.columns = ["Title", "Model Decisions", "True Decisions", "Model Entities"]

In [117]:
wrong_df.to_csv(data_path + '/t5_roberta_wrong.csv')

In [130]:
## fiqa pretrain
calc_result(test_df, t5_pretrain_bert_results)

(775, 302, 0.7195914577530177)

## T5 Unified

In [131]:
## Split samples

max_length = 30

def clean_label(labels):
  cleaned_labels = []
  for x in labels:
    cleaned_x = x[1:-1].replace('"','')
    cleaned_labels.append(cleaned_x)
  return cleaned_labels

t5_train_input = train_df.Title.to_list()
t5_train_output = train_df.Decisions.to_list()
t5_train_output = clean_label(t5_train_output)


t5_valid_input = valid_df.Title.to_list()
t5_valid_output = valid_df.Decisions.to_list()
t5_valid_output = clean_label(t5_valid_output)


In [132]:
t5_train_input_tokenized = t5_tokenizer(t5_train_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

t5_train_output_tokenized = t5_tokenizer(t5_train_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

t5_valid_input_tokenized = t5_tokenizer(t5_valid_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

t5_valid_output_tokenized = t5_tokenizer(t5_valid_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

train_encodings = {
        'input_ids': t5_train_input_tokenized['input_ids'], 
        'attention_mask': t5_train_input_tokenized['attention_mask'],
        'labels': t5_train_output_tokenized['input_ids']}


valid_encodings = {
        'input_ids': t5_valid_input_tokenized['input_ids'], 
        'attention_mask': t5_valid_input_tokenized['attention_mask'],
        'labels': t5_valid_output_tokenized['input_ids']}


train_labels = np.array(t5_train_output_tokenized.input_ids)
for x in train_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
train_labels = tf.convert_to_tensor(train_labels)
train_encodings['labels'] = train_labels

valid_labels = np.array(t5_valid_output_tokenized.input_ids)
for x in valid_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
valid_labels = tf.convert_to_tensor(valid_labels)
valid_encodings['labels'] = valid_labels

In [133]:
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, create_optimizer

In [134]:
train_encodings = {
        'input_ids': list(np.array(t5_train_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(t5_train_input_tokenized['attention_mask'])),
        'labels': list(np.array(train_encodings['labels']))}
valid_encodings = {
        'input_ids': list(np.array(t5_valid_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(t5_valid_input_tokenized['attention_mask'])),
        'labels': list(np.array(valid_encodings['labels']))}

train_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(train_df)
valid_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(valid_df)

In [135]:
train_encodings_df =  Dataset.from_pandas(pd.DataFrame(train_encodings))
valid_encodings_df =  Dataset.from_pandas(pd.DataFrame(valid_encodings))

In [136]:
t5_model = TFT5ForConditionalGeneration.from_pretrained('t5-base')

batch_size = 32
num_epochs = 3

data_collator = DataCollatorForSeq2Seq(tokenizer=t5_tokenizer, model=t5_model, return_tensors="tf")


tf_train = train_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)

tf_valid = valid_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)


t5_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001,weight_decay=0.02))



All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


In [137]:
t5_model_history = t5_model.fit(tf_train,
                                validation_data = tf_valid,
                                epochs=5,
                                batch_size=batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [140]:
t5_test_input = test_df.Title.to_list()
t5_test_output = test_df.Decisions.to_list()

t5_test_input_tokenized = t5_tokenizer(t5_test_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

t5_test_output_tokenized = t5_tokenizer(t5_test_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

In [141]:
test_encodings = {
        'input_ids': t5_test_input_tokenized['input_ids'], 
        'attention_mask': t5_test_input_tokenized['attention_mask'],
        'labels': t5_test_output_tokenized['input_ids']}


test_labels = np.array(t5_test_output_tokenized.input_ids)
for x in test_labels:
  x[x == t5_tokenizer.pad_token_id] = -100
test_labels = tf.convert_to_tensor(test_labels)
test_encodings['labels'] = test_labels

In [142]:
t5_test_ids = t5_model.generate(test_encodings['input_ids'])



In [143]:
t5_test_results = [t5_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in t5_test_ids]

In [144]:
def count_correct(df,results):
  num_correct = 0
  num_incorrect = 0
  i = 0
  for index, row in df.iterrows():
    decision = row.Decisions[1:-1].replace('"','')
    if decision == results[i]:
      num_correct += 1
    else:
      num_incorrect += 1
    i += 1
  return num_correct, num_incorrect, num_correct/len(results)

In [145]:
count_correct(test_df, t5_test_results)

(657, 420, 0.6100278551532033)

In [146]:
t5_unified_results_df = pd.DataFrame([test_df.Title.to_list(), 
                                     t5_test_results, 
                                     clean_label(test_df.Decisions.to_list())]).T
t5_unified_results_df.columns = ["Title", "Model_output", "True_output"]

In [147]:
t5_unified_results_df.to_csv(data_path + "/t5_unified_outputs_final.csv")

## FLAN-T5 + BERT

In [None]:
flan_t5_model = TFT5ForConditionalGeneration.from_pretrained('google/flan-t5-base')
flan_t5_tokenizer = T5Tokenizer.from_pretrained('google/flan-t5-base')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at google/flan-t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

In [None]:
max_length = 30

flan_ner_train_input = train_df.Title.to_list()
flan_ner_train_output = train_df.entities_str.to_list()


flan_ner_valid_input = valid_df.Title.to_list()
flan_ner_valid_output = valid_df.entities_str.to_list()

flan_ner_train_input_tokenized = flan_t5_tokenizer(flan_ner_train_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

flan_ner_train_output_tokenized = flan_t5_tokenizer(flan_ner_train_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

flan_ner_valid_input_tokenized = flan_t5_tokenizer(flan_ner_valid_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

flan_ner_valid_output_tokenized = flan_t5_tokenizer(flan_ner_valid_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

flan_ner_train_encodings = {
        'input_ids': flan_ner_train_input_tokenized['input_ids'], 
        'attention_mask': flan_ner_train_input_tokenized['attention_mask'],
        'labels': flan_ner_train_output_tokenized['input_ids']}


flan_ner_valid_encodings = {
        'input_ids': flan_ner_valid_input_tokenized['input_ids'], 
        'attention_mask': flan_ner_valid_input_tokenized['attention_mask'],
        'labels': flan_ner_valid_output_tokenized['input_ids']}


flan_ner_train_labels = np.array(flan_ner_train_output_tokenized.input_ids)
for x in flan_ner_train_labels:
  x[x == flan_t5_tokenizer.pad_token_id] = -100
flan_ner_train_labels = tf.convert_to_tensor(flan_ner_train_labels)
flan_ner_train_encodings['labels'] = flan_ner_train_labels

flan_ner_valid_labels = np.array(flan_ner_valid_output_tokenized.input_ids)
for x in flan_ner_valid_labels:
  x[x == flan_t5_tokenizer.pad_token_id] = -100
flan_ner_valid_labels = tf.convert_to_tensor(flan_ner_valid_labels)
flan_ner_valid_encodings['labels'] = flan_ner_valid_labels

In [None]:
flan_ner_train_encodings = {
        'input_ids': list(np.array(flan_ner_train_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(flan_ner_train_input_tokenized['attention_mask'])),
        'labels': list(np.array(flan_ner_train_encodings['labels']))}
flan_ner_valid_encodings = {
        'input_ids': list(np.array(flan_ner_valid_input_tokenized['input_ids'])), 
        'attention_mask': list(np.array(flan_ner_valid_input_tokenized['attention_mask'])),
        'labels': list(np.array(flan_ner_valid_encodings['labels']))}

flan_ner_train_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(train_df)
flan_ner_valid_encodings['decoder_input_ids'] =  [np.zeros(30)]*len(valid_df)

In [None]:
flan_ner_train_encodings_df =  Dataset.from_pandas(pd.DataFrame(flan_ner_train_encodings))
flan_ner_valid_encodings_df =  Dataset.from_pandas(pd.DataFrame(flan_ner_valid_encodings))

In [None]:
flan_ner_model = TFT5ForConditionalGeneration.from_pretrained('google/flan-t5-base')

batch_size = 32
num_epochs = 3

data_collator = DataCollatorForSeq2Seq(tokenizer=flan_t5_tokenizer, 
                                       model=flan_ner_model, 
                                       return_tensors="tf")


tf_train = flan_ner_train_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)

tf_valid = flan_ner_valid_encodings_df.to_tf_dataset(
  columns=["attention_mask", "input_ids", 'decoder_input_ids', 'labels'],
  shuffle=True,
  collate_fn=data_collator,
  batch_size=batch_size)

num_train_steps = len(tf_train) * num_epochs

# optimizer, schedule = create_optimizer(
#     init_lr=3e-4,
#     num_warmup_steps=0,
#     num_train_steps=num_train_steps,
#     weight_decay_rate=0.02)

flan_ner_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.00005,weight_decay=0.02))



All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at google/flan-t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
No loss specified in compile() - the model's internal loss computation will be used as the loss. Don't panic - this is a common way to train TensorFlow models in Transformers! To disable this behaviour please pass a loss argument, or explicitly pass `loss=None` if you do not want your model to compute a loss.


In [None]:
flan_ner_model.fit(
  tf_train,
  validation_data = tf_valid,
  epochs=4,
  batch_size=batch_size
)

Epoch 1/4


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f3858add910>

In [None]:
flan_ner_test_input = test_df.Title.to_list()
flan_ner_test_output = test_df.entities_str.to_list()

flan_ner_test_input_tokenized = flan_t5_tokenizer(flan_ner_test_input, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

flan_ner_test_output_tokenized = flan_t5_tokenizer(flan_ner_test_output, 
                                        truncation=True,
                                        padding='max_length',
                                        max_length=max_length,
                                        return_tensors='tf')

In [None]:
flan_ner_test_encodings = {
        'input_ids': flan_ner_test_input_tokenized['input_ids'], 
        'attention_mask': flan_ner_test_input_tokenized['attention_mask'],
        'labels': flan_ner_test_output_tokenized['input_ids']}


flan_ner_test_labels = np.array(flan_ner_test_output_tokenized.input_ids)
for x in flan_ner_test_labels:
  x[x == flan_t5_tokenizer.pad_token_id] = -100
flan_ner_test_labels = tf.convert_to_tensor(flan_ner_test_labels)
flan_ner_test_encodings['labels'] = flan_ner_test_labels

In [None]:
flan_ner_test_ids = flan_ner_model.generate(flan_ner_test_encodings['input_ids'])
flan_ner_test_results = [flan_t5_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in flan_ner_test_ids]



In [None]:
count_correct_ents(test_df, flan_ner_test_results)

(846, 231, 0.7855153203342619)

# Model Analysis

In [38]:
import pandas as pd
from sklearn.metrics import classification_report

In [3]:
ner_results_df = pd.read_csv(data_path + '/t5_ner_final.csv')
t5_unified_results_df = pd.read_csv(data_path + '/t5_unified_outputs_final.csv')
t5_roberta_results_df = pd.read_csv(data_path + '/t5_roberta_outputs_final.csv')
t5_pretrain_results_df = pd.read_csv(data_path + '/t5_pretrain_outputs_final.csv')
t5_unified_results_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Model_output,True_output
0,0,L&T in talk to supply nuclear equipments; stoc...,L&T: positive,L&T: positive
1,1,Voltas surges on cooling products-led beat in ...,Voltas: positive,Voltas: positive
2,2,Important that Bank Nifty captures 19150 level...,Bank Nifty: positive,Bank Nifty: neutral
3,3,"Gold dips on firmer equities; Middle East, Ukr...",Gold: negative,Gold: negative
4,4,Citigroup sells shares worth Rs 480 cr of 35 I...,Citigroup: neutral,"Indian cos: negative, Citigroup: neutral"


In [4]:
t5_roberta_results_df.True_output[1]

'Voltas: positive'

In [5]:
def loose_ner_accuracy(df):
  num_correct = 0
  num_incorrect = 0
  correct = []
  for index, row in df.iterrows():
    true_entities = row.True_entities.split(", ")
    #print("true:", true_entities)
    model_entities = row.Model_entities.split(", ")
    #print("model:", model_entities)
    e_correct = 0
    for e in true_entities:
      w = e.split(" ")
      for e_m in model_entities:
        w_m = e_m.split(" ")
        if w_m[0] == w[0]:
          e_correct += 1
    if e_correct == len(true_entities) and len(model_entities) == len(true_entities):
      correct.append(True)
      num_correct += 1
    else:
      correct.append(False)
  return num_correct, num_correct/len(df), correct

In [6]:
ner_correct_num, ner_accuracy, ner_correct_tag = loose_ner_accuracy(ner_results_df)
print(ner_accuracy)

0.8672237697307336


In [7]:
def get_true_dict(df):
  true_dict_lst = []
  for index, row in df.iterrows():
    true_output = row.True_output.split(", ")
    true_dict = {}
    for x in true_output:
      l = x.split(": ")
      try:
        true_dict[l[0]] = l[1]
      except:
        pass
        #print(true_output)
    true_dict_lst.append(true_dict)
  return true_dict_lst

def adj_accuracy(df):
  num_correct = 0
  correct_tag = []
  for index, row in df.iterrows():
    true_output = row.True_output_dict
    model_output = row.Model_output
    #print("true:", true_output)  
    #print("model:", model_output)
    e_correct = 0
    for e, s in true_output.items():
      for e_m, s_m in model_output.items():
        if e.split(" ")[0] == e_m.split(" ")[0]:
          if s == s_m:
            e_correct += 1
    if e_correct == len(true_output.keys()) and len(true_output.keys()) == len(model_output.keys()):
      num_correct += 1
      correct_tag.append(True)
    else:
      correct_tag.append(False)
     
  return num_correct, num_correct/len(df), correct_tag


In [8]:
def t5_get_true_dict(df):
  true_dict_lst = []
  for index, row in df.iterrows():
    true_output = row.True_output.split(", ")
    true_dict = {}
    for x in true_output:
      l = x.split(": ")
      try:
        true_dict[l[0]] = l[1]
      except:
        pass
        #print(true_output)
    true_dict_lst.append(true_dict)
  return true_dict_lst

def t5_get_model_dict(df):
  model_dict_lst = []
  for index, row in df.iterrows():
    model_output_str = row.Model_output
    if model_output_str[0] == "{":
        model_output_str = model_output_str[1:-1].replace("'", '')
    model_output = model_output_str.split(", ")
    model_dict = {}
    for x in model_output:
      
      l = x.split(": ")
      try:
        model_dict[l[0]] = l[1]
      except:
        pass
        #print(model_output)
    model_dict_lst.append(model_dict)
  return model_dict_lst

def t5_adj_accuracy(df):
  num_correct = 0
  correct_tag = []
  for index, row in df.iterrows():
    true_output = row.True_output_dict
    model_output = row.Model_output_dict
    #print("true:", true_output)  
    #print("model:", model_output)
    e_correct = 0
    for e, s in true_output.items():
      for e_m, s_m in model_output.items():
        if e.split(" ")[0] == e_m.split(" ")[0]:
          if s == s_m:
            e_correct += 1
    if e_correct == len(true_output.keys()) and len(true_output.keys()) == len(model_output.keys()):
      num_correct += 1
      correct_tag.append(True)
    else:
      correct_tag.append(False)
      #print("correct")
  return num_correct, num_correct/len(df), correct_tag


In [9]:
t5_unified_results_df["True_output_dict"] = np.array(t5_get_true_dict(t5_unified_results_df))
t5_unified_results_df["Model_output_dict"] = np.array(t5_get_model_dict(t5_unified_results_df))


t5_unified_adj_acc, t5_unified_results_df["Model_correct"] = t5_adj_accuracy(t5_unified_results_df)[1], t5_adj_accuracy(t5_unified_results_df)[2]

print("T5 unified adj acc: ", t5_unified_adj_acc)

T5 unified adj acc:  0.6480965645311049


In [10]:
t5_roberta_results_df["True_output_dict"] = np.array(t5_get_true_dict(t5_roberta_results_df))
t5_roberta_results_df["Model_output_dict"] = np.array(t5_get_model_dict(t5_roberta_results_df))

t5_roberta_adj_acc,  t5_roberta_results_df["Model_correct"] = t5_adj_accuracy(t5_roberta_results_df)[1], t5_adj_accuracy(t5_roberta_results_df)[2]
print("T5+RoBERTA adj acc: ", t5_roberta_adj_acc)

t5_pretrain_results_df["True_output_dict"] = np.array(t5_get_true_dict(t5_pretrain_results_df))
t5_pretrain_results_df["Model_output_dict"] = np.array(t5_get_model_dict(t5_pretrain_results_df))

t5_pretrain_adj_acc, t5_pretrain_results_df["Model_correct"] = t5_adj_accuracy(t5_pretrain_results_df)[1], t5_adj_accuracy(t5_pretrain_results_df)[2]
print("T5+RoBERTA+pretrain adj acc: ", t5_pretrain_adj_acc)

T5+RoBERTA adj acc:  0.7613741875580315
T5+RoBERTA+pretrain adj acc:  0.7511606313834726


In [11]:
def get_multi_ent(x):
  return len(x.True_output_dict.keys())

t5_unified_results_df["Num_Entity"] = t5_unified_results_df.apply(get_multi_ent, axis=1)
t5_roberta_results_df["Num_Entity"] = t5_roberta_results_df.apply(get_multi_ent, axis=1)
t5_pretrain_results_df["Num_Entity"] = t5_pretrain_results_df.apply(get_multi_ent, axis=1)


In [12]:
t5_unified_results_df.groupby([ "Model_correct", "Num_Entity"]).count().Title

Model_correct  Num_Entity
False          1             190
               2             132
               3              42
               4              10
               5               5
True           1             599
               2              86
               3              12
               4               1
Name: Title, dtype: int64

In [13]:
t5_roberta_results_df.groupby(["Model_correct", "Num_Entity"]).count().Title

Model_correct  Num_Entity
False          1             147
               2              78
               3              26
               4               5
               5               1
True           1             642
               2             140
               3              28
               4               6
               5               4
Name: Title, dtype: int64

In [14]:
t5_pretrain_results_df.groupby(["Model_correct", "Num_Entity"]).count().Title

Model_correct  Num_Entity
False          1             156
               2              79
               3              25
               4               7
               5               1
True           1             633
               2             139
               3              29
               4               4
               5               4
Name: Title, dtype: int64

In [15]:
t5_unified_results_df.head()

Unnamed: 0.1,Unnamed: 0,Title,Model_output,True_output,True_output_dict,Model_output_dict,Model_correct,Num_Entity
0,0,L&T in talk to supply nuclear equipments; stoc...,L&T: positive,L&T: positive,{'L&T': 'positive'},{'L&T': 'positive'},True,1
1,1,Voltas surges on cooling products-led beat in ...,Voltas: positive,Voltas: positive,{'Voltas': 'positive'},{'Voltas': 'positive'},True,1
2,2,Important that Bank Nifty captures 19150 level...,Bank Nifty: positive,Bank Nifty: neutral,{'Bank Nifty': 'neutral'},{'Bank Nifty': 'positive'},False,1
3,3,"Gold dips on firmer equities; Middle East, Ukr...",Gold: negative,Gold: negative,{'Gold': 'negative'},{'Gold': 'negative'},True,1
4,4,Citigroup sells shares worth Rs 480 cr of 35 I...,Citigroup: neutral,"Indian cos: negative, Citigroup: neutral","{'Indian cos': 'negative', 'Citigroup': 'neutr...",{'Citigroup': 'neutral'},False,2


In [16]:
def conflicting_sents(x):
  sents = set(x.True_output_dict.values())
  if len(sents) > 1:
    return "Conflict"
  else:
    return "No Conflict"

t5_unified_results_df["Conflict_sents"] = t5_unified_results_df.apply(conflicting_sents, axis=1)
t5_roberta_results_df["Conflict_sents"] = t5_roberta_results_df.apply(conflicting_sents, axis=1)
t5_pretrain_results_df["Conflict_sents"] = t5_pretrain_results_df.apply(conflicting_sents, axis=1)


In [17]:
t5_unified_results_df.groupby([ "Model_correct", "Conflict_sents"]).count().Title

Model_correct  Conflict_sents
False          Conflict           97
               No Conflict       282
True           Conflict           33
               No Conflict       665
Name: Title, dtype: int64

In [18]:
t5_roberta_results_df.groupby([ "Model_correct", "Conflict_sents"]).count().Title

Model_correct  Conflict_sents
False          Conflict           43
               No Conflict       214
True           Conflict           87
               No Conflict       733
Name: Title, dtype: int64

In [19]:
t5_pretrain_results_df.groupby([ "Model_correct", "Conflict_sents"]).count().Title

Model_correct  Conflict_sents
False          Conflict           45
               No Conflict       223
True           Conflict           85
               No Conflict       724
Name: Title, dtype: int64

In [28]:
def get_single(df):
  correct_ent_lst =[]
  correct_sent_lst = []
  model_sent_lst = []
  for index, row in df.iterrows():
    true_ent_lst = list(row.True_output_dict.keys())
    model_ent_lst = list(row.Model_output_dict.keys())
    for ent in true_ent_lst:
      if ent in model_ent_lst:
        correct_ent_lst.append(ent)
        correct_sent_lst.append(row.True_output_dict[ent])
        model_sent_lst.append(row.Model_output_dict[ent])

  return [correct_ent_lst, correct_sent_lst, model_sent_lst]



In [35]:
t5_unified_single_df = pd.DataFrame(get_single(t5_unified_results_df)).T
t5_unified_single_df.columns = ["Entity", "True_Label", "Model_Label"]
t5_unified_single_df

Unnamed: 0,Entity,True_Label,Model_Label
0,L&T,positive,positive
1,Voltas,positive,positive
2,Bank Nifty,neutral,positive
3,Gold,negative,negative
4,Citigroup,neutral,neutral
...,...,...,...
1179,BNP Paribas,neutral,neutral
1180,Gold,negative,negative
1181,Havells,negative,negative
1182,Nirmal Bang Securities,neutral,neutral


In [36]:
t5_roberta_single_df = pd.DataFrame(get_single(t5_roberta_results_df)).T
t5_roberta_single_df.columns = ["Entity", "True_Label", "Model_Label"]
t5_roberta_single_df

Unnamed: 0,Entity,True_Label,Model_Label
0,L&T,positive,positive
1,Voltas,positive,positive
2,Bank Nifty,neutral,neutral
3,Gold,negative,negative
4,Citigroup,neutral,neutral
...,...,...,...
1313,BNP Paribas,neutral,neutral
1314,Gold,negative,negative
1315,Havells,negative,negative
1316,Nirmal Bang Securities,neutral,neutral


In [42]:
print(classification_report(t5_unified_single_df.True_Label, t5_unified_single_df.Model_Label, digits=4))

              precision    recall  f1-score   support

    negative     0.8143    0.8457    0.8297       337
     neutral     0.7936    0.7600    0.7764       425
    neutral,     0.0000    0.0000    0.0000         0
    positive     0.8568    0.8649    0.8608       422

    accuracy                         0.8218      1184
   macro avg     0.6162    0.6177    0.6167      1184
weighted avg     0.8220    0.8218    0.8217      1184



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [41]:
print(classification_report(t5_roberta_single_df.True_Label, t5_roberta_single_df.Model_Label, digits=4))

              precision    recall  f1-score   support

    negative     0.9093    0.8917    0.9004       360
     neutral     0.8713    0.8730    0.8722       504
    positive     0.8957    0.9075    0.9015       454

    accuracy                         0.8900      1318
   macro avg     0.8921    0.8907    0.8914      1318
weighted avg     0.8901    0.8900    0.8900      1318

