In [1]:
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import re
import numpy as np
import os
from tqdm import tqdm
from transformers import shape_list, BertTokenizer, TFBertModel
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from seqeval.metrics import f1_score, classification_report
import tensorflow as tf
from transformers import TFBertForTokenClassification

2023-12-07 15:59:56.511667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-07 15:59:56.703449: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-07 15:59:57.493617: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-12-07 15:59:57.493733: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinf

In [2]:
RANDOM_STATE = 42
MAX_SEQ_LEN = 850
GPU_NUM = 0
HUGGINGFACE_MODEL_PATH ="klue/bert-base"

# 1. 데이터 로드

In [3]:
df = pd.read_csv("../data/BIO_tagged/BIO_tagged_GYRO_trainset.csv")

In [4]:
df

Unnamed: 0,i,doc_idx,start_idx,token,tag
0,0,7.00,0,우리가,O
1,0,7.00,4,RAMP를,O
2,0,7.00,10,지나,O
3,0,7.00,13,진행하고,O
4,0,7.00,18,있을,O
...,...,...,...,...,...
109776,873,984.00,465,생각하기엔,O
109777,873,984.00,471,문제가,O
109778,873,984.00,475,있는,O
109779,873,984.00,478,것으로,O


In [5]:
print('데이터에 Null 값이 있는지 유무 : ' + str(df.isnull().values.any()))
if df.isnull().values.any():
    df = df.dropna().reset_index()
    print('Null 값 삭제 후 데이터프레임 행의 개수 : {}'.format(len(df)))    
    print('Null 값 삭제 후 데이터에 Null 값이 있는지 유무 : ' + str(df.isnull().values.any()))

데이터에 Null 값이 있는지 유무 : False


- token to sentence

In [6]:
def dataset_to_sentence_and_labels(df):
    dataset_sentence_list = []
    dataset_label_list = []
    temp_sent_list = []
    temp_label_list = []

    labels_list = []

    temp_sent_list.append(df["token"][0])
    temp_label_list.append(df["tag"][0])
    prev_sentence = df["i"][0]
    for i in tqdm(range(1, len(df))):
        tag = df["tag"][i]
        if tag != "O":
            tag = "key"
        else:
            tag = "O"
            
        labels_list.append(tag)
        if df["i"][i] != prev_sentence:
            dataset_sentence_list.append(temp_sent_list)
            dataset_label_list.append(temp_label_list)
            temp_sent_list = []
            temp_label_list = []
        temp_sent_list.append(df["token"][i])
        temp_label_list.append(tag)
        prev_sentence = df["i"][i]
        
    return dataset_sentence_list, dataset_label_list, labels_list

In [7]:
dataset_sentence_list, dataset_label_list, labels_list = dataset_to_sentence_and_labels(df)

100% 109780/109780 [00:04<00:00, 24229.42it/s]


In [8]:
train_data_sentence, test_data_sentence, train_data_label, test_data_label = train_test_split(dataset_sentence_list, dataset_label_list, test_size=.2, shuffle=False, random_state=RANDOM_STATE)

In [9]:
# labels = [label.strip() for label in open('BIO-label-LV2.txt', 'r', encoding='utf-8')]
labels = list(set(labels_list))
labels.sort()
print('개체명 태깅 정보 :', labels)

개체명 태깅 정보 : ['O', 'key']


In [10]:
tag_to_index = {tag: index for index, tag in enumerate(labels)}
index_to_tag = {index: tag for index, tag in enumerate(labels)}

In [11]:
tag_size = len(tag_to_index)
print('개체명 태깅 정보의 개수 :',tag_size)

개체명 태깅 정보의 개수 : 2


# 2. 전처리 예시

In [12]:
tokenizer = BertTokenizer.from_pretrained(HUGGINGFACE_MODEL_PATH)

# 3. 전처리

In [13]:
def convert_examples_to_features(examples, labels, max_seq_len, tokenizer,
                                 pad_token_id_for_segment=0, pad_token_id_for_label=-100):
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    pad_token_id = tokenizer.pad_token_id

    input_ids, attention_masks, token_type_ids, data_labels = [], [], [], []
    for example, label in tqdm(zip(examples, labels), total=len(examples)):
        tokens = []
        labels_ids = []
        for one_word, label_token in zip(example, label):
            subword_tokens = tokenizer.tokenize(one_word)
            tokens.extend(subword_tokens)
            if len(subword_tokens)>=1:
                labels_ids.extend([tag_to_index[label_token]] + [pad_token_id_for_label] * (len(subword_tokens) - 1))
            elif len(subword_tokens)==0:
                pass

        special_tokens_count = 2
        if len(tokens) > max_seq_len - special_tokens_count:
            tokens = tokens[:(max_seq_len - special_tokens_count)]
            labels_ids = labels_ids[:(max_seq_len - special_tokens_count)]

        tokens += [sep_token]
        labels_ids += [pad_token_id_for_label]

        tokens = [cls_token] + tokens
        labels_ids = [pad_token_id_for_label] + labels_ids

        input_id = tokenizer.convert_tokens_to_ids(tokens)
        attention_mask = [1] * len(input_id)
        padding_count = max_seq_len - len(input_id)
        input_id = input_id + ([pad_token_id] * padding_count)
        attention_mask = attention_mask + ([0] * padding_count)
        token_type_id = [pad_token_id_for_segment] * max_seq_len
        label = labels_ids + ([pad_token_id_for_label] * padding_count)
        assert len(input_id) == max_seq_len, "Error with input length {} vs {}".format(len(input_id), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)
        assert len(token_type_id) == max_seq_len, "Error with token type length {} vs {}".format(len(token_type_id), max_seq_len)
        assert len(label) == max_seq_len, "Error with labels length {} vs {}".format(len(label), max_seq_len)

        input_ids.append(input_id)
        attention_masks.append(attention_mask)
        token_type_ids.append(token_type_id)
        data_labels.append(label)
        
    input_ids = np.array(input_ids, dtype=int)
    attention_masks = np.array(attention_masks, dtype=int)
    token_type_ids = np.array(token_type_ids, dtype=int)
    data_labels = np.asarray(data_labels, dtype=np.int32)

    return (input_ids, attention_masks, token_type_ids), data_labels

In [14]:
X_train, y_train = convert_examples_to_features(train_data_sentence, train_data_label, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer)

100% 698/698 [00:05<00:00, 122.57it/s]


In [15]:
X_valid, y_valid = convert_examples_to_features(test_data_sentence, test_data_label, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer)

100% 175/175 [00:01<00:00, 132.77it/s]


# 4. 모델링

# 5. 학습 및 평가

In [17]:
model = TFBertForTokenClassification.from_pretrained(HUGGINGFACE_MODEL_PATH, num_labels=tag_size, from_pt=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
model.compile(optimizer=optimizer, loss=model.hf_compute_loss)

2023-11-22 19:35:45.631053: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-22 19:35:59.694484: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46139 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:01:00.0, compute capability: 8.6
2023-11-22 19:35:59.697406: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 46139 MB memory:  -> device: 1, name: NVIDIA RTX A6000, pci bus id: 0000:05:00.0, compute capability: 8.6
2023-11-22 19:35:59.700112: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:2 w

In [18]:
class F1score(tf.keras.callbacks.Callback):
    def __init__(self, X_valid, y_valid):
        self.X_valid = X_valid
        self.y_valid = y_valid

    def sequences_to_tags(self, label_ids, pred_ids):
        label_list = []
        pred_list = []

        for i in range(0, len(label_ids)):
            label_tag = []
            pred_tag = []

        for label_index, pred_index in zip(label_ids[i], pred_ids[i]):
            if label_index != -100:
                label_tag.append(index_to_tag[label_index])
                pred_tag.append(index_to_tag[pred_index])

        label_list.append(label_tag)
        pred_list.append(pred_tag)

        return label_list, pred_list

    def on_epoch_end(self, epoch, logs={}):
        y_predicted = self.model.predict(self.X_valid)
        y_predicted = np.argmax(y_predicted.logits, axis = 2)

        label_list, pred_list = self.sequences_to_tags(self.y_valid, y_predicted)

        score = f1_score(label_list, pred_list, suffix=False)
        print(' - f1: {:04.2f}'.format(score * 100))
        print(classification_report(label_list, pred_list, suffix=False))

In [19]:
f1_score_report = F1score(X_valid, y_valid)

In [20]:
model.fit(
    X_train, y_train, epochs=3, batch_size=16, 
    callbacks=[f1_score_report]
)

model.save_pretrained(HUGGINGFACE_MODEL_PATH.replace('/', '-'))

Epoch 1/3
 - f1: 0.00
              precision    recall  f1-score   support

          ey       0.00      0.00      0.00         1

   micro avg       0.00      0.00      0.00         1
   macro avg       0.00      0.00      0.00         1
weighted avg       0.00      0.00      0.00         1

Epoch 2/3


  _warn_prf(average, modifier, msg_start, len(result))


 - f1: 0.00
              precision    recall  f1-score   support

          ey       0.00      0.00      0.00         1

   micro avg       0.00      0.00      0.00         1
   macro avg       0.00      0.00      0.00         1
weighted avg       0.00      0.00      0.00         1

Epoch 3/3
 - f1: 0.00
              precision    recall  f1-score   support

          ey       0.00      0.00      0.00         1

   micro avg       0.00      0.00      0.00         1
   macro avg       0.00      0.00      0.00         1
weighted avg       0.00      0.00      0.00         1



# 6. 최종 평가

In [21]:
from transformers import TFBertForTokenClassification

In [22]:
test_df = pd.read_csv("../data/BIO_tagged/BIO_tagged_GYRO_testset.csv")
test_df = test_df.replace(to_replace="B-causal factor", value="key").replace(to_replace="I-causal factor", value="key")

In [23]:
print('데이터에 Null 값이 있는지 유무 : ' + str(test_df.isnull().values.any()))
if test_df.isnull().values.any():
    test_df = test_df.dropna().reset_index()
    print('Null 값 삭제 후 데이터프레임 행의 개수 : {}'.format(len(test_df)))    
    print('Null 값 삭제 후 데이터에 Null 값이 있는지 유무 : ' + str(test_df.isnull().values.any()))

데이터에 Null 값이 있는지 유무 : False


In [24]:
test_dataset_sentence_list, test_dataset_label_list, test_labels_list = dataset_to_sentence_and_labels(test_df)

100% 29705/29705 [00:01<00:00, 24729.18it/s]


In [25]:
X_test, y_test = convert_examples_to_features(test_dataset_sentence_list, test_dataset_label_list, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer)

100% 218/218 [00:01<00:00, 113.59it/s]


In [26]:
with tf.device(f"/GPU:{GPU_NUM}"):
    load_model = TFBertForTokenClassification.from_pretrained(HUGGINGFACE_MODEL_PATH, num_labels=tag_size, from_pt=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    load_model.compile(optimizer=optimizer, loss=load_model.hf_compute_loss)
    load_model.load_weights(f"{HUGGINGFACE_MODEL_PATH.replace('/', '-')}/tf_model.h5")

2023-12-07 16:00:33.054645: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-07 16:00:51.398830: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46139 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:01:00.0, compute capability: 8.6
2023-12-07 16:00:51.401918: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 46139 MB memory:  -> device: 1, name: NVIDIA RTX A6000, pci bus id: 0000:05:00.0, compute capability: 8.6
2023-12-07 16:00:51.404699: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1616] Created device /job:localhost/replica:0/task:0/device:GPU:2 w

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

2023-12-07 16:03:13.141919: I tensorflow/stream_executor/cuda/cuda_blas.cc:1614] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForTokenClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForTokenClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probabl

In [27]:
class F1score(tf.keras.callbacks.Callback):
    def __init__(self, X_valid, y_valid):
        self.X_valid = X_valid
        self.y_valid = y_valid

    def sequences_to_tags(self, label_ids, pred_ids):
        label_list = []
        pred_list = []

        for i in range(0, len(label_ids)):
            label_tag = []
            pred_tag = []

        for label_index, pred_index in zip(label_ids[i], pred_ids[i]):
            if label_index != -100:
                label_tag.append(index_to_tag[label_index])
                pred_tag.append(index_to_tag[pred_index])

        label_list.append(label_tag)
        pred_list.append(pred_tag)

        return label_list, pred_list

    def on_epoch_end(self, epoch, logs={}):
        y_predicted = self.model.predict(self.X_valid)
        y_predicted = np.argmax(y_predicted.logits, axis = 2)

        label_list, pred_list = self.sequences_to_tags(self.y_valid, y_predicted)

        score = f1_score(label_list, pred_list, suffix=False)
        print(' - f1: {:04.2f}'.format(score * 100))
        print(classification_report(label_list, pred_list, suffix=False))

In [29]:
def sequences_to_tags(label_ids, pred_ids):
    label_list = []
    pred_list = []

    for i in range(0, len(label_ids)):
        label_tag = []
        pred_tag = []

    for label_index, pred_index in zip(label_ids[i], pred_ids[i]):
        if label_index != -100:
            label_tag.append(index_to_tag[label_index])
            pred_tag.append(index_to_tag[pred_index])

    label_list.append(label_tag)
    pred_list.append(pred_tag)

    return label_list, pred_list
    
    
y_predicted = load_model.predict(X_valid) # 입력한 테스트용 샘플에 대해서 예측 y를 리턴
# y_predicted = model.predict(X_test) # 입력한 테스트용 샘플에 대해서 예측 y를 리턴
y_predicted = np.argmax(y_predicted.logits, axis=2)

label_list, pred_list = sequences_to_tags(y_test, y_predicted)

score = f1_score(label_list, pred_list, suffix=False)
print(' - f1: {:04.2f}'.format(score * 100))
print(classification_report(label_list, pred_list, suffix=False))



IndexError: index 217 is out of bounds for axis 0 with size 175

# 7. 예측

In [30]:
def convert_examples_to_features_for_prediction(examples, max_seq_len, tokenizer,
                                 pad_token_id_for_segment=0, pad_token_id_for_label=-100):
    cls_token = tokenizer.cls_token
    sep_token = tokenizer.sep_token
    pad_token_id = tokenizer.pad_token_id

    input_ids, attention_masks, token_type_ids, label_masks = [], [], [], []

    for example in tqdm(examples):
        tokens = []
        label_mask = []
        for one_word in example:
            subword_tokens = tokenizer.tokenize(one_word)
            tokens.extend(subword_tokens)
            if len(subword_tokens)>=1:
                label_mask.extend([0]+ [pad_token_id_for_label] * (len(subword_tokens) - 1))
            elif len(subword_tokens)==0:
                pass

        special_tokens_count = 2
        if len(tokens) > max_seq_len - special_tokens_count:
            tokens = tokens[:(max_seq_len - special_tokens_count)]
            label_mask = label_mask[:(max_seq_len - special_tokens_count)]

        tokens += [sep_token]
        label_mask += [pad_token_id_for_label]

        tokens = [cls_token] + tokens
        label_mask = [pad_token_id_for_label] + label_mask


        input_id = tokenizer.convert_tokens_to_ids(tokens)
        attention_mask = [1] * len(input_id)
        padding_count = max_seq_len - len(input_id)
        input_id = input_id + ([pad_token_id] * padding_count)
        attention_mask = attention_mask + ([0] * padding_count)
        token_type_id = [pad_token_id_for_segment] * max_seq_len
        label_mask = label_mask + ([pad_token_id_for_label] * padding_count)

        assert len(input_id) == max_seq_len, "Error with input length {} vs {}".format(len(input_id), max_seq_len)
        assert len(attention_mask) == max_seq_len, "Error with attention mask length {} vs {}".format(len(attention_mask), max_seq_len)
        assert len(token_type_id) == max_seq_len, "Error with token type length {} vs {}".format(len(token_type_id), max_seq_len)
        assert len(label_mask) == max_seq_len, "Error with labels length {} vs {}".format(len(label_mask), max_seq_len)

        input_ids.append(input_id)
        attention_masks.append(attention_mask)
        token_type_ids.append(token_type_id)
        label_masks.append(label_mask)

    input_ids = np.array(input_ids, dtype=int)
    attention_masks = np.array(attention_masks, dtype=int)
    token_type_ids = np.array(token_type_ids, dtype=int)
    label_masks = np.asarray(label_masks, dtype=np.int32)

    return (input_ids, attention_masks, token_type_ids), label_masks

In [31]:
def process_doc(doc):
    processed_doc = doc.strip('"').replace("\n", " ").strip()
    processed_doc = re.sub('\s+', ' ', processed_doc)
    
    return processed_doc

In [32]:
def ner_prediction(model, examples, max_seq_len, tokenizer, isTokenized=False):
    if isTokenized == False:
        examples = [report.split(' ') for report in examples]
        X_pred, label_masks = convert_examples_to_features_for_prediction(examples, max_seq_len=max_seq_len, tokenizer=tokenizer)
    elif isTokenized == True:
        X_pred, label_masks = convert_examples_to_features_for_prediction(examples, max_seq_len=max_seq_len, tokenizer=tokenizer)
    
    y_predicted = model.predict(X_pred)
    y_predicted = np.argmax(y_predicted.logits, axis = 2)

    pred_list = []
    result_list = []

    for i in range(0, len(label_masks)):
        pred_tag = []
        for label_index, pred_index in zip(label_masks[i], y_predicted[i]):
            pred_tag.append(index_to_tag[pred_index])
            
        pred_list.append(pred_tag)

    for example, pred in zip(examples, pred_list):
        one_sample_result = []
        for one_word, label_token in zip(example, pred):
            one_sample_result.append((one_word, label_token))
        result_list.append(one_sample_result)

    return result_list

In [33]:
def tag_to_sequence(result_list):
    pred_list = []
    for i in range(len(result_list)):
        continue_flag = False
        keyphrase_list = []
        for token, tag in result_list[i]:
            if continue_flag == True and tag == "O":
                keyphrase = " ".join(sequence)
                keyphrase_list.append(keyphrase)
                sequence = []
                continue_flag = False
            elif continue_flag == True and tag != "O":
                sequence.append(token)
                continue_flag = True
            elif continue_flag == False and tag == "O":
                continue
            elif continue_flag == False and tag != "O":
                sequence = []
                sequence.append(token)
                continue_flag = True
        pred_list.append(keyphrase_list)
    return pred_list

In [34]:
def inference(model, test_report_list, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer):
    assert isinstance(test_report_list, list) or isinstance(test_report_list, pd.core.series.Series), f"input이 {type(test_report_list)}임"
    
    result_list = ner_prediction(model, test_report_list, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer)   
    predict_keyphrases_list = tag_to_sequence(result_list)
    
    return predict_keyphrases_list

In [35]:
test_raw_df = pd.read_csv("../data/rawdata/GYRO_testset.csv")

In [36]:
predict_keyphrases_list = inference(load_model, [test_raw_df["본문"][0]])
predict_keyphrases_list

100% 1/1 [00:00<00:00, 122.81it/s]






[['직각으로 표기되어 있음) Z3로 진입하여 관제사로부터 수정 지시를 받아 F7을 거쳐 F로 정상', '승무원']]

In [37]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, fbeta_score

In [38]:
idx=0
test_token_list = []
test_tag_list = []

prev_i = 0
token_list = []
tag_list = []
        
while idx < len(test_df):
    if prev_i == test_df["i"][idx]:
        token_list.append(test_df["token"][idx])
        tag_list.append(test_df["tag"][idx])
    else:
        test_token_list.append(token_list)
        test_tag_list.append(tag_list)
        token_list = []
        token_list.append(test_df["token"][idx])
        tag_list = []
        tag_list.append(test_df["tag"][idx])
        
    prev_i = test_df["i"][idx]
    idx+=1
test_token_list.append(token_list)
test_tag_list.append(tag_list)

In [39]:
result_list = ner_prediction(load_model, test_token_list, max_seq_len=MAX_SEQ_LEN, tokenizer=tokenizer, isTokenized=True)

100% 219/219 [00:02<00:00, 103.06it/s]




In [40]:
transformed_list = [label for sublist in result_list for _, label in sublist]
test_df["tag"]

0        O
1        O
2        O
3        O
4        O
        ..
29701    O
29702    O
29703    O
29704    O
29705    O
Name: tag, Length: 29706, dtype: object

In [41]:
print(precision_score(test_df["tag"], transformed_list, average="micro"))
print(precision_score(test_df["tag"], transformed_list, average="macro"))
print(precision_score(test_df["tag"], transformed_list, average="weighted"))
print(precision_score(test_df["tag"], transformed_list, average=None))
print(precision_score(test_df["tag"], transformed_list, average='micro', labels=["key"]))

0.798491887160843
0.49294925560108416
0.7480126172903924
[0.85306866 0.13282985]
0.13282985339848957


In [42]:
print(recall_score(test_df["tag"], transformed_list, average="micro"))
print(recall_score(test_df["tag"], transformed_list, average="macro"))
print(recall_score(test_df["tag"], transformed_list, average="weighted"))
print(recall_score(test_df["tag"], transformed_list, average=None))
print(recall_score(test_df["tag"], transformed_list, average='micro', labels=["key"]))

0.798491887160843
0.4960365680533857
0.798491887160843
[0.92306783 0.06900531]
0.06900530810062312


In [43]:
print(f1_score(test_df["tag"], transformed_list, average="micro"))
print(f1_score(test_df["tag"], transformed_list, average="macro"))
print(f1_score(test_df["tag"], transformed_list, average="weighted"))
print(f1_score(test_df["tag"], transformed_list, average=None))
print(f1_score(test_df["tag"], transformed_list, average='micro', labels=["key"]))

0.798491887160843
0.48875756127706127
0.7706021342636008
[0.88668888 0.09082625]
0.09082624544349939


In [45]:
print(fbeta_score(test_df["tag"], transformed_list, beta=2, average="micro"))
print(fbeta_score(test_df["tag"], transformed_list, beta=2, average="macro"))
print(fbeta_score(test_df["tag"], transformed_list, beta=2, average="weighted"))
print(fbeta_score(test_df["tag"], transformed_list, beta=2, average=None))
print(fbeta_score(test_df["tag"], transformed_list, beta=2, average='micro', labels=["key"]))

0.798491887160843
0.4922527721045482
0.7868319289013204
[0.90816382 0.07634172]
0.07634172496553132


In [46]:
from sklearn.metrics import classification_report

In [47]:
print(classification_report(test_df["tag"], transformed_list, output_dict=False, target_names=['O', 'key']))

              precision    recall  f1-score   support

           O       0.85      0.92      0.89     25373
         key       0.13      0.07      0.09      4333

    accuracy                           0.80     29706
   macro avg       0.49      0.50      0.49     29706
weighted avg       0.75      0.80      0.77     29706



In [48]:
exit()