In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.3-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 5.2 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)
[K     |████████████████████████████████| 120 kB 52.0 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 39.7 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.21.3


In [4]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.4.0-py3-none-any.whl (365 kB)
[K     |████████████████████████████████| 365 kB 5.0 MB/s 
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 47.3 MB/s 
[?25hCollecting multiprocess
  Downloading multiprocess-0.70.13-py37-none-any.whl (115 kB)
[K     |████████████████████████████████| 115 kB 59.7 MB/s 
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1
  Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)
[K     |████████████████████████████████| 127 kB 69.9 MB/s 
Installing collected packages: urllib3, xxhash, responses, multiprocess, datasets
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.24.3
    Uninstalling urllib3

# 모듈 import 및 전역 변수 설정

In [13]:
import json
import os

import torch
import torch.nn as nn
from tqdm import trange
from transformers import XLMRobertaModel, AutoTokenizer
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW
from datasets import load_metric
from sklearn.metrics import f1_score
import pandas as pd
import copy

# Special-token ids. They are not referenced by any code visible in this notebook.
# NOTE(review): presumably the XLM-RoBERTa ids for <pad>, <s> and </s> - confirm.
PADDING_TOKEN = 1
S_OPEN_TOKEN = 0
S_CLOSE_TOKEN = 2

# When True, the dev set is evaluated after every training epoch.
do_eval = True

# Directories that receive one checkpoint per epoch during training.
category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/'
polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/'

# Checkpoint files loaded by the test routine.
test_category_extraction_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/category_extraction/saved_model_example.pt'
test_polarity_classification_model_path = '/content/drive/MyDrive/korean_baseline/saved_model/polarity_classification/saved_model_example.pt'

# JSONL data files (train, dev and test all point at the same sample file here).
train_data_path = '/content/drive/MyDrive/korean_baseline/data/sample.jsonl'
dev_data_path = '/content/drive/MyDrive/korean_baseline/data/sample.jsonl'
test_data_path = '/content/drive/MyDrive/korean_baseline/data/sample.jsonl'

# Tokenization / optimisation hyper-parameters.
max_len = 256
batch_size = 8
base_model = 'xlm-roberta-base'
learning_rate = 3e-6
eps = 1e-8
num_train_epochs = 20
classifier_hidden_size = 768
classifier_dropout_prob = 0.1

# Every "entity#property" category the extraction model is asked about.
entity_property_pair = [
    '제품 전체#일반', '제품 전체#가격', '제품 전체#디자인',
    '제품 전체#품질', '제품 전체#편의성', '제품 전체#인지도',
    '본품#일반', '본품#디자인', '본품#품질', '본품#편의성', '본품#다양성',
    '패키지/구성품#일반', '패키지/구성품#디자인', '패키지/구성품#품질',
    '패키지/구성품#편의성', '패키지/구성품#다양성',
    '브랜드#일반', '브랜드#가격', '브랜드#디자인', '브랜드#품질', '브랜드#인지도',
]

# Label vocabularies: list index -> name, and the inverse name -> index maps.
tf_id_to_name = ['True', 'False']
tf_name_to_id = {name: idx for idx, name in enumerate(tf_id_to_name)}

polarity_id_to_name = ['positive', 'negative', 'neutral']
polarity_name_to_id = {name: idx for idx, name in enumerate(polarity_id_to_name)}

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder tokens registered with the tokenizer as additional special tokens.
special_tokens_dict = {
    'additional_special_tokens': [
        '&name&',
        '&affiliation&',
        '&social-security-num&',
        '&tel-num&',
        '&card-num&',
        '&bank-account&',
        '&num&',
        '&online-account&',
    ]
}

json 및 jsonl 파일 read, write 함수

In [14]:
def jsonload(fname, encoding="utf-8"):
    """Read a JSON file and return the decoded Python object.

    Args:
        fname: Path of the JSON file to read.
        encoding: Text encoding used to open the file (default ``"utf-8"``).

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    with open(fname, encoding=encoding) as source:
        return json.load(source)


# Save a JSON-serialisable object to the given file name.
def jsondump(j, fname):
    """Write ``j`` to ``fname`` as UTF-8 encoded JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) rather
    than as ``\\uXXXX`` escapes. An existing file is overwritten.

    Args:
        j: JSON-serialisable object.
        fname: Destination file path.
    """
    with open(fname, "w", encoding="UTF8") as out_file:
        json.dump(j, out_file, ensure_ascii=False)

# Read a JSON Lines file into a list (one decoded object per non-blank line).
def jsonlload(fname, encoding="utf-8"):
    """Load a JSON Lines file.

    Lines that are empty or contain only whitespace (for example a trailing
    newline at the end of the file) are skipped instead of being handed to
    ``json.loads``, which would raise ``json.JSONDecodeError`` on them.

    Args:
        fname: Path of the ``.jsonl`` file.
        encoding: Text encoding used to open the file (default ``"utf-8"``).

    Returns:
        list: The decoded objects, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(fname, encoding=encoding) as f:
        # Iterate the file lazily rather than materialising it with readlines().
        return [json.loads(line) for line in f if line.strip()]

# Sanity check: parse the sample JSONL file and display the decoded records.
jsonlload('/content/drive/MyDrive/korean_baseline/data/sample.jsonl')

[{'id': 'nikluge-sa-2022-train-00001',
  'sentence_form': '둘쨋날은 미친듯이 밟아봤더니 기어가 헛돌면서 틱틱 소리가 나서 경악.',
  'annotation': [['본품#품질', ['기어', 16, 18], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00002',
  'sentence_form': '이거 뭐 삐꾸를 준 거 아냐 불안하고, 거금 투자한 게 왜 이래.. 싶어서 정이 확 떨어졌는데 산 곳 가져가서 확인하니 기어 텐션 문제라고 고장 아니래.',
  'annotation': [['본품#품질', ['기어 텐션', 67, 72], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00003',
  'sentence_form': '간사하게도 그 이후에는 라이딩이 아주 즐거워져서 만족스럽게 탔다.',
  'annotation': [['제품 전체#일반', [None, 0, 0], 'positive']]},
 {'id': 'nikluge-sa-2022-train-00004',
  'sentence_form': '샥이 없는 모델이라 일반 도로에서 타면 노면의 진동 때문에 손목이 덜덜덜 떨리고 이가 부딪칠 지경인데 이마저도 며칠 타면서 익숙해지니 신경쓰이지 않게 됐다.',
  'annotation': [['제품 전체#일반', ['샥이 없는 모델', 0, 8], 'neutral']]},
 {'id': 'nikluge-sa-2022-train-00005',
  'sentence_form': '안장도 딱딱해서 엉덩이가 아팠는데 무시하고 타고 있다.',
  'annotation': [['본품#일반', ['안장', 0, 2], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00006',
  'sentence_form': '지금 내 실력과 저질 체력으로는 이 정도 자전거도 되게 훌륭한 거라는..',
  'annotation'

# 모델 정의
xlm-roberta 모델을 기반으로 한 classification 모델 이용

In [15]:
class SimpleClassifier(nn.Module):
    """Classification head applied to the first position of a sequence.

    Pipeline: dropout -> linear -> tanh -> dropout -> linear. The layer width
    and dropout probability come from the module-level
    ``classifier_hidden_size`` and ``classifier_dropout_prob`` settings.
    """

    def __init__(self, num_label):
        """Build the head.

        Args:
            num_label: Number of output classes (size of the logit vector).
        """
        super().__init__()
        width = classifier_hidden_size
        self.dense = nn.Linear(width, width)
        self.dropout = nn.Dropout(classifier_dropout_prob)
        self.output = nn.Linear(width, num_label)

    def forward(self, features):
        """Return class logits computed from ``features[:, 0, :]``.

        Args:
            features: 3-D tensor; only index 0 along the second axis is used.
                Presumably (batch, sequence, hidden) encoder output - confirm.

        Returns:
            Tensor of logits with ``num_label`` entries along the last axis.
        """
        first_position = features[:, 0, :]
        hidden = torch.tanh(self.dense(self.dropout(first_position)))
        return self.output(self.dropout(hidden))


class RoBertaBaseClassifier(nn.Module):
    """Pretrained XLM-RoBERTa encoder followed by a ``SimpleClassifier`` head."""

    def __init__(self, num_label, len_tokenizer):
        """Load the pretrained encoder and attach a fresh classification head.

        Args:
            num_label: Number of classes predicted by the head.
            len_tokenizer: Tokenizer vocabulary size; the encoder's token
                embeddings are resized to this value.
        """
        super().__init__()

        self.num_label = num_label

        encoder = XLMRobertaModel.from_pretrained(base_model)
        encoder.resize_token_embeddings(len_tokenizer)
        self.xlm_roberta = encoder

        self.labels_classifier = SimpleClassifier(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Encode the inputs and classify them.

        Args:
            input_ids: Token id tensor passed to the encoder.
            attention_mask: Attention mask tensor passed to the encoder.
            labels: Optional gold class ids. When given, a cross-entropy loss
                is computed against the logits.

        Returns:
            tuple: ``(loss, logits)`` where ``loss`` is ``None`` if ``labels``
            was not provided.
        """
        encoder_outputs = self.xlm_roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None,
        )

        # The first element of the encoder output feeds the classification head.
        logits = self.labels_classifier(encoder_outputs[0])

        if labels is None:
            return None, logits

        criterion = nn.CrossEntropyLoss()
        loss = criterion(logits.view(-1, self.num_label), labels.view(-1))
        return loss, logits


# 데이터 파싱 및 tokenization 함수 정의


In [16]:

def tokenize_and_align_labels(tokenizer, form, annotations, max_len):
    """Build per-category training examples for a single sentence.

    The sentence is paired with every entry of ``entity_property_pair`` and
    tokenized as a (sentence, category) pair. For each category:

    * if an annotation with that category exists (ignoring annotations whose
      polarity is the placeholder ``'------------'``), a ``True`` example is
      added to the category data and a polarity example is added as well;
    * otherwise a ``False`` example is added to the category data only.

    Args:
        tokenizer: Callable tokenizer returning ``input_ids`` and
            ``attention_mask`` entries.
        form: The sentence text. A missing value (``pd.isna``) yields two
            empty result dictionaries.
        annotations: Sequence of annotations; index 0 is the category string
            and index 2 the polarity string.
        max_len: Padding / truncation length passed to the tokenizer.

    Returns:
        tuple: ``(entity_property_data_dict, polarity_data_dict)``, each with
        the list-valued keys ``input_ids``, ``attention_mask`` and ``label``.
    """
    columns = ('input_ids', 'attention_mask', 'label')
    entity_property_data_dict = {column: [] for column in columns}
    polarity_data_dict = {column: [] for column in columns}

    # A missing sentence contributes no examples at all.
    if pd.isna(form):
        return entity_property_data_dict, polarity_data_dict

    def _record(bucket, encoded, label_id):
        bucket['input_ids'].append(encoded['input_ids'])
        bucket['attention_mask'].append(encoded['attention_mask'])
        bucket['label'].append(label_id)

    for pair in entity_property_pair:
        encoded = tokenizer(form, pair, padding='max_length', max_length=max_len, truncation=True)

        for annotation in annotations:
            category, polarity = annotation[0], annotation[2]
            if polarity == '------------' or category != pair:
                continue

            _record(entity_property_data_dict, encoded, tf_name_to_id['True'])
            _record(polarity_data_dict, encoded, polarity_name_to_id[polarity])
            break
        else:
            # No usable annotation mentions this category: negative example.
            _record(entity_property_data_dict, encoded, tf_name_to_id['False'])

    return entity_property_data_dict, polarity_data_dict


def get_dataset(raw_data, tokenizer, max_len):
    """Convert raw utterances into category and polarity ``TensorDataset``s.

    Each utterance is expanded by ``tokenize_and_align_labels`` and the
    resulting examples are concatenated across utterances.

    Args:
        raw_data: Iterable of dicts with ``sentence_form`` and ``annotation``.
        tokenizer: Tokenizer forwarded to ``tokenize_and_align_labels``.
        max_len: Padding / truncation length forwarded as well.

    Returns:
        tuple: ``(entity_property_dataset, polarity_dataset)``; each dataset
        holds three tensors in the order input ids, attention mask, label.
    """
    fields = ('input_ids', 'attention_mask', 'label')
    category_columns = {field: [] for field in fields}
    polarity_columns = {field: [] for field in fields}

    for utterance in raw_data:
        category_part, polarity_part = tokenize_and_align_labels(
            tokenizer, utterance['sentence_form'], utterance['annotation'], max_len
        )
        for field in fields:
            category_columns[field].extend(category_part[field])
            polarity_columns[field].extend(polarity_part[field])

    def _as_dataset(columns):
        return TensorDataset(*(torch.tensor(columns[field]) for field in fields))

    return _as_dataset(category_columns), _as_dataset(polarity_columns)



# 모델 학습

In [18]:
def evaluation(y_true, y_pred, label_len):
    """Print per-class counts, accuracy figures and F1 scores.

    Args:
        y_true: Sequence of gold class ids (ints or anything ``int()`` accepts,
            e.g. zero-dimensional tensors).
        y_pred: Sequence of predicted class ids, aligned with ``y_true``.
        label_len: Number of classes; valid ids are ``0 .. label_len - 1``.

    Printed values:
        * per-class sample counts, hits and accuracies (a class that never
          occurs in ``y_true`` is reported with accuracy ``0.0``),
        * overall accuracy,
        * macro accuracy, averaged over the classes that occur in ``y_true``,
        * per-class, micro and macro F1 from ``sklearn.metrics.f1_score``.
    """
    # Normalise to plain ints up front so list indexing and sklearn both work.
    y_true = list(map(int, y_true))
    y_pred = list(map(int, y_pred))

    if not y_true:
        # Nothing to score; every ratio below would divide by zero.
        print('no samples to evaluate')
        return

    count_list = [0] * label_len
    hit_list = [0] * label_len
    for gold, predicted in zip(y_true, y_pred):
        count_list[gold] += 1
        if gold == predicted:
            hit_list[gold] += 1

    # Guard against classes that are absent from y_true (count == 0).
    acc_list = [hits / count if count else 0.0
                for hits, count in zip(hit_list, count_list)]
    observed_classes = sum(1 for count in count_list if count)

    print(count_list)
    print(hit_list)
    print(acc_list)
    print('accuracy: ', (sum(hit_list) / sum(count_list)))
    # Average over the classes actually present instead of a hard-coded 3,
    # which was wrong for the two-label (True/False) task.
    print('macro_accuracy: ', sum(acc_list) / observed_classes)

    print('f1_score: ', f1_score(y_true, y_pred, average=None))
    print('f1_score_micro: ', f1_score(y_true, y_pred, average='micro'))
    print('f1_score_macro: ', f1_score(y_true, y_pred, average='macro'))

def _build_optimizer_and_scheduler(model, num_batches, full_finetuning=True):
    """Create the AdamW optimizer and linear-decay scheduler for one model.

    Args:
        model: A ``RoBertaBaseClassifier``.
        num_batches: Number of batches per epoch; the schedule spans
            ``num_train_epochs * num_batches`` optimizer steps.
        full_finetuning: When True all parameters are trained, with weight
            decay disabled for biases and LayerNorm weights. When False only
            the classification head is trained.

    Returns:
        tuple: ``(optimizer, scheduler)``.
    """
    if full_finetuning:
        named_params = list(model.named_parameters())
        # Hugging Face names LayerNorm scales "LayerNorm.weight"; the former
        # 'gamma'/'beta' patterns matched no parameter name.
        no_decay = ['bias', 'LayerNorm.weight']
        # The param-group key must be 'weight_decay'. An unknown key such as
        # 'weight_decay_rate' is silently ignored, so no decay is applied.
        grouped_parameters = [
            {'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # The head attribute is `labels_classifier` (there is no `.classifier`).
        grouped_parameters = [{'params': list(model.labels_classifier.parameters())}]

    optimizer = AdamW(grouped_parameters, lr=learning_rate, eps=eps)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_epochs * num_batches
    )
    return optimizer, scheduler


def _train_one_epoch(model, dataloader, optimizer, scheduler, max_grad_norm):
    """Run one training pass over ``dataloader`` and return the mean batch loss."""
    model.train()
    total_loss = 0.0

    for batch in dataloader:
        b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

        model.zero_grad()
        loss, _ = model(b_input_ids, b_input_mask, b_labels)
        loss.backward()
        total_loss += loss.item()

        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        optimizer.step()
        scheduler.step()

    # max(..., 1) avoids a division by zero for an empty dataloader.
    return total_loss / max(len(dataloader), 1)


def _evaluate_on_dev(model, dataloader, num_labels):
    """Predict over ``dataloader`` and print metrics via ``evaluation``."""
    model.eval()
    pred_list = []
    label_list = []

    for batch in dataloader:
        b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)

        with torch.no_grad():
            _, logits = model(b_input_ids, b_input_mask, b_labels)

        # Collect plain Python ints rather than (possibly GPU) tensors.
        pred_list.extend(torch.argmax(logits, dim=-1).tolist())
        label_list.extend(b_labels.tolist())

    evaluation(label_list, pred_list, num_labels)


def train_sentiment_analysis():
    """Train the category-extraction and polarity-classification models.

    Reads the train/dev JSONL files, tokenizes them, fine-tunes one
    ``RoBertaBaseClassifier`` per task for ``num_train_epochs`` epochs, writes
    a ``saved_model_epoch_<n>.pt`` checkpoint for each model after every epoch
    and, when ``do_eval`` is set, prints dev-set metrics after every epoch.
    All settings come from the module-level configuration.
    """
    print('train_sentiment_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)
    print('polarity model would be saved at ', polarity_classification_model_path)

    print('loading train data')
    train_data = jsonlload(train_data_path)
    dev_data = jsonlload(dev_data_path)

    print('tokenizing train data')
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')
    entity_property_train_data, polarity_train_data = get_dataset(train_data, tokenizer, max_len)
    entity_property_dev_data, polarity_dev_data = get_dataset(dev_data, tokenizer, max_len)

    entity_property_train_dataloader = DataLoader(entity_property_train_data, shuffle=True,
                                                  batch_size=batch_size)
    polarity_train_dataloader = DataLoader(polarity_train_data, shuffle=True,
                                           batch_size=batch_size)
    # Evaluation metrics do not depend on sample order, so no shuffling here.
    entity_property_dev_dataloader = DataLoader(entity_property_dev_data, shuffle=False,
                                                batch_size=batch_size)
    polarity_dev_dataloader = DataLoader(polarity_dev_data, shuffle=False,
                                         batch_size=batch_size)

    print('loading model')
    entity_property_model = RoBertaBaseClassifier(len(tf_id_to_name), len(tokenizer))
    entity_property_model.to(device)

    polarity_model = RoBertaBaseClassifier(len(polarity_id_to_name), len(tokenizer))
    polarity_model.to(device)

    print('end loading')

    # torch.save does not create missing directories.
    for save_dir in (category_extraction_model_path, polarity_classification_model_path):
        os.makedirs(save_dir, exist_ok=True)

    max_grad_norm = 1.0

    entity_property_optimizer, entity_property_scheduler = _build_optimizer_and_scheduler(
        entity_property_model, len(entity_property_train_dataloader))
    polarity_optimizer, polarity_scheduler = _build_optimizer_and_scheduler(
        polarity_model, len(polarity_train_dataloader))

    # Both tasks follow the same train -> checkpoint -> evaluate cycle per epoch.
    tasks = [
        {
            'log_label': "Entity_Property_Epoch: ",
            'model': entity_property_model,
            'train_dataloader': entity_property_train_dataloader,
            'dev_dataloader': entity_property_dev_dataloader,
            'optimizer': entity_property_optimizer,
            'scheduler': entity_property_scheduler,
            'save_dir': category_extraction_model_path,
            'num_labels': len(tf_id_to_name),
        },
        {
            'log_label': "Polarity_Epoch: ",
            'model': polarity_model,
            'train_dataloader': polarity_train_dataloader,
            'dev_dataloader': polarity_dev_dataloader,
            'optimizer': polarity_optimizer,
            'scheduler': polarity_scheduler,
            'save_dir': polarity_classification_model_path,
            'num_labels': len(polarity_id_to_name),
        },
    ]

    for epoch_index in trange(num_train_epochs, desc="Epoch"):
        epoch_step = epoch_index + 1

        for task in tasks:
            avg_train_loss = _train_one_epoch(
                task['model'], task['train_dataloader'],
                task['optimizer'], task['scheduler'], max_grad_norm)
            print(task['log_label'], epoch_step)
            print("Average train loss: {}".format(avg_train_loss))

            model_saved_path = os.path.join(
                task['save_dir'], 'saved_model_epoch_' + str(epoch_step) + '.pt')
            torch.save(task['model'].state_dict(), model_saved_path)

            if do_eval:
                _evaluate_on_dev(task['model'], task['dev_dataloader'], task['num_labels'])

    print("training is done")


In [None]:
# Run training for both models; a checkpoint is written for each model after every epoch.
train_sentiment_analysis()

# 모델 평가

학습된 모델을 바탕으로 국어원 데이터 형태를 만드는 방법 예시

In [19]:

def predict_from_korean_form(tokenizer, ce_model, pc_model, data):
    """Annotate sentences with predicted (category, polarity) pairs.

    For every sentence and every entry of ``entity_property_pair`` the
    category-extraction model decides whether the category applies; if it
    does, the polarity model picks the polarity.

    Args:
        tokenizer: Tokenizer called as ``tokenizer(form, pair, ...)``.
        ce_model: Category-extraction model returning ``(loss, logits)``.
        pc_model: Polarity-classification model returning ``(loss, logits)``.
        data: List of dicts with a ``sentence_form`` entry. The list is
            modified in place: each dict's ``annotation`` is replaced by the
            list of predicted ``[category, polarity]`` pairs.

    Returns:
        The same ``data`` list, for convenience.
    """
    # Both models are used for inference here, so both must sit on the target
    # device with dropout disabled (previously only ce_model was prepared).
    for model in (ce_model, pc_model):
        model.to(device)
        model.eval()

    for sentence in data:
        form = sentence['sentence_form']
        sentence['annotation'] = []
        if not isinstance(form, str):
            print("form type is arong: ", form)
            continue

        for pair in entity_property_pair:
            # Use the shared max_len setting so inference matches training.
            tokenized_data = tokenizer(form, pair, padding='max_length', max_length=max_len, truncation=True)

            input_ids = torch.tensor([tokenized_data['input_ids']]).to(device)
            attention_mask = torch.tensor([tokenized_data['attention_mask']]).to(device)

            with torch.no_grad():
                _, ce_logits = ce_model(input_ids, attention_mask)
                # .item() yields a plain int for indexing the label list.
                ce_result = tf_id_to_name[torch.argmax(ce_logits, dim=-1)[0].item()]
                if ce_result != 'True':
                    continue

                _, pc_logits = pc_model(input_ids, attention_mask)
                pc_result = polarity_id_to_name[torch.argmax(pc_logits, dim=-1)[0].item()]

            sentence['annotation'].append([pair, pc_result])

    return data


F1 score 계산 - 추출 성능 및 전체 성능에 대한 F1 score 따로 계산

In [17]:

def _find_category_match(category, polarity, candidates, polarity_index):
    """Compare one annotation against the first candidate of the same category.

    Returns:
        tuple: ``(category_found, polarity_matches)``. Only the first
        candidate whose category (index 0) equals ``category`` is inspected.
    """
    for candidate in candidates:
        if candidate[0] == category:
            return True, candidate[polarity_index] == polarity
    return False, False


def _precision_recall_f1(counts):
    """Turn TP/FP/FN counts into precision, recall and F1 (0.0 when undefined)."""
    tp, fp, fn = counts['TP'], counts['FP'], counts['FN']
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    if recall + precision:
        f1 = 2 * recall * precision / (recall + precision)
    else:
        f1 = 0.0
    return {'Precision': precision, 'Recall': recall, 'F1': f1}


def evaluation_f1(true_data, pred_data):
    """Compute F1 for category extraction alone and for the whole pipeline.

    Args:
        true_data: List of gold sentences; each ``annotation`` entry is
            ``[category, target, polarity]`` (indices 0 and 2 are used).
        pred_data: List of predicted sentences aligned with ``true_data``;
            each ``annotation`` entry is ``[category, polarity]``.

    Returns:
        dict: ``{'category extraction result': {...}, 'entire pipeline
        result': {...}}`` where each inner dict has ``Precision``, ``Recall``
        and ``F1``. A ratio whose denominator is zero (for example when the
        model predicted nothing) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    ce_eval = {'TP': 0, 'FP': 0, 'FN': 0}
    pipeline_eval = {'TP': 0, 'FP': 0, 'FN': 0}

    for i in range(len(true_data)):
        gold_annotations = true_data[i]['annotation']
        pred_annotations = pred_data[i]['annotation']

        # TP / FN: look up every gold annotation among the predictions.
        for y_ano in gold_annotations:
            category_found, polarity_ok = _find_category_match(
                y_ano[0], y_ano[2], pred_annotations, 1)
            ce_eval['TP' if category_found else 'FN'] += 1
            pipeline_eval['TP' if polarity_ok else 'FN'] += 1

        # FP: look up every prediction among the gold annotations.
        for p_ano in pred_annotations:
            category_found, polarity_ok = _find_category_match(
                p_ano[0], p_ano[1], gold_annotations, 2)
            if not category_found:
                ce_eval['FP'] += 1
            if not polarity_ok:
                pipeline_eval['FP'] += 1

    return {
        'category extraction result': _precision_recall_f1(ce_eval),
        'entire pipeline result': _precision_recall_f1(pipeline_eval)
    }


테스트 데이터에 대한 평가

In [25]:

def test_sentiment_analysis():
    """Evaluate the saved checkpoints on the test set and print F1 scores.

    Loads the tokenizer (with the extra special tokens), restores both models
    from ``test_category_extraction_model_path`` and
    ``test_polarity_classification_model_path``, predicts annotations for a
    deep copy of the test data and prints the result of ``evaluation_f1``.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.add_special_tokens(special_tokens_dict)
    test_data = jsonlload(test_data_path)

    # Prediction works on the raw sentences, so no TensorDataset / DataLoader
    # is built here; the ones built previously were never used.

    model = RoBertaBaseClassifier(len(tf_id_to_name), len(tokenizer))
    model.load_state_dict(torch.load(test_category_extraction_model_path, map_location=device))
    model.to(device)
    model.eval()

    polarity_model = RoBertaBaseClassifier(len(polarity_id_to_name), len(tokenizer))
    polarity_model.load_state_dict(torch.load(test_polarity_classification_model_path, map_location=device))
    polarity_model.to(device)
    polarity_model.eval()

    # Predict on a copy so the gold annotations in test_data stay intact.
    pred_data = predict_from_korean_form(tokenizer, model, polarity_model, copy.deepcopy(test_data))

    # jsondump(pred_data, './pred_data.json')
    # pred_data = jsonload('./pred_data.json')

    print('F1 result: ', evaluation_f1(test_data, pred_data))

In [26]:
# Evaluate the saved example checkpoints on the test data and print the F1 summary.
test_sentiment_analysis()

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are ini

['본품#품질', ['기어', 16, 18], 'negative']
['본품#품질', ['기어 텐션', 67, 72], 'negative']
['제품 전체#일반', [None, 0, 0], 'positive']
['제품 전체#일반', ['샥이 없는 모델', 0, 8], 'neutral']
['본품#일반', ['안장', 0, 2], 'negative']
['제품 전체#일반', ['자전거', 23, 26], 'positive']
['본품#품질', ['내장 기어 3단', 0, 8], 'positive']
['본품#품질', ['UD20', 14, 18], 'negative']
['본품#품질', ['자막 검색 후 등록 기능', 11, 24], 'negative']
['본품#품질', ['등록]키', 3, 7], 'negative']
['본품#품질', ['부가 기능', 3, 8], 'positive']
['제품 전체#일반', [None, 0, 0], 'negative']
['제품 전체#일반', [None, 0, 0], 'negative']
['제품 전체#일반', [None, 0, 0], 'negative']
['제품 전체#일반', [None, 0, 0], 'negative']
F1 result:  {'category extraction result': {'Precision': 1.0, 'Recall': 0.6666666666666666, 'F1': 0.8}, 'entire pipeline result': {'Precision': 1.0, 'Recall': 0.6666666666666666, 'F1': 0.8}}
