# 0. Import

In [1]:
import pandas as pd
from transformers import AutoModel, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import similarity_metric as sm
import formality_metric as fm

In [3]:
def switch_from_string_to_integer(mungchi_string):
    """Convert a '/'-delimited mungchi (chunk) string into per-chunk character counts.

    Slashes act purely as separators, and whitespace never contributes to a
    count, whether it surrounds a chunk or sits inside it.

    Args:
        mungchi_string: Chunks joined by '/', e.g. '내가 / 만든 / 가사'.

    Returns:
        A list with one integer per chunk: the number of non-whitespace
        characters in that chunk, e.g. '내가 / 만든 / 가사' -> [2, 2, 2].
        An empty input string yields [0] (a single empty chunk).
    """
    # str.split() with no argument splits on any run of whitespace, so joining
    # the pieces drops leading, trailing, and interior whitespace alike.
    # (The previous len(part.strip()) still counted interior spaces.)
    return [len(''.join(part.split())) for part in mungchi_string.split('/')]

# 1. Load Dataset

In [4]:
# Read the lyrics dataset (JSON) into a DataFrame, then print its columns and shape
dataframe = pd.read_json('./../data/dataset_v2.0.json')
for summary in (dataframe.columns, dataframe.shape):
    print(summary)

Index(['title', 'lyrics', 'genre', 'check_only_korean', 'line_samples',
       'verse_samples', 'total_samples', 'line_sample_word_mungchi_string',
       'line_sample_word_mungchi_integer', 'line_sample_line_mungchi_string',
       'line_sample_line_mungchi_integer', 'verse_sample_word_mungchi_string',
       'verse_sample_word_mungchi_integer', 'verse_sample_line_mungchi_string',
       'verse_sample_line_mungchi_integer', 'total_sample_word_mungchi_string',
       'total_sample_word_mungchi_integer', 'total_sample_line_mungchi_string',
       'total_sample_line_mungchi_integer'],
      dtype='object')
(6090, 19)


In [5]:
# Keep a separate copy of the loaded frame to work on in the cells below
df = dataframe.copy()

# 2. Inference
- 가상의 테스트셋에 해당하는 노래 10곡
- 여기서 input으로 요청하는 음절수가 golden_mungchi_integer에 저장되어야 함.

In [6]:
# Stand-in test set: 10 rows drawn (fixed seed) from rows flagged check_only_korean
korean_only_mask = df["check_only_korean"].eq(True)
korean_only_df = df.loc[korean_only_mask]
temp_test_df = korean_only_df.sample(n=10, random_state=42)
temp_test_df.shape

(10, 19)

# 3. Evaluation

In [7]:
# Load the encoder and tokenizer used to compute semantic similarity.
# Alternative checkpoint (currently disabled): "kakaobank/kf-deberta-base"
ENCODER_CHECKPOINT = "klue/roberta-base"
model = AutoModel.from_pretrained(ENCODER_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(ENCODER_CHECKPOINT)

  return self.fget.__get__(instance, owner)()


In [8]:
# Per-sample accumulators; each receives one entry per row of temp_test_df
(
    predict_mungchi_integer_list,
    semantic_sim_list,
    lexical_sim_list,
    acc_form_list,
    mse_form_list,
    our_form_list,
) = ([] for _ in range(6))

for row in range(len(temp_test_df)):
    # Fetch the row once and read every field from it
    sample = temp_test_df.iloc[row]
    word_mungchi_strings = sample['line_sample_word_mungchi_string']

    golden_lyrics = sample['lyrics']

    # golden_mungchi_integer: the syllable counts that are given as the input
    # type: list, e.g. [2, 2, 2]
    golden_mungchi_integer = sample['line_sample_word_mungchi_integer'][0]
    # golden_mungchi_string: reference string for comparing with generated
    # lyrics; not used in the scoring below
    # type: a single str with chunks separated by ' / ' ('내가 / 만든 / 가사')
    golden_mungchi_string = word_mungchi_strings[0]
    # predict_mungchi_string: meant to hold the generated string (same topic,
    # same genre); here it is read from the same dataset entry as the golden string
    # type: a single str with chunks separated by ' / ' ('내가 / 만든 / 가사')
    predict_mungchi_string = word_mungchi_strings[0]

    predict_mungchi_integer = switch_from_string_to_integer(predict_mungchi_string)
    predict_mungchi_integer_list.append(predict_mungchi_integer)

    # Debug logging (disabled)
    # print(f'<golden_lyrics>\n{golden_lyrics}\n')
    # print(f'<predict_mungchi>\n{predict_mungchi_string}')

    # Score this sample: similarity against the lyrics, form against the counts
    semantic_sim = sm.eval_semantic_sim(model, tokenizer, golden_lyrics, predict_mungchi_string)
    lexical_sim = sm.eval_lexical_sim_bleu(golden_lyrics, predict_mungchi_string)
    acc_form, mse_form = fm.eval_form(golden_mungchi_integer, predict_mungchi_integer)
    our_form = fm.eval_our_form(golden_mungchi_integer, predict_mungchi_integer)

    # Record the scores only after all of them have been computed
    for bucket, score in (
        (semantic_sim_list, semantic_sim),
        (lexical_sim_list, lexical_sim),
        (acc_form_list, acc_form),
        (mse_form_list, mse_form),
        (our_form_list, our_form),
    ):
        bucket.append(score)

# Assemble the result table: dataset columns alongside the collected scores
eval_columns = {
    'original_lyrics': temp_test_df['lyrics'],
    'input_mungchi_integer': temp_test_df['line_sample_word_mungchi_integer'],
    'input_mungchi_string': temp_test_df['line_sample_word_mungchi_string'],
    'generated_mungchi_string': temp_test_df['line_sample_word_mungchi_string'],
    'generated_mungchi_integer': predict_mungchi_integer_list,
    'semantic_sim': semantic_sim_list,
    'bleu_lexical_sim': lexical_sim_list,
    'acc_form': acc_form_list,
    'mse_form': mse_form_list,
    'our_form': our_form_list,
}
eval_df = pd.DataFrame(eval_columns)

# Write the evaluation result to CSV
eval_df.to_csv('evalutation_result.csv', index=False, encoding='utf-8-sig')

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, indepe