Skip to content

Commit

Permalink
fix opencc serialization error (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
chenhesen committed Nov 17, 2023
1 parent 9497ce5 commit 62c5fb5
Showing 1 changed file with 8 additions and 4 deletions.
12 changes: 8 additions & 4 deletions data_juicer/ops/mapper/chinese_convert_mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import opencc

from ..base_op import OPERATORS, Mapper


def prepare_converter(mode):
    """Create the OpenCC converter for ``mode`` and publish it module-wide.

    The converter is not returned; it is stored in the module-level global
    ``OPENCC_CONVERTER``, replacing whatever a previous call stored there.
    ``opencc`` is imported inside the function, so the import only happens
    when this function is actually called.

    :param mode: OpenCC configuration name without its file extension
        (for example ``'s2t'``); ``'.json'`` is appended to it here.
    """
    global OPENCC_CONVERTER
    from opencc import OpenCC

    # OpenCC is handed the configuration file name, i.e. "<mode>.json".
    config_name = mode + '.json'
    OPENCC_CONVERTER = OpenCC(config_name)


@OPERATORS.register_module('chinese_convert_mapper')
class ChineseConvertMapper(Mapper):
"""Mapper to convert Chinese between Traditional Chinese, Simplified Chinese
Expand Down Expand Up @@ -39,9 +43,9 @@ def __init__(self, mode: str = 's2t', *args, **kwargs):
]
assert mode in mode_list, 'Please make sure mode is one of {}'.format(
mode_list)
self.converter = opencc.OpenCC(mode + '.json')
prepare_converter(mode)

def process(self, sample):

sample[self.text_key] = self.converter.convert(sample[self.text_key])
sample[self.text_key] = OPENCC_CONVERTER.convert(sample[self.text_key])
return sample

0 comments on commit 62c5fb5

Please sign in to comment.