In [6]:
!pip install protobuf

Collecting protobuf
  Downloading protobuf-5.28.3-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Downloading protobuf-5.28.3-cp310-abi3-win_amd64.whl (431 kB)
Installing collected packages: protobuf
Successfully installed protobuf-5.28.3


In [47]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# 1. Load the tokenizer and model used for translation (one shared checkpoint name)
model_name_translation = "facebook/mbart-large-50-many-to-many-mmt"
translation_tokenizer = MBart50TokenizerFast.from_pretrained(model_name_translation)
translation_model = MBartForConditionalGeneration.from_pretrained(model_name_translation)

def korean_to_english(input_text):
    """Translate Korean text into English with the shared translation model.

    Args:
        input_text: A Korean string, or a list of Korean strings translated
            together as one batch.

    Returns:
        list[str]: Decoded English text, one entry per input sequence
        (a single string yields a one-element list).
    """
    # The tokenizer is a module-level object and english_to_korean() sets
    # src_lang to "en_XX", so the source language is set on every call.
    translation_tokenizer.src_lang = "ko_KR"

    # padding=True changes nothing for a single string, but lets a list of
    # sentences with different lengths be stacked into one tensor.
    encoded_ko = translation_tokenizer(input_text, return_tensors="pt", padding=True)

    # The English language code is forced as the first generated token.
    generated_tokens = translation_model.generate(
        **encoded_ko,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["en_XX"]
    )

    return translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

def english_to_korean(input_text):
    """Translate English text into Korean and clean up known mistranslations.

    Args:
        input_text: An English string, or a list of English strings translated
            together as one batch.

    Returns:
        list[str]: Decoded Korean text, one entry per input sequence, each
        passed through postprocess_translation().
    """
    # The tokenizer is a module-level object and korean_to_english() sets
    # src_lang to "ko_KR", so the source language is set on every call.
    translation_tokenizer.src_lang = "en_XX"

    # padding=True changes nothing for a single string, but lets a list of
    # sentences with different lengths be stacked into one tensor.
    encoded_en = translation_tokenizer(input_text, return_tensors="pt", padding=True)

    # The Korean language code is forced as the first generated token.
    generated_tokens = translation_model.generate(
        **encoded_en,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["ko_KR"]
    )
    output_text = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    # Post-processing: fix known wrong translation patterns in every result.
    return [postprocess_translation(text) for text in output_text]

def postprocess_translation(text):
    """Fix a fixed set of known mistranslation patterns in translated text.

    Every occurrence of each pattern is replaced by plain, case-sensitive
    substring substitution, applied in the order listed below.

    Args:
        text: The translated string to clean up.

    Returns:
        str: The text with all listed patterns replaced.
    """
    # (wrong pattern, Korean correction) pairs
    fixes = (
        ("KRW", "원"),                            # currency code -> Korean won
        ("営業", "영업"),                          # Japanese characters -> Korean word
        ("Product Inquiry", "제품 문의"),          # untranslated section headings
        ("Refund Inquiry", "환불 문의"),
        ("Shipping Inquiry", "배송 문의"),
        ("Order/Payment Inquiry", "주문/결제 문의"),
        ("refundable", "환불"),                   # untranslated English words
        ("refunded", "환불"),
    )

    cleaned = text
    for bad, good in fixes:
        cleaned = cleaned.replace(bad, good)
    return cleaned

# Load the question-answering checkpoint once; the pipeline below reuses these objects.
model_name = "deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

question = "상품 교환"

# Translate the Korean question into English (the context below is in English).
translated_question = korean_to_english(question)[0]
print(translated_question)

context = "Order/Payment Inquiry: For order/payment-related inquiries regarding overseas purchase agency products, please contact our customer service center at 1234-5678.Shipping Inquiry: Your order will be shipped within 2-3 business days of payment, and it will take approximately 3-5 business days to arrive. However, shipping may be delayed during weekends and holidays. You can track your order on our website.Product Exchange/Return Inquiry: Product exchanges and returns are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be exchanged or returned. You can request an exchange or return by visiting our website, and you will be charged for round-trip international shipping costs (30,000 KRW).Refund Inquiry: Refunds are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be refunded. You can request a refund by visiting our website, and you will be refunded the amount paid minus international shipping costs (30,000 KRW).Product Inquiry: For product-related inquiries, please visit our website. You can find detailed information about the product, including size, color, and material, on the product page. If you have any additional questions, please contact our customer service center at 1234-5678.Other Inquiry: If you have any other questions, please contact our customer service center at 1234-5678."

# a) Build the QA pipeline from the model and tokenizer loaded above.
#    Passing model_name here instead would load the same checkpoint a second time.
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)

QA_input = {
    'question': translated_question,
    'context': context
}

# b) Run the prediction and keep the result.
res = nlp(QA_input)

en_answer = res['answer']

# Translate the English answer back into Korean; the result is a list of strings.
answer = english_to_korean(en_answer)

for line in answer:
    print(line)

Goods exchange.
상품 교환과 교환은 7일 내에 받아들여집니다.


In [29]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

model_name = "deepset/roberta-base-squad2"

# a) Load model & tokenizer once, before the pipeline, so the pipeline can reuse
#    them instead of loading the checkpoint a second time from its name.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# b) Get predictions
nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
QA_input = {
    'question': 'Give me the phone number of your office.',
    'context' : "Order/Payment Inquiry: For order/payment-related inquiries regarding overseas purchase agency products, please contact our customer service center at 1234-5678.Shipping Inquiry: Your order will be shipped within 2-3 business days of payment, and it will take approximately 3-5 business days to arrive. However, shipping may be delayed during weekends and holidays. You can track your order on our website.Product Exchange/Return Inquiry: Product exchanges and returns are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be exchanged or returned. You can request an exchange or return by visiting our website, and you will be charged for round-trip international shipping costs (30,000 KRW).Refund Inquiry: Refunds are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be refunded. You can request a refund by visiting our website, and you will be refunded the amount paid minus international shipping costs (30,000 KRW).Product Inquiry: For product-related inquiries, please visit our website. You can find detailed information about the product, including size, color, and material, on the product page. If you have any additional questions, please contact our customer service center at 1234-5678.Other Inquiry: If you have any other questions, please contact our customer service center at 1234-5678."
    }
print(QA_input)

res = nlp(QA_input)

# Last expression of the cell: shows the prediction dict as the cell output.
res

{'question': 'Give me the phone number of your office.', 'context': 'Order/Payment Inquiry: For order/payment-related inquiries regarding overseas purchase agency products, please contact our customer service center at 1234-5678.Shipping Inquiry: Your order will be shipped within 2-3 business days of payment, and it will take approximately 3-5 business days to arrive. However, shipping may be delayed during weekends and holidays. You can track your order on our website.Product Exchange/Return Inquiry: Product exchanges and returns are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be exchanged or returned. You can request an exchange or return by visiting our website, and you will be charged for round-trip international shipping costs (30,000 KRW).Refund Inquiry: Refunds are accepted within 7 days of receipt. However, if the product is damaged or has been used, it cannot be refunded. You can request a refund by visiting our website, an

{'score': 6.991105294673616e-08,
 'start': 150,
 'end': 159,
 'answer': '1234-5678'}

In [19]:
# Log in to the Hugging Face Hub from inside the notebook.
# The call renders a login widget as the cell output.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:

def korean_to_english(input_text):
    """Translate Korean text into English with the shared translation model.

    Uses translation_tokenizer / translation_model. The bare names
    `tokenizer` and `model` are bound elsewhere in this notebook to the
    question-answering checkpoint, which is not a translation model.

    Args:
        input_text: A Korean string, or a list of Korean strings translated
            together as one batch.

    Returns:
        list[str]: Decoded English text, one entry per input sequence
        (a single string yields a one-element list).
    """
    # Set on every call: english_to_korean() switches src_lang to "en_XX".
    translation_tokenizer.src_lang = "ko_KR"

    # padding=True changes nothing for a single string, but lets a list of
    # sentences with different lengths be stacked into one tensor.
    encoded_ko = translation_tokenizer(input_text, return_tensors="pt", padding=True)

    # The English language code is forced as the first generated token.
    generated_tokens = translation_model.generate(
        **encoded_ko,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["en_XX"]
    )

    return translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

def english_to_korean(input_text):
    """Translate English text into Korean and clean up known mistranslations.

    Uses translation_tokenizer / translation_model. The bare names
    `tokenizer` and `model` are bound elsewhere in this notebook to the
    question-answering checkpoint, which is not a translation model.
    The result is passed through postprocess_translation(), so running this
    cell does not drop the clean-up step of the earlier definition it replaces.

    Args:
        input_text: An English string, or a list of English strings translated
            together as one batch.

    Returns:
        list[str]: Decoded and post-processed Korean text, one entry per
        input sequence.
    """
    # Set on every call: korean_to_english() switches src_lang to "ko_KR".
    translation_tokenizer.src_lang = "en_XX"

    # padding=True changes nothing for a single string, but lets a list of
    # sentences with different lengths be stacked into one tensor.
    encoded_en = translation_tokenizer(input_text, return_tensors="pt", padding=True)

    # The Korean language code is forced as the first generated token.
    generated_tokens = translation_model.generate(
        **encoded_en,
        forced_bos_token_id=translation_tokenizer.lang_code_to_id["ko_KR"]
    )
    output_text = translation_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    # Post-processing: fix known wrong translation patterns in every result.
    return [postprocess_translation(text) for text in output_text]


# Korean-language customer-service FAQ text.
# NOTE: this rebinds `context`, which an earlier cell sets to an English string.
context = """
주문/결제 문의: 해외 구매 대행 상품에 대한 주문/결제 관련 문의는 고객 서비스 센터 1234-5678로 연락해 주세요.

배송 문의: 결제 후 2-3 영업일 이내에 주문이 발송되며, 도착까지 약 3-5 영업일이 소요됩니다. 다만, 주말과 공휴일에는 배송이 지연될 수 있습니다. 주문은 당사 웹사이트에서 추적할 수 있습니다.

상품 교환/반품 문의: 상품의 교환 및 반품은 수령 후 7일 이내에 가능합니다. 단, 상품이 손상되었거나 사용된 경우 교환 또는 반품이 불가능합니다. 교환 또는 반품은 당사 웹사이트에서 요청할 수 있으며, 왕복 국제 배송비(30,000원)가 부과됩니다.

환불 문의: 환불은 수령 후 7일 이내에 가능합니다. 단, 상품이 손상되었거나 사용된 경우 환불이 불가능합니다. 환불 요청은 당사 웹사이트에서 할 수 있으며, 국제 배송비(30,000원)를 제외한 금액이 환불됩니다.

상품 문의: 상품 관련 문의는 당사 웹사이트에서 확인하실 수 있습니다. 상품 페이지에서 사이즈, 색상, 재질 등의 상세 정보를 확인할 수 있습니다. 추가 질문이 있을 경우, 고객 서비스 센터 1234-5678로 문의해 주세요.

기타 문의: 다른 문의사항이 있으시면 고객 서비스 센터 1234-5678로 연락해 주세요.

"""


# Example questions
# These are bare string expressions: nothing is assigned, and none of them is
# passed to any function in this cell.
"결제 후 몇일 후에 배송이 이루어지나요?"
"배송은 몇일이 걸리나요?"
"주문한 상품은 언제 도착할까요?"