In [1]:
# 1. Install libraries
!pip install transformers torch sentencepiece pandas openpyxl

# 2. Prompt for a file upload (google.colab API, so this only works inside Colab)
from google.colab import files
uploaded = files.upload()  # after uploading 'input.xlsx', its contents are stored in the `uploaded` dict

# 3. Import libraries
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import pandas as pd
import io

# 4. Load the mBART-50 many-to-many checkpoint and its matching tokenizer.
#    Both objects live at module level and are used directly by back_translate.
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name)

# 5. Back-translation helpers
def _translate(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang* with the shared mBART model.

    Args:
        text: The string to translate.
        src_lang: mBART language code of *text* (e.g. ``"ko_KR"``).
        tgt_lang: mBART language code to generate (e.g. ``"en_XX"``).

    Returns:
        The decoded translation of the single input sequence.
    """
    # The tokenizer reads the source language from this attribute when encoding.
    tokenizer.src_lang = src_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated_ids = model.generate(
        **encoded,
        # Force the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]


def back_translate(sentence):
    """Paraphrase a Korean sentence by translating it to English and back.

    Relies on the module-level ``tokenizer`` and ``model``.

    Args:
        sentence: Korean text to back-translate. Values that are not strings
            (such as the NaN/None that an empty spreadsheet cell becomes) and
            strings containing only whitespace are returned unchanged.

    Returns:
        The Korean sentence obtained after the Korean -> English -> Korean
        round trip, or *sentence* itself when there is nothing to translate.
    """
    # Skip missing or blank cells: the tokenizer only accepts text input, and
    # running the model on an empty string would only produce noise.
    if not isinstance(sentence, str) or not sentence.strip():
        return sentence

    english_text = _translate(sentence, "ko_KR", "en_XX")  # Korean -> English
    return _translate(english_text, "en_XX", "ko_KR")  # English -> Korean

# 6. Load the uploaded workbook and back-translate its 'question' column.
#    The file is expected under the key 'input.xlsx'. If that exact key is
#    missing but exactly one file was uploaded, that file is used instead
#    (NOTE(review): Colab appears to store re-uploads under a suffixed name
#    such as 'input (1).xlsx' - confirm).
if "input.xlsx" in uploaded:
    input_name = "input.xlsx"
elif len(uploaded) == 1:
    input_name = next(iter(uploaded))
else:
    raise KeyError(
        f"Expected an uploaded file named 'input.xlsx', got: {list(uploaded)}"
    )

df = pd.read_excel(io.BytesIO(uploaded[input_name]))
if "question" not in df.columns:
    # Fail early with the available column names instead of a bare KeyError.
    raise KeyError(
        f"'question' column not found in {input_name!r}; columns: {list(df.columns)}"
    )
df["back_translated"] = df["question"].apply(back_translate)

# 7. Save the result as 'output.xlsx' (written without the index column)
df.to_excel("output.xlsx", index=False)
print("백번역 완료! 'output.xlsx' 파일로 저장되었습니다.")


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

KeyboardInterrupt: 