In [1]:
import os
import docx
import openai

# Source directory (Ukrainian originals) and destination directory (English output).
src_dir = 'ukr'
dst_dir = 'eng'

# List all .docx files in the source directory.
# os.listdir() returns entries in arbitrary order, so sort them to get a
# reproducible processing order. Names starting with '~$' are the lock files
# Word leaves next to an open document; they are not readable documents.
doc_files = sorted(
    file
    for file in os.listdir(src_dir)
    if file.endswith('.docx') and not file.startswith('~$')
)

# Print the list of .docx files
print(doc_files)

['01_Rafajenko.docx', '02_polozij.docx', '03_Babkina.docx', '04_hehelska.docx', '05_smyrnowa.docx', '06_perehuda.docx', '07_faf.docx', '08_klymow.docx', '09_dolenyk.docx', '10_skulatowa.docx', '11_lymycz.docx', '12_nalada.docx', '13_zimina.docx', '14_bahriana.docx', '15_berezko.docx', '16_kija.docx', '17_kowalska.docx', '18_macko.docx', '19_nakoneczna.docx', '20_budzak.docx', '21_kononcz.docx', '22_janicka.docx', '23_steciuk.docx', '24_rudak.docx', '25_permiakow.docx', '26_lelyk.docx', '27_hakutna.docx', '28_olijnyk.docx', '29_szpylewska.docx', '30_diomina.docx', '31_wertola.docx', '32_pawluk.docx', '33_czerkaj.docx', '34_makarczuk.docx', '35_ponomar.docx', '36_derkaczowa.docx', '37_horbatiuk.docx', '38_szewlakowa.docx', '39_czerewan.docx']


In [2]:
# System message sent with every request: sets the translator persona and the
# qualities of the source text that the translation should preserve.
system_prompt = """
You are a translator of literature that takes into account cultural differences. You are translating Ukrainian short stories
preserving their original meaning and style including strong emotions during the war that is happening in Ukraine.
"""
# User message template; filled in with str.format() using the keyword
# arguments `target_language` and `text`.
user_prompt = "Translate the following text into {target_language}:\n\n{text}"

def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with one chat-completion request.

    The request uses the module-level ``system_prompt`` as the system message
    and ``user_prompt`` (formatted with ``text`` and ``target_language``) as
    the user message.

    Args:
        text: Source text to translate.
        target_language: Name of the language to translate into, e.g. ``'English'``.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``""`` when
        ``text`` is empty or whitespace-only (no request is sent in that case).

    Raises:
        RuntimeError: If the reply contains no text content.

    Warns:
        RuntimeWarning: If the reply was cut off by the output-token limit,
            meaning the returned translation is incomplete.
    """
    import warnings  # local import keeps this cell self-contained

    # Nothing to translate: skip the API call entirely.
    if not text or not text.strip():
        return ""

    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt.format(text=text, target_language=target_language)},
        ],
    )
    choice = response.choices[0]

    # A "length" finish reason means the model stopped because it ran out of
    # output tokens, so the end of the story is missing from the translation.
    if choice.finish_reason == "length":
        warnings.warn(
            "Translation was cut off by the model's output-token limit; the result is incomplete.",
            RuntimeWarning,
            stacklevel=2,
        )

    content = choice.message.content
    if content is None:
        # Fail with a clear message instead of an AttributeError on None.strip().
        raise RuntimeError(
            f"The model returned no text content (finish_reason={choice.finish_reason!r})."
        )
    return content.strip()

In [3]:
def export_docx(filename, text):
    """Write ``text`` to a new .docx file named ``filename`` inside ``dst_dir``.

    Each ``'\\n'``-separated line of ``text`` becomes its own paragraph. Before
    the paragraphs are added, a ``w:trackRevisions`` element is appended to the
    document settings (presumably so Word opens the file with change tracking
    enabled) and the default run language in the styles part is set to
    ``en-US``.

    Args:
        filename: Name of the output file, joined onto the module-level ``dst_dir``.
        text: Content to write; split on ``'\\n'`` into paragraphs.
    """
    doc = docx.Document()
    doc.settings.element.append(docx.oxml.shared.OxmlElement('w:trackRevisions'))

    # Point the document-default run properties at US English.
    styles_element = doc.styles.element
    rpr_default = styles_element.xpath('./w:docDefaults/w:rPrDefault/w:rPr')[0]
    lang_default = rpr_default.xpath('w:lang')[0]
    lang_default.set(docx.oxml.shared.qn('w:val'), 'en-US')

    for line in text.split('\n'):
        doc.add_paragraph(line)

    save_path = os.path.join(dst_dir, filename)
    # Create the target directory on demand so the function does not depend on
    # another cell having been run first.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    doc.save(save_path)

In [4]:
# Load every source document and keep its text keyed by file name.
# Paragraph texts are joined with newlines, one line per paragraph.
text_dict = {}
for doc_file in doc_files:
    source_doc = docx.Document(os.path.join(src_dir, doc_file))
    text_dict[doc_file] = '\n'.join(para.text for para in source_doc.paragraphs)

In [5]:
# Create the dst_dir directory if it doesn't exist.
# exist_ok=True makes this a single race-free call instead of check-then-create.
os.makedirs(dst_dir, exist_ok=True)

# Translate each story and write the result under the same file name in dst_dir.
for doc_file in doc_files:
    print(f"Translating {doc_file}...")
    translated_text = translate_text(text_dict[doc_file], 'English')
    print(f"Exporting {doc_file}...")
    export_docx(doc_file, translated_text)


    

Translating 01_Rafajenko.docx...
Exporting 01_Rafajenko.docx...
Translating 02_polozij.docx...
Exporting 02_polozij.docx...
Translating 03_Babkina.docx...
Exporting 03_Babkina.docx...
Translating 04_hehelska.docx...
Exporting 04_hehelska.docx...
Translating 05_smyrnowa.docx...
Exporting 05_smyrnowa.docx...
Translating 06_perehuda.docx...
Exporting 06_perehuda.docx...
Translating 07_faf.docx...
Exporting 07_faf.docx...
Translating 08_klymow.docx...
Exporting 08_klymow.docx...
Translating 09_dolenyk.docx...
Exporting 09_dolenyk.docx...
Translating 10_skulatowa.docx...
Exporting 10_skulatowa.docx...
Translating 11_lymycz.docx...
Exporting 11_lymycz.docx...
Translating 12_nalada.docx...
Exporting 12_nalada.docx...
Translating 13_zimina.docx...
Exporting 13_zimina.docx...
Translating 14_bahriana.docx...
Exporting 14_bahriana.docx...
Translating 15_berezko.docx...
Exporting 15_berezko.docx...
Translating 16_kija.docx...
Exporting 16_kija.docx...
Translating 17_kowalska.docx...
Exporting 17_k