In [1]:
import openai
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its entries into the process environment.
# The result is bound to a throwaway name because it is not used afterwards.
_ = load_dotenv(find_dotenv())

# Read the API key from the environment; os.getenv yields None when
# OPENAI_API_KEY is not set.
openai.api_key  = os.getenv('OPENAI_API_KEY')

In [2]:
# Shared API client used by get_completion below.
client = openai.OpenAI()


def get_completion(prompt, model="gpt-4o"):
    """Send ``prompt`` to the chat model and return the text of its reply.

    The request is made of a fixed "Novel Translator" system message followed
    by ``prompt`` as the single user message, with ``temperature=0``.

    Args:
        prompt: Text of the user message.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the response.
    """
    system_prompt = "You are a custom GPT called Novel Translator. Novel Translator specializes in translating fantasy, wuxia, and xianxia Chinese web novels into English, focusing on accurate translations of cultural references, idiomatic expressions, and genre-specific terminology like cultivator ranks and power systems. It ensures translations are well-formatted, capitalizing only the first letter of character names and leaving them untranslated (e.g., 'Yan Jianyue'). The GPT avoids incorrect translations of names and uses appropriate pronouns, aiming to include a glossary to prevent errors. It provides translations directly without additional conversation, focusing solely on the output quality and accuracy. Novel Translator will now be able to learn from already translated chapters to maintain consistency in style, tone, and formatting as exemplified by translations provided."

    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": prompt}

    completion = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [3]:
import requests
from bs4 import BeautifulSoup
import re
import os
from ebooklib import epub

## Scrape and Translate

In [4]:

def scrape(chapter_url, timeout=30):
    """Download a chapter page and extract its heading and body text.

    Args:
        chapter_url: URL of the chapter page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        A ``(combined_text, valid_filename, chapter_heading)`` tuple where
        ``combined_text`` is the text of every ``<p>`` inside the
        ``div.text-left`` element joined with single spaces,
        ``valid_filename`` is the heading with every character other than
        word characters, ``-``, ``_``, ``.`` and space replaced by ``_``,
        and ``chapter_heading`` is the raw text of ``h1#chapter-heading``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page lacks the expected heading or body element.
    """
    response = requests.get(chapter_url, timeout=timeout)
    # Fail here on 4xx/5xx instead of parsing an error page further down.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Find the chapter heading
    heading_tag = soup.find('h1', id='chapter-heading')
    if heading_tag is None:
        raise ValueError(f"No <h1 id='chapter-heading'> found at {chapter_url}")
    chapter_heading = heading_tag.get_text()

    # Sanitize the chapter heading to create a valid filename
    valid_filename = re.sub(r'[^\w\-_\. ]', '_', chapter_heading)

    # Find the div with class 'text-left'
    text_left_div = soup.find('div', class_='text-left')
    if text_left_div is None:
        raise ValueError(f"No <div class='text-left'> found at {chapter_url}")

    # Combine the text of each <p> element within this div into one string
    combined_text = ' '.join(p.get_text() for p in text_left_div.find_all('p'))

    return combined_text, valid_filename, chapter_heading
def translate(combined_text):
    """Return the model's English translation of ``combined_text``.

    The text is placed between triple backticks in a translation request
    that is handed to ``get_completion``; the reply is returned unchanged.
    """
    request = f"""
Translate the text delimited by triple backticks into English. Make sure the text is well formatted with spacing between sentences.
```{combined_text}```
"""
    return get_completion(request)
def create_raw_text_file(translated_text, folder_name, valid_filename):
    """Write text to ``<folder_name>/<valid_filename>.txt`` as UTF-8.

    Args:
        translated_text: The text to store. Despite the parameter name, the
            notebook passes the scraped, untranslated chapter text here.
        folder_name: Destination folder; created if it does not exist.
        valid_filename: File name without extension.
    """
    # An empty folder name means "current directory"; makedirs('') would raise.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)
    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent CJK text, and these files are read back as UTF-8.
    with open(os.path.join(folder_name, f'{valid_filename}.txt'), "w", encoding="utf-8") as f:
        f.write(translated_text)
def create_translated_text_file(translated_text, folder_name, valid_filename):
    """Write a translation to ``<folder_name>/<valid_filename>TRANSLATED.txt`` as UTF-8.

    Args:
        translated_text: The translated chapter text to store.
        folder_name: Destination folder; created if it does not exist.
        valid_filename: File name stem; ``TRANSLATED.txt`` is appended to it.
    """
    # An empty folder name means "current directory"; makedirs('') would raise.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)
    # Explicit UTF-8 so the file can be read back with encoding='utf-8'
    # regardless of the platform's default encoding.
    with open(os.path.join(folder_name, f'{valid_filename}TRANSLATED.txt'), "w", encoding="utf-8") as f:
        f.write(translated_text)

In [5]:
# Chapters to process; range() excludes its end value, so this is 312-314.
chapter_range = list(range(312, 315))

# Create Raw Files

In [10]:
# Download every chapter listed in chapter_range and store its scraped,
# untranslated text in the 'RTOC-raw' folder, one file per chapter.
for chapter_num in chapter_range:
    chapter_url = f"https://www.fortuneeternal.com/novel/a-regressors-tale-of-cultivation-raw-novel/chapter-{chapter_num}/"
    # chapter_heading is returned by scrape() but not needed in this cell.
    combined_text, valid_filename, chapter_heading = scrape(chapter_url)
    create_raw_text_file(combined_text, 'RTOC-raw', valid_filename)

## Create Translations

In [11]:
# Translate raw chapter files that are already on disk, one file per chapter.
raw_folder = 'RTOC-raw'
translated_folder = 'RTOC'
translate_range = range(363, 403)  # chapters 363-402 (the end value is exclusive)

for chapter_num in translate_range:
    # The stem mirrors the sanitized chapter heading used when the raw file was saved.
    valid_filename = f'A Regressor_s Tale of Cultivation RAW novel - Chapter _{chapter_num}_'
    filename = f'{valid_filename}.txt'
    raw_path = os.path.join(raw_folder, filename)

    # Chapters that were never scraped are reported and skipped.
    if not os.path.exists(raw_path):
        print(f"File not found: {filename}")
        continue

    with open(raw_path, 'r', encoding='utf-8') as file:
        raw_text = file.read()

    print(f"Translating: {filename}")
    translated_text = translate(raw_text)
    print(f"Finished translating: {filename}")

    create_translated_text_file(translated_text, translated_folder, valid_filename)
    print(f"Saved translated file: {valid_filename}TRANSLATED.txt")

Translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _363_.txt
Finished translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _363_.txt
Saved translated file: A Regressor_s Tale of Cultivation RAW novel - Chapter _363_TRANSLATED.txt
Translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _364_.txt
Finished translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _364_.txt
Saved translated file: A Regressor_s Tale of Cultivation RAW novel - Chapter _364_TRANSLATED.txt
Translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _365_.txt
Finished translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _365_.txt
Saved translated file: A Regressor_s Tale of Cultivation RAW novel - Chapter _365_TRANSLATED.txt
Translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _366_.txt
Finished translating: A Regressor_s Tale of Cultivation RAW novel - Chapter _366_.txt
Saved translated file: A Regressor_s Tale of Cultivation 

## Scrape Raw + Translations

In [6]:
# Scrape and translate every chapter in chapter_range in a single pass,
# saving only the translation into folder_name.
folder_name = "RTOC"

for chapter_no in chapter_range:
    URL = f"https://www.fortuneeternal.com/novel/a-regressors-tale-of-cultivation-raw-novel/chapter-{chapter_no}/"
    combined_text, valid_filename, chapter_heading = scrape(URL)
    print(f"Finished scraping: {chapter_heading}")
    print(f"Translating: {chapter_heading}")
    # The scraped text goes straight to the model; it is not written to disk in this cell.
    translated_text = translate(combined_text)
    print(f"Finished translating: {chapter_heading}")
    create_translated_text_file(translated_text, folder_name, valid_filename)

Finished scraping: A Regressor’s Tale of Cultivation RAW novel - Chapter (312)
Translating: A Regressor’s Tale of Cultivation RAW novel - Chapter (312)


# Create EPUB

In [13]:
import html
import os
import re
from ebooklib import epub


def _chapter_sort_key(filename):
    """Sort key ordering files by the numbers in their names (99 before 100)."""
    return [int(digits) for digits in re.findall(r'\d+', filename)], filename


def _chapter_xhtml(heading, text):
    """Render an <h1> heading followed by one escaped <p> per non-blank line."""
    paragraphs = ''.join(
        f'<p>{html.escape(line.strip(), quote=False)}</p>'
        for line in text.splitlines()
        if line.strip()
    )
    return f'<h1>{html.escape(heading, quote=False)}</h1>{paragraphs}'


# Build one EPUB out of every '*TRANSLATED.txt' file found in translated_folder.
book = epub.EpubBook()
book.set_identifier('id123456')
book.set_title("A Regressor's Tale of Cultivation - GPTL")
book.set_language('en')
book.add_author('Tremendous - 엄청난')

translated_folder = 'RTOC'

# os.listdir returns entries in arbitrary order, so sort by chapter number to
# keep the book's reading order stable and correct.
translated_files = sorted(
    (name for name in os.listdir(translated_folder) if name.endswith('TRANSLATED.txt')),
    key=_chapter_sort_key,
)

for filename in translated_files:
    with open(os.path.join(translated_folder, filename), 'r', encoding='utf-8') as file:
        translated_text = file.read()

    # Use the filename (without the suffix) as the chapter heading
    chapter_heading = filename.replace('TRANSLATED.txt', '').replace('_', ' ')

    # Create an EPUB chapter. The markup is built outside the f-string because a
    # backslash inside an f-string expression is a SyntaxError before Python 3.12,
    # and the text is escaped so '<' or '&' cannot break the XHTML.
    chapter = epub.EpubHtml(title=chapter_heading, file_name=f'{filename}.xhtml', lang='en')
    chapter.content = _chapter_xhtml(chapter_heading, translated_text)

    # Add chapter to the book
    book.add_item(chapter)
    book.toc.append(chapter)
    book.spine.append(chapter)

book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())

# The stylesheet is registered in the book; nothing in this cell links it to a page.
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
book.add_item(nav_css)

# Only 'nav' is appended: no cover or title page item is ever added to the
# book, so 'cover' / 'title_page' spine entries would reference nothing.
book.spine.append('nav')

folder_name = "EPUBS"
os.makedirs(folder_name, exist_ok=True)

epub_filename = os.path.join(folder_name, 'A Regressor\'s Tale of Cultivation - GPTL.epub')
epub.write_epub(epub_filename, book, {})

print(f'EPUB file created: {epub_filename}')

EPUB file created: EPUBS\A Regressor's Tale of Cultivation - GPTL.epub


# Full Code Integration


In [None]:
import html

# End-to-end run: scrape, translate, save and add each chapter in
# chapter_range to a single EPUB. Relies on epub, scrape, translate and
# create_translated_text_file from earlier cells.
book = epub.EpubBook()
book.set_identifier('id123456')
book.set_title("A Regressor's Tale of Cultivation - GPTL")
book.set_language('en')
book.add_author('Tremendous - 엄청난')

folder_name = "RTOC"

for chapter_no in chapter_range:
    URL = f"https://www.fortuneeternal.com/novel/a-regressors-tale-of-cultivation-raw-novel/chapter-{chapter_no}/"
    combined_text, valid_filename, chapter_heading = scrape(URL)
    print(f"Finished scraping: {chapter_heading}")
    print(f"Translating: {chapter_heading}")
    translated_text = translate(combined_text)
    print(f"Finished translating: {chapter_heading}")
    create_translated_text_file(translated_text, folder_name, valid_filename)

    # Name the XHTML file after the sanitized heading: the raw heading may hold
    # characters (quotes, parentheses, slashes) that are unsafe in a file path.
    chapter = epub.EpubHtml(title=chapter_heading, file_name=f'{valid_filename}.xhtml', lang='en')

    # One escaped <p> per non-blank line, so line breaks survive and characters
    # such as '<' or '&' in the translation cannot break the XHTML.
    paragraphs = ''.join(
        f'<p>{html.escape(line.strip(), quote=False)}</p>'
        for line in translated_text.splitlines()
        if line.strip()
    )
    chapter.content = f'<h1>{html.escape(chapter_heading, quote=False)}</h1>{paragraphs}'

    book.add_item(chapter)
    book.toc.append(chapter)
    book.spine.append(chapter)

book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())

# The stylesheet is registered in the book; nothing in this cell links it to a page.
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
book.add_item(nav_css)

# Only 'nav' is appended: no cover or title page item is ever added to the
# book, so 'cover' / 'title_page' spine entries would reference nothing.
book.spine.append('nav')

epub.write_epub('A Regressor\'s Tale of Cultivation - GPTL.epub', book, {})

In [None]:
# URL = f"https://www.fortuneeternal.com/novel/a-regressors-tale-of-cultivation-raw-novel/chapter-{}/"
# URL = "https://www.fortuneeternal.com/novel/i-became-a-flashing-genius-at-the-magic-academy-raw-novel/chapter-145/"