In [1]:
from src.translate_domain import *
from src.tag_to_book import *
from src.llm_domain import *
from src.tag import *
from src.tag import main as tag_main

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pprint

In [3]:
proto_chapters = tag_main()

[<src.tag.ProtoChapter object at 0x7f43abe3b0b0>, <src.tag.ProtoChapter object at 0x7f43abe3b260>, <src.tag.ProtoChapter object at 0x7f43abe3b680>, <src.tag.ProtoChapter object at 0x7f43abe3b980>, <src.tag.ProtoChapter object at 0x7f43abe3bb30>]
Cigars had burned low, and we were beginning to sample the disillusionment...


In [4]:
proto_chapters

[<src.tag.ProtoChapter at 0x7f43abe3b0b0>,
 <src.tag.ProtoChapter at 0x7f43abe3b260>,
 <src.tag.ProtoChapter at 0x7f43abe3b680>,
 <src.tag.ProtoChapter at 0x7f43abe3b980>,
 <src.tag.ProtoChapter at 0x7f43abe3bb30>]

In [5]:
l = []
for chapter in proto_chapters:
   l.append(proto_to_domain(chapter))

In [6]:
b = Book(contents=l, base_language=LanguageEnum.eng)

In [7]:
g = call_gemini()

In [8]:
len(b.lines)

25

In [31]:
class BookTranslaor:
    def __init__(
        self, book: Book, model: LLM, to_language: Language, from_language: Language | None = None
    ) -> None:
        self.book = book
        self.model = model
        self.to_language = to_language
        if not from_language:
            self.from_language = book.base_language
        else:
            self.from_language = from_language
        self.calc_book_token_count()
        self.translater = Translater(model)
        self.prompt_manager = PromptManager(model)

    def calc_book_token_count(self, force_update=False) -> None:
        self.book.set_token_count(self.from_language, self.model, force_update)

    def create_context(self, component: SomeTextComponent, contextual_lines=None) -> PromptContext:
        lines = component_to_lines(component)
        return PromptContext(lines, self.from_language, self.to_language, contextual_lines)

    def get_all_prompt_contexts(self) -> list[PromptContext]:
        return self.create_segment_prompt_context_from_any(self.book)

    def reduce_output_from_components(
        self, components: list[TextComponent]
    ) -> list[PromptContext]:
        if len(components) == 1:
            return self.create_segment_prompt_context_from_any(components[0])
        index = 0
        result = []
        while len(components[:-index]) > 1:
            current_context = self.create_context(components[:-index], None)
            if self.prompt_manager.is_able_to_get_output(current_context):
                result.extend(self.create_segment_prompt_context_from_any(components[:-index]))
                result.extend(self.create_segment_prompt_context_from_any(components[-index:]))
                return result
            else:
                index += 1
        result.extend(self.create_segment_prompt_context_from_any(components[0].contents))
        result.extend(self.create_segment_prompt_context_from_any(components[1:]))
        return result

    def reduce_output_from_a_componnent(self, a_component: TextComponent) -> list[PromptContext]:
        if isinstance(a_component, Line):
            logger.error(
                "failed to create prompt",
                at="create_segment_prompt_from_any",
                error="The line is too big to translate.",
                line=a_component.text,
            )
            msg = f"The line is too big to translate.{a_component}"
            raise PromptSizeError(msg)
        else:
            return self.reduce_output(a_component.contents)

    def reduce_output(self, component: SomeTextComponent) -> list[PromptContext]:
        if isinstance(component, list):
            return self.reduce_output_from_components(component)
        else:
            return self.reduce_output_from_a_componnent(component)

    def create_segment_prompt_context_from_any(
        self, component: SomeTextComponent
    ) -> list[PromptContext]:
        try:
            result: list[PromptContext] = []
            current_context = self.create_context(component)
            if not self.prompt_manager.is_able_to_get_output(current_context):
                print("not able to get output")
                result.extend(self.reduce_output(component))
            print("able to get output")

            if not self.prompt_manager.is_able_to_send_prompt(current_context):
                print("not able to send prompt")
                result.extend(self.reduce_output(component))
            print("able to send prompt")
            context_lines = self.append_some_context_as_long_as_affordable(component)
            # print("context_lines", context_lines)
            result.append(self.create_context(component, context_lines))
            return result

        except Exception as e:
            logger.exception(
                "failed to create prompt",
                at="create_segment_prompt_from_any",
                error=e,
                component=component,
            )
            msg = f"failed to create prompt {e}"
            raise ValueError(msg)

    def find_context_start_index(
        self,
        component: SomeTextComponent,
        target_line_start_index: int,
        target_line_end_index: int,
    ) -> int:  # type: ignore
        # TODO: Not clever code here.

        move_count = 1
        book_lines = self.book.lines
        while True:
            start_index_trial = target_line_start_index - move_count
            if start_index_trial < 0:
                return 0
            current_context = self.create_context(
                component, book_lines[start_index_trial : target_line_end_index + 1]
            )
            if self.prompt_manager.is_able_to_send_prompt(current_context):
                move_count += 1
            else:
                return start_index_trial + 1

    def find_context_end_index(
        self,
        component: SomeTextComponent,
        target_line_start_index: int,
        target_line_end_index: int,
    ) -> int:  # type: ignore
        move_count = 1
        book_lines = self.book.lines
        while True:
            end_index_trial = target_line_end_index + move_count
            if end_index_trial > len(book_lines) - 1:
                return len(book_lines) - 1
            # TODO: Not clever code here.
            current_context = self.create_context(
                component, book_lines[target_line_start_index : end_index_trial + 1]
            )
            if self.prompt_manager.is_able_to_send_prompt(current_context):
                move_count += 1
            else:
                return end_index_trial - 1

    def append_some_context_as_long_as_affordable(
        self, component: SomeTextComponent
    ) -> Lines | None:
        if isinstance(component, list):
            target_line_start_index = component[0].lines[0].id
            target_line_end_index = component[-1].lines[-1].id
        else:
            target_line_start_index = component.lines[0].id
            target_line_end_index = component.lines[-1].id

        book_lines = self.book.lines
        start_index = self.find_context_start_index(
            component, target_line_start_index, target_line_end_index
        )
        end_index = self.find_context_end_index(component, start_index, target_line_end_index)
        if start_index == target_line_start_index and end_index == target_line_end_index:
            return []
        else:
            return book_lines[start_index:end_index]

In [9]:
bt = BookTranslaor(b, g, LanguageEnum.jpn)

In [10]:
cps = bt.create_segment_prompt_context_from_any(b)

not able to get output
able to get output
able to send prompt
able to get output
able to send prompt
able to get output
able to send prompt


In [11]:
len(cps)

3

In [12]:
len(cps[0].lines)

1

In [13]:
len(cps[1].lines)

1

In [14]:
len(cps[2].lines)

25

In [15]:
cps[2].get_input_token_count(g)
cps[2].get_output_token_count(g)

3950

In [16]:
g.output_token_limit

2048

In [17]:
len(b.lines)

25

In [18]:
cps[0].get_input_token_count(g)

17

In [19]:
cps[1].get_output_token_count(g)

80

In [20]:
len(cps[1].lines)

1

In [21]:
len(cps[0].contextual_lines)

24

In [22]:
len(cps[0].contextual_lines)

24

In [23]:
len(cps[0].lines)

1

In [24]:
all = bt.create_context(b)

In [25]:
g.output_token_limit

2048

In [26]:
bt.prompt_manager.calculate_output_token(all)

3950

In [27]:
bt.prompt_manager.calculate_input_token(all)

798

In [28]:
cps[1].get_output_token_count(g)

80

In [29]:
cps[1].get_input_token_count(g)

16

In [30]:
len(a.contextual_lines)

NameError: name 'a' is not defined

In [None]:
ll = component_to_lines(b)

In [None]:
ll[0].text

In [None]:
remove_empty_lines(ll)

In [None]:
bt.create_context(b)