In [7]:
# This import is needed so that the type annotations on Term and Sentence
# can refer to each other, regardless of which class is defined first.
from __future__ import annotations

import csv

In [11]:
class Term:
    """All the data needed to generate an Anki flashcard for one
    word / phrase in the target language.

    The example sentences themselves are not passed to the constructor:
    ``sentences`` starts out empty and is meant to be filled in later.
    """
    text: str  # the word / phrase in the target language.
    translation: str  # the English translation of the word / phrase.
    sentence_ids: list[str]  # list of Tatoeba sentence ids for
                             # the example sentences that go with this term.
                             # This should maybe be a list of ints instead of strs.
    sentences: list[Sentence]  # Objects representing the example sentences.

    def __init__(self, text: str, translation: str, sentence_ids: list[str]) -> None:
        """Create a term that has no example sentences attached yet.

        Args:
            text: The word / phrase in the target language.
            translation: The English translation of ``text``.
            sentence_ids: Tatoeba sentence ids of the example sentences.
                The list is stored as given (not copied).
        """
        self.text = text
        self.translation = translation
        self.sentence_ids = sentence_ids
        self.sentences = []  # we will fill this later.

    def __repr__(self) -> str:
        """Return a short, readable description for debugging."""
        # Only the number of sentences is shown: each Sentence keeps a
        # reference back to its parent Term, so summarising here keeps the
        # output short and avoids any Term -> Sentence -> Term cycle.
        return (
            f"{type(self).__name__}("
            f"text={self.text!r}, "
            f"translation={self.translation!r}, "
            f"sentence_ids={self.sentence_ids!r}, "
            f"n_sentences={len(self.sentences)})"
        )

class Sentence:
    """One example sentence attached to a vocab term."""
    tatoeba_id: str  # Tatoeba sentence id for this sentence.
                     # Should maybe be an int.
    text: str  # The text of the sentence.
    translations: list[str] | None  # English translations of this sentence.
                                    # Not all sentences will have translations,
                                    # so this value could be None.
    audio_id: str | None  # Tatoeba audio id for this sentence. Not all
                          # sentences will have audios. If there is no
                          # audio, this will be None. This should maybe be an int.
    parent_term: Term  # A reference to the vocab term that this sentence
                       # serves as an example for. This is useful in the
                       # data generation part.

    def __init__(
        self,
        parent_term: Term,
        tatoeba_id: str,
        text: str,
        translations: list[str] | None = None,
        audio_id: str | None = None,
    ) -> None:
        """Create an example sentence belonging to ``parent_term``.

        Args:
            parent_term: The vocab term this sentence is an example for.
            tatoeba_id: Tatoeba sentence id for this sentence.
            text: The text of the sentence.
            translations: English translations, or None if there are none.
            audio_id: Tatoeba audio id, or None if there is no audio.
        """
        self.parent_term = parent_term
        self.tatoeba_id = tatoeba_id
        self.text = text
        self.translations = translations
        self.audio_id = audio_id

    def __repr__(self) -> str:
        """Return a short, readable description for debugging."""
        # parent_term is deliberately left out: it points back at the term
        # that owns this sentence, and including it would bloat the output.
        return (
            f"{type(self).__name__}("
            f"tatoeba_id={self.tatoeba_id!r}, "
            f"text={self.text!r}, "
            f"translations={self.translations!r}, "
            f"audio_id={self.audio_id!r})"
        )

In [9]:
def process_tsv(row_fn, filepath_str, encoding='utf-8-sig'):
    """Feed every row of a tab-separated file to ROW_FN, one row at a time.

    ROW_FN is called once per row, in file order, with the row as produced
    by csv.reader (a list of the row's cell strings). FILEPATH_STR is the
    file to read and ENCODING is handed to open(); the default
    'utf-8-sig' also drops a leading byte-order mark if the file has one.
    Nothing is returned.
    """
    # newline='' leaves the line-ending handling to the csv module.
    with open(filepath_str, encoding=encoding, newline='') as tsv_file:
        for cells in csv.reader(tsv_file, delimiter='\t'):
            row_fn(cells)