In [5]:
import os
import re
import pickle
from pathlib import Path

from sqlalchemy import create_engine, insert, select

from lesson_parser import MyHTMLParser, get_words
from tonic_accent import get_sentences

# Directory and file name of the SQLite database; they are joined into the
# engine URL (db_name / db_url) further down in the notebook.
db_directory = "./"
word_dict_filename = "test_tonic_accent_word_dict.db"


Analyzing lesson 1 with length 521
Analyzing lesson 2 with length 631
Analyzing lesson 3 with length 706
Analyzing lesson 4 with length 670
Analyzing lesson 5 with length 807
Analyzing lesson 6 with length 785
Analyzing lesson 7 with length 751
Analyzing lesson 8 with length 1117
Analyzing lesson 9 with length 1059
Analyzing lesson 10 with length 1064
word_dict.index_ref : [(9, 5), (10, 2), (10, 3)]
Analyzing lesson 11 with length 1216
Analyzing lesson 12 with length 1104
Analyzing lesson 13 with length 1187
word_dict.index_ref : [(9, 5), (10, 2), (10, 3), (13, 6), (13, 14)]
Analyzing lesson 14 with length 954
word_dict.index_ref : [(9, 5), (10, 2), (10, 3), (13, 6), (13, 14), (14, 5)]
Analyzing lesson 15 with length 1058
Analyzing lesson 16 with length 1392
Analyzing lesson 17 with length 1143
Analyzing lesson 18 with length 1307
Analyzing lesson 19 with length 1206
Analyzing lesson 20 with length 1273
Analyzing lesson 21 with length 1297
Analyzing lesson 22 with length 1353
Analyzing

In [6]:
from tonic_accent import global_word_dict as word_dict
# Count the entries by iterating, so only iteration support is required of
# word_dict (exactly what the explicit counter loop relied on).
keys_number = sum(1 for _key in word_dict)
print(f"Number of keys : {keys_number}")

Number of keys : 3190


In [7]:
from typing import List
from typing import Optional
from sqlalchemy.orm import Mapped
from sqlalchemy.orm import mapped_column
from sqlalchemy.orm import relationship
from sqlalchemy.orm import DeclarativeBase

class Base(DeclarativeBase):
    """Declarative base shared by the ORM models; its ``metadata`` collects their tables."""
    pass

class WordDict(Base):
    """ORM mapping for the ``word_dict`` table (one row per word).

    ``word_struct`` and ``index`` receive ``pickle.dumps(...)`` payloads when
    the table is filled, i.e. ``bytes`` objects. They are therefore declared
    as ``Mapped[bytes]`` (a binary column) rather than ``Mapped[str]``: a text
    column only accepts raw bytes because SQLite is loosely typed.
    """

    __tablename__ = "word_dict"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # The word itself, used as the lookup key in the queries.
    word: Mapped[str]
    # Pickled tonic-accent structure of the word.
    word_struct: Mapped[bytes]
    # Optional free-text remark.
    comment: Mapped[Optional[str]]
    # Pickled list of index references for the word.
    index: Mapped[bytes]

    def __repr__(self) -> str:
        """Return a short debug representation (binary payloads are omitted)."""
        return f"WordDict(id={self.id!r}, word={self.word!r}, comment={self.comment!r})"

class Sentence(Base):
    """ORM mapping for the ``sentences`` table (one row per line of a lesson)."""

    __tablename__ = "sentences"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Raw sentence text.
    sentence: Mapped[str]
    # Optional free-text remark.
    comment: Mapped[Optional[str]]
    # Lesson number the sentence belongs to.
    lesson: Mapped[int]
    # Position of the sentence within its lesson.
    line: Mapped[int]

    def __repr__(self) -> str:
        """Return a debug representation built only from this row's own attributes.

        Every field is read through ``self``; the previous version referenced
        bare ``lesson`` / ``numero`` names, which either raised ``NameError``
        or silently displayed unrelated notebook globals.
        """
        return (
            f"Sentence(id={self.id!r}, sentence={self.sentence!r}, "
            f"lesson={self.lesson!r}, line={self.line!r}, comment={self.comment!r})"
        )

In [8]:
# Build the SQLAlchemy URL for the SQLite file. With three slashes the path
# part is relative, so the database is resolved against the working directory.
db_name = Path(db_directory) / word_dict_filename
db_url = "sqlite:///" + str(db_name)
db_url

'sqlite:///test_tonic_accent_word_dict.db'

In [9]:

# Engine bound to the SQLite file at db_url; echo=False disables SQL statement logging.
engine = create_engine(db_url, echo=False)

In [10]:
# MetaData registry holding the tables declared on the Base subclasses.
metadata = Base.metadata

In [11]:
# Create the mapped tables in the database; create_all() skips tables that
# already exist (checkfirst defaults to True), so re-running this cell is safe.
metadata.create_all(engine)

In [12]:
# Core Table objects behind the ORM classes; the insert()/select() statements
# in the following cells are built on these.
word_table = metadata.tables['word_dict']
sentence_table = metadata.tables['sentences']

In [13]:
# Sanity check: load the sentences of lesson 50 and tokenise the one at index 2.
sentences = get_sentences(50)
get_words(sentences[2])

['S01',
 'Felipe',
 'guapo',
 'dame',
 'diez',
 'lonchas',
 'de',
 'jamón',
 'de',
 'york',
 'por',
 'favor']

In [20]:

def fill_word_dict_table(word_dict):
    """Insert every entry of ``word_dict`` into the ``word_dict`` table.

    Args:
        word_dict: Mapping from a word to an object exposing ``tonic_accent``
            and ``index_ref`` attributes. Both attributes are pickled before
            being stored.

    All rows are sent in one executemany call inside a single transaction:
    ``engine.begin()`` commits when the block succeeds and rolls back if an
    insert fails, so the table is never left partially filled. An empty
    mapping is a no-op.

    Note:
        Rows are appended unconditionally, so calling this twice on the same
        database duplicates every word.
        The stored values are pickles: only ``pickle.loads`` them when the
        database file comes from a trusted source.
    """
    rows = [
        {
            "word": word,
            "word_struct": pickle.dumps(word_dict[word].tonic_accent),
            "index": pickle.dumps(word_dict[word].index_ref),
        }
        for word in word_dict
    ]
    if not rows:
        # Nothing to insert; avoid executing an INSERT with no parameter sets.
        return
    with engine.begin() as conn:
        conn.execute(insert(word_table), rows)

In [21]:
fill_word_dict_table(word_dict)

In [46]:
# Select the row(s) stored for the single-letter word "y".
stmt = select(word_table).where(word_table.c.word == "y")

In [47]:
# Run the prepared query and decode the pickled columns for display.
# NOTE(review): pickle.loads executes arbitrary code on malicious input; only
# run this against a database file produced by this notebook.
with engine.connect() as conn:
    for row in conn.execute(stmt):
        print(f"word : {row.word}, structure : {pickle.loads(row.word_struct)}, index = {pickle.loads(row.index)}")

word : y, structure : [('y', False)], index = [(1, 5), (2, 5), (4, 2), (4, 3), (4, 9), (5, 3), (5, 6), (7, 3), (7, 5), (7, 8), (8, 5), (8, 11), (9, 2), (9, 4), (9, 11), (9, 13), (10, 4), (10, 4), (10, 7), (12, 2), (12, 3), (12, 6), (12, 7), (12, 10), (13, 4), (13, 8), (14, 4), (14, 9), (15, 6), (15, 8), (16, 2), (16, 3), (16, 4), (16, 5), (16, 7), (16, 7), (16, 8), (16, 9), (16, 13), (16, 14), (16, 15), (17, 6), (18, 12), (19, 6), (21, 5), (21, 13), (22, 12), (23, 8), (23, 8), (23, 13), (24, 7), (24, 9), (24, 10), (25, 3), (25, 8), (25, 11), (25, 11), (25, 16), (25, 18), (26, 4), (27, 3), (27, 5), (27, 6), (27, 6), (27, 6), (27, 12), (27, 14), (28, 6), (28, 7), (28, 9), (28, 12), (29, 3), (29, 4), (29, 4), (29, 8), (29, 9), (29, 9), (29, 9), (29, 14), (29, 15), (29, 17), (29, 17), (29, 17), (30, 4), (30, 5), (30, 9), (30, 10), (30, 13), (30, 14), (31, 0), (31, 7), (31, 9), (31, 13), (32, 0), (32, 5), (32, 11), (33, 0), (33, 8), (33, 10), (33, 18), (33, 19), (34, 0), (34, 3), (34, 4), (

In [15]:
# Look up the stored syllable structure of "traduzca": fetch the matching
# rows first, then print them once the connection has been released.
stmt = select(word_table).where(word_table.c.word == "traduzca")
with engine.connect() as conn:
    matching_rows = conn.execute(stmt).all()
for row in matching_rows:
    print(f"word : {row.word}, structure : {pickle.loads(row.word_struct)}")

word : traduzca, structure : [('Tra', False), ('du', True), ('zca', False)]


In [16]:
# Same lookup for the word "y": collect the rows, then print each decoded
# structure after the connection is closed.
stmt = select(word_table).where(word_table.c.word == "y")
with engine.connect() as conn:
    matching_rows = conn.execute(stmt).all()
for row in matching_rows:
    print(f"word : {row.word}, structure : {pickle.loads(row.word_struct)}")

word : y, structure : [('Y', False)]


In [21]:
# Load every lesson's sentences into the `sentences` table.
# Lessons are numbered 1..LESSON_COUNT; `line` is the 0-based position of the
# sentence inside its lesson.
LESSON_COUNT = 79

# engine.begin() wraps the whole load in one transaction: committed on
# success, rolled back if any lesson fails, so no partial load is left behind.
# NOTE: rows are appended unconditionally; re-running this cell duplicates them.
with engine.begin() as conn:
    for lesson in range(1, LESSON_COUNT + 1):
        sentences = get_sentences(lesson)
        rows = [
            {"sentence": sentence, "lesson": lesson, "line": line}
            for line, sentence in enumerate(sentences)
        ]
        if rows:
            # One executemany per lesson instead of one statement per sentence.
            conn.execute(insert(sentence_table), rows)

In [24]:
# Print the sentences of lesson 72 in line order. Without ORDER BY the
# database is free to return rows in any order, so the ordering is explicit.
stmt = (
    select(sentence_table)
    .where(sentence_table.c.lesson == 72)
    .order_by(sentence_table.c.line)
)
with engine.connect() as conn:
    for row in conn.execute(stmt):
        print(f"line : {row.line}, sentence : {row.sentence}")

line : 0, sentence : N72-Lección setenta y dos
line : 1, sentence : S00-TITLE-Ya que estoy
line : 2, sentence : S01-Mírese en el espejo, ¿cómo se siente? 
line : 3, sentence : S02-Así así
line : 4, sentence : S03-Es una prenda de entretiempo, de algodón. Le serviría tanto para primavera como para otoño. 
line : 5, sentence : S04-Me queda un poco estrecho de cintura, ¿no? 
line : 6, sentence : S05-A ver, muévase un poco, dese la vuelta. Perdiendo un kilito, le quedaría perfecto. 
line : 7, sentence : S06-Tengo que ponerme a dieta, tiene toda la razón. 
line : 8, sentence : S07-Es de rayas ¿Cree que me favorece? 
line : 9, sentence : S08-Le queda fabuloso, pero existe también liso y de cuadros. Yo que usted me llevaría los tres. 
line : 10, sentence : S09-Venga, un día es un día. Ah, ya que estoy, he visto que tiene sección de caballeros. 
line : 11, sentence : S10-Mi marido va siempre de traje, pero me gustaría algo más informal. Una americana sport, talla 50. 
line : 12, sentence : S11

In [38]:
# Pattern for file names starting with "L" followed by three digits.
# A raw string is required: in a normal string literal "\d" is an invalid
# escape sequence (DeprecationWarning, SyntaxWarning since Python 3.12).
m = r"L[\d]{3}"
p = re.compile(m)
# match() anchors at the start of the string, so the ".html" suffix is ignored.
if p.match("L067.html"):
    print("match")

match


In [41]:
# Inspect the match object itself: only the leading "L067" is matched.
p.match("L067.html")

<re.Match object; span=(0, 4), match='L067'>

In [44]:
# Characters 1..3 of the file name are the zero-padded number ("067" -> 67);
# presumably the lesson number -- confirm against the file naming scheme.
fn="L067.html"
int(fn[1:4])

67

In [45]:
# Scratch list used below to experiment with list.extend().
a=[1,2,3]

In [46]:
# Display the freshly created list.
a

[1, 2, 3]

In [47]:
# List the attributes and methods available on a list object.
dir(a)

['__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [48]:
# extend() appends each element of the given iterable to the list in place.
a.extend([7, 8, 9])

In [49]:
# Display the list after extending it with integers.
a

[1, 2, 3, 7, 8, 9]

In [51]:
# Extending with a list of strings appends each string as one element.
a.extend(["toto", "titi"])

In [52]:
# Display the list after extending it with a list of strings.
a

[1, 2, 3, 7, 8, 9, 'toto', 'titi']

In [54]:
# Gotcha: a str is itself an iterable of characters, so extend() appends
# 't', 'o', 't', 'o' individually rather than the single element "toto".
a.extend("toto")

In [55]:
# Display the list after extending it with a bare string.
a

[1, 2, 3, 7, 8, 9, 'toto', 'titi', 't', 'o', 't', 'o']

In [56]:
# namedtuple is not imported anywhere else in this notebook, so import it here
# to keep the cell runnable on a fresh kernel.
from collections import namedtuple

# Record pairing a word's tonic-accent structure with its list of index references.
Word_properties = namedtuple("Word_properties", "tonic_accent index_ref")

In [61]:
# Rebind `a` (previously the scratch list) to a Word_properties instance.
a=Word_properties("dummy", [(1, 15), (2, 4)])

In [62]:
# Display the namedtuple instance.
a

Word_properties(tonic_accent='dummy', index_ref=[(1, 15), (2, 4)])

In [65]:
# The namedtuple itself is immutable, but its list field can still be mutated
# in place: extend() grows index_ref without rebinding the attribute.
a.index_ref.extend([(34, 5), (45, 7), (78, 19)])
a

Word_properties(tonic_accent='dummy', index_ref=[(1, 15), (2, 4), (34, 5), (45, 7), (78, 19)])

In [60]:
# NOTE(review): this cell's execution count (60) precedes the cells above it,
# so its saved output shows an earlier kernel state, not the current value of `a`.
a

Word_properties(tonic_accent='dummy', index_ref=[(1, 15), (2, 4), 'toto'])

In [66]:
# b[1] refers to the same namedtuple object as `a`; no copy is made.
b = {1 : a, 2 : "toto"}

In [68]:
# Mutate the index_ref list through the dict entry; the list is shared with `a`.
b[1].index_ref.extend([(15, 3), (23, 1)])

In [69]:
# Display the dict to confirm index_ref was extended in place.
b

{1: Word_properties(tonic_accent='dummy', index_ref=[(1, 15), (2, 4), (34, 5), (45, 7), (78, 19), (15, 3), (23, 1)]),
 2: 'toto'}

In [70]:
# Second scratch dict whose entry 3 is a plain list of tuples.
c = {3 : [(23, 5), (43, 4)], 4 : "titi"}

In [71]:
# Display the second scratch dict.
c

{3: [(23, 5), (43, 4)], 4: 'titi'}

In [72]:
# Append the tuples stored under c[3] to b[1].index_ref; c[3] itself is left unchanged.
b[1].index_ref.extend(c[3])

In [73]:
# Display the dict after merging in the references from c[3].
b

{1: Word_properties(tonic_accent='dummy', index_ref=[(1, 15), (2, 4), (34, 5), (45, 7), (78, 19), (15, 3), (23, 1), (23, 5), (43, 4)]),
 2: 'toto'}