## Database

In [None]:
#from google.colab import drive
#drive.mount('/gdrive')

In [None]:
# path of the SQLite database file handed to the function calls in this notebook;
# the backup/restore shell cells hard-code the same file name, so keep them in sync
DB_PATH = 'lanelexicon.sqlite'

### Backup

In [None]:
# backup database: copy lanelexicon.sqlite to lanelexicon.sqlite.bk
# (the file name is hard-coded here rather than taken from DB_PATH)
!cp lanelexicon.sqlite lanelexicon.sqlite.bk

### Restore

In [None]:
# restore backup: delete the working database, then copy lanelexicon.sqlite.bk back in its place
# NOTE(review): the functions in this notebook switch the database to WAL journal mode;
# presumably any leftover lanelexicon.sqlite-wal / -shm files should be removed too — confirm
!rm lanelexicon.sqlite
!cp lanelexicon.sqlite.bk lanelexicon.sqlite

## Operations

In [None]:
import sys
import sqlite3

In [None]:
def view_database(db_path, table_name="DICTIONARY", column_name="definition", limit=5):
    """Print a preview of one column of a table in a SQLite database.

    Prints a short header (database path, table, column, total row count) and
    then up to ``limit`` rows. Each row is shown as the raw value followed, for
    text values, by its ``unicode_escape``-encoded bytes so that invisible or
    combining characters can be inspected.

    If the table or the column does not exist, the available tables/columns
    are printed instead and the function returns without showing any rows.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Table to read; looked up by exact name in ``sqlite_master``.
        column_name: Column to display; must appear in ``PRAGMA table_info``.
        limit: Maximum number of rows to print.

    Returns:
        None. All results are written to stdout.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("pragma journal_mode=wal")
        cursor = conn.cursor()

        # check if table exists
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
            (table_name,),
        )
        if not cursor.fetchone():
            print(f"Table '{table_name}' not found in database.")

            # show available tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = cursor.fetchall()
            if tables:
                print("Available tables:", [table[0] for table in tables])
            return

        # identifiers cannot be bound as parameters, so quote them explicitly
        quoted_table = '"' + table_name.replace('"', '""') + '"'

        # check if column exists
        cursor.execute(f"PRAGMA table_info({quoted_table})")
        columns = [column[1] for column in cursor.fetchall()]

        if column_name not in columns:
            print(f"Column '{column_name}' not found in table '{table_name}'")
            print(f"Available columns: {columns}")
            return

        quoted_column = '"' + column_name.replace('"', '""') + '"'

        # get total count
        cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
        total_count = cursor.fetchone()[0]

        print(f"Database: {db_path}")
        print(f"Table: {table_name}")
        print(f"Column: {column_name}")
        print(f"Total rows: {total_count}")
        print("=" * 80)

        # fetch and display data
        cursor.execute(
            f"SELECT rowid, {quoted_column} FROM {quoted_table} LIMIT ?",
            (limit,),
        )
        for i, (rowid, value) in enumerate(cursor.fetchall(), 1):
            print(f"\nRow {i} (ID: {rowid}):")
            print("-" * 40)
            print(value)
            # NULL / non-text values have no .encode(); only text gets the escaped view
            if isinstance(value, str):
                print(value.encode("unicode_escape"))
    finally:
        # runs on the early returns above and on errors, so the handle never leaks
        conn.close()

In [None]:
def print_definitions(db_path, row_limit=None, row_id=None):
    """Print definitions from the ``dictionary`` table with their escaped form.

    For every selected row the definition is printed, then (for text values)
    its ``unicode_escape``-encoded bytes, then a blank line.

    Args:
        db_path: Path of the SQLite database file.
        row_limit: Maximum number of non-NULL definitions to print when
            ``row_id`` is not given; any non-int value falls back to 10.
        row_id: If given, print only the row whose ``id`` equals this value
            (``row_limit`` is then ignored).

    Returns:
        None. All results are written to stdout.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # determine query based on row_id or row_limit
        if row_id is not None:
            cursor.execute(
                "SELECT id, definition FROM dictionary WHERE id = ?",
                (row_id,)
            )
        else:
            limit = row_limit if isinstance(row_limit, int) else 10
            cursor.execute(
                "SELECT id, definition FROM dictionary WHERE definition IS NOT NULL LIMIT ?",
                (limit,)
            )

        for _pk, definition in cursor.fetchall():
            print(definition)
            # the row_id query does not filter NULLs, so guard before encoding
            if isinstance(definition, str):
                print(definition.encode('unicode_escape'))
            print()
    finally:
        conn.close()

In [None]:
# preview the first rows of DICTIONARY.definition (limit defaults to 5)
view_database(DB_PATH, "DICTIONARY", "definition")


Database: lanelexicon.sqlite
Table: DICTIONARY
Column: definition
Total rows: 52914
Showing first 5 rows:

Row 1 (ID: 1):
----------------------------------------
ا
b'\\u0627'

Row 2 (ID: 2):
----------------------------------------
ا <i>The first letter of the alphabet</i> [according to the order in which the letters are now commonly disposed; and also according to the original order, which see in art. ابجد]: called أَلِفٌ. [This name, like most of the other names of Arabic letters, is traceable to the Phœnician language, in which it signifies “an ox;” the ancient Phœnician form of the letter thus called being a rude representation of an ox's head.] It is, of all the letters, that which is most frequent in speech: and some say that, in آلٓمٓ, in the Kur [ch. ii. &c.], it is a name of God. (TA.) Its name is properly fem., as is also that of every other letter; [and hence its pl. is أَلِفَاتٌ;] but it may be made masc.: so says Ks: Sb says that all the letters of the alphabet are masc

In [None]:
def fix_replace_markers(db_path):
    """Replace the ``___`` and ``===`` markers in DICTIONARY.DEFINITION with HTML labels.

    ``===`` becomes ``<br /><b>SIGNIFICATION (___)</b><br />`` and ``___``
    becomes ``<br /><b>DISSOCIATION (===)</b><br />``. Because each label
    contains the *other* marker, the original ``___`` is first masked with a
    placeholder so that the three nested REPLACE calls do not rewrite each
    other's output.

    Only rows that actually contain one of the markers are updated, so the
    printed count is the number of rows that were changed.

    Warning: the function is not idempotent. The inserted labels contain
    ``___`` and ``===`` themselves, so a second run would replace the markers
    inside those labels again. Run it once per restored database.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        None. The number of updated rows is printed.
    """
    # INSTR does a literal substring search. The previous LIKE '%___%' filter
    # treated each '_' as a single-character wildcard and therefore matched
    # every definition that was at least three characters long.
    query = """
        UPDATE DICTIONARY
        SET DEFINITION = REPLACE(
            REPLACE(
                REPLACE(
                    DEFINITION,
                    '___',
                    '{__DISS__}'  -- 1) mask original ___
                ),
                '===',
                '<br /><b>SIGNIFICATION (___)</b><br />'  -- 2) replace ===
            ),
            '{__DISS__}',
            '<br /><b>DISSOCIATION (===)</b><br />'   -- 3) unmask into dissociation
        )
        WHERE INSTR(DEFINITION, '___') > 0
           OR INSTR(DEFINITION, '===') > 0;
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('PRAGMA journal_mode=WAL')
        cursor = conn.cursor()
        cursor.execute(query)
        changed = cursor.rowcount

        conn.commit()
        print(f"markers replaced in {changed} rows.")
    finally:
        # closing without a commit discards a half-applied update on error
        conn.close()

In [None]:
# rewrite the '___' / '===' markers in DICTIONARY.DEFINITION in place.
# Run this once per restored database: the inserted labels themselves contain
# '___' and '===', so a second run would replace the markers inside those labels again.
fix_replace_markers(DB_PATH)

In [None]:
def replace_arrows(db_path):
    """Replace every '🡻' in DICTIONARY.DEFINITION with a bold down arrow.

    Each occurrence of '🡻' is rewritten to ``<b>↓</b>``. Only rows that
    contain the symbol are updated, so the printed count is the number of
    rows that were changed. Running the function again is harmless because
    the replacement text does not contain '🡻'.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        None. The number of updated rows is printed.
    """
    # the statement previously had a stray '"' after the table name, which
    # made it invalid SQL; the WHERE clause keeps untouched rows out of rowcount
    query = """
        UPDATE DICTIONARY
        SET DEFINITION = REPLACE(DEFINITION, '🡻', '<b>↓</b>')
        WHERE INSTR(DEFINITION, '🡻') > 0;
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('PRAGMA journal_mode=WAL')
        cursor = conn.cursor()
        cursor.execute(query)
        changed = cursor.rowcount

        conn.commit()
        print(f"arrows replaced in {changed} rows.")
    finally:
        conn.close()

In [None]:
# run the arrow replacement defined above against the working database
replace_arrows(DB_PATH)

In [None]:
import re

from tqdm.notebook import tqdm
#from tqdm import tqdm

In [None]:
# headword matcher, anchored at the start of the text:
#   group 1 - one or more whitespace-separated runs of characters in U+0600-U+06FF
#   group 2 - an optional number that follows after whitespace
pattern = re.compile(r'^\s*([\u0600-\u06FF]+(?:\s+[\u0600-\u06FF]+)*)(?:\s+(\d+))?')

def wrap_heading(def_text):
    """Wrap the leading Arabic headword (and optional number) in ``<h2>`` tags.

    When ``def_text`` begins with the headword pattern, that prefix (including
    any leading whitespace) is replaced by ``<h2>words</h2>`` or
    ``<h2>words number</h2>``; the remainder of the text is kept as is.
    Text that does not start with the pattern is returned unchanged.
    """
    found = pattern.match(def_text)
    if found is None:
        return def_text

    words, number = found.groups()
    # a single space separates the headword from its number, when there is one
    label = f"{words} {number}" if number else words
    # everything after the matched prefix is carried over untouched
    return f"<h2>{label}</h2>" + def_text[found.end():]

def process_definitions(db_path):
    """Apply ``wrap_heading`` to every non-NULL definition and save the changes.

    All non-NULL definitions of the DICTIONARY table are loaded, passed through
    ``wrap_heading`` (with a tqdm progress bar), and the rows whose text changed
    are written back in one batch and committed.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        None. Progress and a summary are printed.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('pragma journal_mode=wal')
        cursor = conn.cursor()

        # select primary key and definition for all non-null rows
        cursor.execute(
            "SELECT rowid, definition FROM DICTIONARY WHERE definition IS NOT NULL"
        )
        definitions = cursor.fetchall()

        updates = []

        # the rows are already in memory, so their count is the progress total
        for rowid, definition in tqdm(definitions, total=len(definitions), desc="processing rows"):
            new_def = wrap_heading(definition)
            if new_def != definition:
                updates.append((new_def, rowid))

        if updates:
            print(f"\nupdating {len(updates)} rows...")
            cursor.executemany(
                "UPDATE DICTIONARY SET definition = ? WHERE rowid = ?",
                updates
            )
            conn.commit()
            print("update committed.")
        else:
            print("\nno rows needed updating.")
    finally:
        # closing without a commit discards a half-applied batch on error
        conn.close()

Processing rows: 100%|██████████| 52912/52912 [00:00<00:00, 227454.80it/s]



Updating 52911 rows...
Update committed.


In [None]:
# wrap the leading Arabic headword of every non-NULL definition in <h2>…</h2>
# (modifies the database in place)
process_definitions(DB_PATH)

In [None]:
# preview the first rows again to check the <h2> headings
view_database(DB_PATH, "DICTIONARY", "definition")

Database: lanelexicon.sqlite
Table: DICTIONARY
Column: definition
Total rows: 52914
Showing first 5 rows:

Row 1 (ID: 1):
----------------------------------------
<h2>ا</h2>
b'<h2>\\u0627</h2>'

Row 2 (ID: 2):
----------------------------------------
<h2>ا</h2> <i>The first letter of the alphabet</i> [according to the order in which the letters are now commonly disposed; and also according to the original order, which see in art. ابجد]: called أَلِفٌ. [This name, like most of the other names of Arabic letters, is traceable to the Phœnician language, in which it signifies “an ox;” the ancient Phœnician form of the letter thus called being a rude representation of an ox's head.] It is, of all the letters, that which is most frequent in speech: and some say that, in آلٓمٓ, in the Kur [ch. ii. &c.], it is a name of God. (TA.) Its name is properly fem., as is also that of every other letter; [and hence its pl. is أَلِفَاتٌ;] but it may be made masc.: so says Ks: Sb says that all the letter

In [None]:
# backup database: copy the current state to lanelexicon.sqlite.intermediate.bk
# (a separate file from the lanelexicon.sqlite.bk backup taken at the top of the notebook)
!cp lanelexicon.sqlite lanelexicon.sqlite.intermediate.bk

In [None]:
# restore backup: delete the working database, then copy
# lanelexicon.sqlite.intermediate.bk back in its place
# NOTE(review): the functions in this notebook switch the database to WAL journal mode;
# presumably any leftover lanelexicon.sqlite-wal / -shm files should be removed too — confirm
!rm lanelexicon.sqlite
!cp lanelexicon.sqlite.intermediate.bk lanelexicon.sqlite

In [None]:
import re
import pandas as pd

from tqdm.notebook import tqdm
#from tqdm import tqdm

In [None]:
# define the mapping
#mapping = {
#    '1':  "-> I: فَعَل/فَعُل/فَعِل",
#    '2':  "-> II: فَعّل",
#    '3':  "-> III: فَاعَل",
#    '4':  "-> IV: أَفْعَل",
#    '5':  "-> V: تَفَعّل",
#    '6':  "-> VI: تَفَاعَل",
#    '7':  "-> VII: اِنْفَعَل",
#    '8':  "-> VIII: اِفْتَعَل",
#    '9':  "-> IX: اِفْعَل",
#    '10': "-> X: اِسْتَفْعَل",
#    '11': "-> XI: اِفْعالَّ",
#    '12': "-> XII: اِفْعَوْعَلَ",
#    '13': "-> XIII: اِفْعَوَّلَ",
#}
# verb-form number -> "(n: pattern)" label that replaces the bare number in a heading
# NOTE: the next cell defines process_mapping(), a function version of this same
# step with its own copy of this table; run one or the other.
mapping = {
    '1':  "(1: فَعَل/فَعُل/فَعِل)",
    '2':  "(2: فَعّل)",
    '3':  "(3: فَاعَل)",
    '4':  "(4: أَفْعَل)",
    '5':  "(5: تَفَعّل)",
    '6':  "(6: تَفَاعَل)",
    '7':  "(7: اِنْفَعَل)",
    '8':  "(8: اِفْتَعَل)",
    '9':  "(9: اِفْعَل)",
    '10': "(10: اِسْتَفْعَل)",
    # spelling aligned with template_mapping in the next cell
    '11': "(11: اِفْعَالَّ)",
    '12': "(12: اِفْعَوْعَلَ)",
    '13': "(13: اِفْعَوَّلَ)",
}

# U+200E LEFT-TO-RIGHT MARK, inserted between the headword and the label
lrm_unicode = '\u200E'

# regex patterns; capture:
# - the opening <h2> tag (with any attrs/spaces)
# - either (word + number) or (number + word), can capture multiple arabic words before number
# - the closing </h2>
# - the rest of the text
header_regex = re.compile(
    r'^(?P<open><h2\b[^>]*>\s*)'
    r'(?:(?P<word1>.+?)\s+(?P<num1>\d+)'
    r'|(?P<num2>\d+)\s+(?P<word2>.+?))'
    r'(?P<close>\s*</h2>)(?P<rest>.*)$',
    flags=re.DOTALL
)

# compute new definitions
def map_definition(text):
    """Replace the form number in a leading ``<h2>`` heading with its label.

    Returns ``text`` unchanged when it is not a string, when it does not start
    with an ``<h2>`` heading made of a word and a number, or when the number
    has no entry in ``mapping``.
    """
    if not isinstance(text, str):
        return text
    m = header_regex.match(text)
    if not m:
        return text

    # determine which form matched
    num = m.group('num1') or m.group('num2')
    word = m.group('word1') or m.group('word2')

    if num not in mapping:
        return text

    # rebuild the header: preserve the original opening & closing
    new_header = (
        m.group('open')
        + word
        + lrm_unicode
        + ' '
        + mapping[num]
        + m.group('close')
    )
    return new_header + m.group('rest')

# DB_PATH is the notebook-level constant; the name `db_path` used here before
# only exists as a function parameter and raised NameError at cell level
conn = sqlite3.connect(DB_PATH)
try:
    conn.execute('pragma journal_mode=wal')
    df = pd.read_sql_query('SELECT id, definition FROM DICTIONARY', conn)

    df['new_definition'] = df['definition'].apply(map_definition)

    # update database
    cur = conn.cursor()
    for _, row in df.iterrows():
        if row['new_definition'] != row['definition']:
            cur.execute(
                'UPDATE DICTIONARY SET definition = ? WHERE id = ?',
                # int() keeps the bound parameter a plain Python integer
                (row['new_definition'], int(row['id']))
            )
    conn.commit()
finally:
    # closing without a commit discards a half-applied update on error
    conn.close()

df

Unnamed: 0,id,definition,new_definition
0,1,<h2>ا</h2>,<h2>ا</h2>
1,2,<h2>ا</h2> <i>The first letter of the alphabet...,<h2>ا</h2> <i>The first letter of the alphabet...
2,3,<h2>اب</h2>,<h2>اب</h2>
3,4,"<h2>أَبَّ 1</h2> , (T, S, M, &c.,) aor. ﹻ, (M,...","<h2>أَبَّ‎ (1: فَعَل/فَعُل/فَعِل)</h2> , (T, S..."
4,5,"<h2>إِاْتَبَبَ 8</h2> see 1, first signification.","<h2>إِاْتَبَبَ‎ (8: اِفْتَعَل)</h2> see 1, fir..."
...,...,...,...
52909,52910,<h2>مُيَاوَمَةٌ</h2> from اليَوْمُ is like مُل...,<h2>مُيَاوَمَةٌ</h2> from اليَوْمُ is like مُل...
52910,52911,<h2>يى</h2>,<h2>يى</h2>
52911,52912,<h2>يَيَّيْتُ يَآءً حَسَنَةً 2</h2> [<i>I wro...,<h2>يَيَّيْتُ يَآءً حَسَنَةً‎ (2: فَعّل)</h2>...
52912,52913,,


In [None]:
# verb-form number -> "(n: pattern)" label that replaces the bare number in a heading
template_mapping = {
    '1': '(1: فَعَل/فَعُل/فَعِل)',
    '2': '(2: فَعّل)',
    '3': '(3: فَاعَل)',
    '4': '(4: أَفْعَل)',
    '5': '(5: تَفَعّل)',
    '6': '(6: تَفَاعَل)',
    '7': '(7: اِنْفَعَل)',
    '8': '(8: اِفْتَعَل)',
    '9': '(9: اِفْعَل)',
    '10': '(10: اِسْتَفْعَل)',
    '11': '(11: اِفْعَالَّ)',
    '12': '(12: اِفْعَوْعَلَ)',
    '13': '(13: اِفْعَوَّلَ)',
}
# U+200E LEFT-TO-RIGHT MARK, inserted between the headword and the label
lrm = '\u200E'

# heading matcher, anchored at the start of the text; named groups:
#   open          - the opening <h2 ...> tag plus any whitespace after it
#   word1 / num1  - "word(s) number" ordering
#   num2 / word2  - "number word(s)" ordering
#   close         - optional whitespace plus the closing </h2>
#   rest          - everything after the heading (DOTALL, so newlines included)
header_regex = re.compile(
    r'^(?P<open><h2\b[^>]*>\s*)'
    r'(?:(?P<word1>.+?)\s+(?P<num1>\d+)'
    r'|(?P<num2>\d+)\s+(?P<word2>.+?))'
    r'(?P<close>\s*</h2>)(?P<rest>.*)$',
    flags=re.DOTALL
)

def map_definition(def_text):
    """Replace the form number in a leading ``<h2>`` heading with its label.

    A heading of the form ``<h2>word N</h2>`` or ``<h2>N word</h2>`` is rebuilt
    as ``<h2>word`` + LRM + space + ``template_mapping[N]`` + ``</h2>``, keeping
    the original opening tag, closing tag and the text after the heading.

    The input is returned unchanged when it is not a string, when it does not
    start with such a heading, or when ``N`` has no entry in ``template_mapping``.
    """
    if not isinstance(def_text, str):
        return def_text

    found = header_regex.match(def_text)
    if found is None:
        return def_text

    parts = found.groupdict()
    # exactly one of the two orderings matched; the other pair of groups is None
    form_number = parts['num1'] or parts['num2']
    headword = parts['word1'] or parts['word2']

    label = template_mapping.get(form_number)
    if label is None:
        return def_text

    return ''.join(
        (parts['open'], headword, lrm, ' ', label, parts['close'], parts['rest'])
    )

def process_mapping(db_path):
    """Apply ``map_definition`` to every non-NULL definition and save the changes.

    All non-NULL definitions of the ``dictionary`` table are loaded, passed
    through ``map_definition`` (with a tqdm progress bar), and the rows whose
    text changed are written back in one batch and committed.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        None. Progress and a summary are printed.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('PRAGMA journal_mode=WAL')
        cursor = conn.cursor()

        # fetch id and definition
        cursor.execute("SELECT id, definition FROM dictionary WHERE definition IS NOT NULL")
        rows = cursor.fetchall()

        updates = []
        # the rows are already in memory, so their count is the progress total
        for pk, definition in tqdm(rows, total=len(rows), desc="mapping rows"):
            new_def = map_definition(definition)
            # if changed, queue for update
            if new_def != definition:
                updates.append((new_def, pk))

        if updates:
            # batch update for all modified rows
            cursor.executemany(
                "UPDATE dictionary SET definition = ? WHERE id = ?",
                updates
            )
            conn.commit()
            print(f"updated {len(updates)} rows.")
        else:
            print("no rows needed updating.")
    finally:
        # closing without a commit discards a half-applied batch on error
        conn.close()

In [None]:
# expand the form number in each <h2> heading into its "(n: pattern)" label
# (modifies the database in place)
process_mapping(DB_PATH)

In [None]:
# preview the first rows after the mapping step; the arguments are spelled out
# because no `table`, `column` or `limit` variables are defined in this notebook
view_database(DB_PATH, "DICTIONARY", "definition", 5)

Database: lanelexicon.sqlite
Table: DICTIONARY
Column: definition
Total rows: 52914
Showing first 5 rows:

Row 1 (ID: 1):
----------------------------------------
<h2>ا</h2>
b'<h2>\\u0627</h2>'

Row 2 (ID: 2):
----------------------------------------
<h2>ا</h2> <i>The first letter of the alphabet</i> [according to the order in which the letters are now commonly disposed; and also according to the original order, which see in art. ابجد]: called أَلِفٌ. [This name, like most of the other names of Arabic letters, is traceable to the Phœnician language, in which it signifies “an ox;” the ancient Phœnician form of the letter thus called being a rude representation of an ox's head.] It is, of all the letters, that which is most frequent in speech: and some say that, in آلٓمٓ, in the Kur [ch. ii. &c.], it is a name of God. (TA.) Its name is properly fem., as is also that of every other letter; [and hence its pl. is أَلِفَاتٌ;] but it may be made masc.: so says Ks: Sb says that all the letter

In [None]:
# Comparison of one hard-coded sample definition in several variants: each section
# of this cell prints a label and then the unicode_escape-encoded bytes of the
# variant, so the strings can be compared character by character.
print("Processed (old, final):")
str_p_prev = "<h3>أَبَّ -> I: فَعَل/فَعُل/فَعِل</h3> , (T, S, M, &c.,) aor. ﹻ, (M, K,) agreeably with analogy in the case of an intrans. verb of this class, (TA,) and ﹹ, (AZ, T, S, M, K,) contr. to analogy, (TA,) inf. n. أَبٌّ (T, S, M, K) and أَبِيبٌ (M, K) and أَبَابٌ and أَبَابَةٌ (S, M, K) and إِبَابَةٌ; (M;) and <b>↓</b>ٱئْتَبَّ [written with the disjunctive alif اِيتَبَّ]; (T, K;) <i>He prepared himself,</i> (AZ, S, M, A, K,) and <i>equipped himself,</i> (AZ, S, A,) for (لِ) departing, or going away, (AZ, S,) or for journeying: (M, A, K:) or <i>he determined upon journeying, and prepared himself.</i> (T.) El-Aashà says, <center> <h3> صَرَمْتُ وَلَمْ أَصْرِمْكُمُ وَكَصَارِمٍ </h3> <h3> أَخٌ قَدْ طَوَى كَشْحًا وَأَبَّ لِيَذْهَبَا </h3> </center> (T, S, M, TA,) i. e. <i>I cut</i> [in effect, <i>while I did not really cut</i>] <i> you: for like one who cuts is a brother who has determined and prepared to go away.</i> (TA.) [Hence,] لَا عَبَابَ وَ لَا أَبَابَ, [or لا عَبَابِ ولا أَبَابِ,] a prov. [which see explained in art. عب]. (TA.) [And hence the saying,] هُوَ فِى أَبَابِهِ, (S, M, K,) and أَبَابَتِهِ, and إِبَابَتِهِ, (M,) <i>He is in his</i> [<i>state of,</i> or <i>he is engaged in his,</i>] <i>preparation</i> or <i>equipment</i> [for departing or journeying]. (S, M, K.) The hemzeh in أَبَّ is sometimes changed into و; and thus وَبَّ, inf. n. وَبٌّ, signifies <i>He prepared himself to assault,</i> or <i>charge, in battle.</i> (T, TA.) <br /><b>SIGNIFICATION ___</b> أَبَّتْ أَبَابَتُهُ, and إِبَابَتُهُ, <i>His way,</i> or <i>course, of acting,</i> or <i>conduct,</i> or <i>the like, was,</i> or <i>became, rightly directed,</i> or <i>ordered.</i> (M, K.) <br /><b>SIGNIFICATION ___</b> أَبَّ <b>↓</b>أَبَّهُ <i>i. q.</i> قَصَدَ قَصْدَهُ, (K,) which signifies <i>He tended, repaired, betook himself,</i> or <i>directed his course, towards him,</i> or <i>it:</i> (S and Msb in art. 
قصد:) and also, <i>he pursued his</i> (another‘s) <i>course, doing as he</i> (the latter) <i>did.</i> (L in art. وكد.) <br /><b>SIGNIFICATION ___</b> أَبَّدِ إِلَى وَطَنِهِ, (M, K,) aor. ﹻ (IDrd, M, K) and ﹹ, (K,) inf. n. أَبٌّ (AA, S, M, K) and إِبَابَةٌ and أَبَابَةٌ (M, K,) and أَبَابٌ, (TA,) <i>He yearned for, longed for,</i> or <i>longed to see, his home.</i> (AA, S, M, K.)"

print(str_p_prev.encode("unicode_escape"))

print()

print("Unprocessed (new):")
str_up_new = "أَبَّ 1 , (T, S, M, &c.,) aor. ﹻ, (M, K,) agreeably with analogy in the case of an intrans. verb of this class, (TA,) and ﹹ, (AZ, T, S, M, K,) contr. to analogy, (TA,) inf. n. أَبٌّ (T, S, M, K) and أَبِيبٌ (M, K) and أَبَابٌ and أَبَابَةٌ (S, M, K) and إِبَابَةٌ; (M;) and 🡻ٱئْتَبَّ [written with the disjunctive alif اِيتَبَّ]; (T, K;) <i>He prepared himself,</i> (AZ, S, M, A, K,) and <i>equipped himself,</i> (AZ, S, A,) for (لِ) departing, or going away, (AZ, S,) or for journeying: (M, A, K:) or <i>he determined upon journeying, and prepared himself.</i> (T.) El-Aashà says, <center> <h3> صَرَمْتُ وَلَمْ أَصْرِمْكُمُ وَكَصَارِمٍ </h3> <h3> أَخٌ قَدْ طَوَى كَشْحًا وَأَبَّ لِيَذْهَبَا </h3> </center> (T, S, M, TA,) i. e. <i>I cut</i> [in effect, <i>while I did not really cut</i>] <i> you: for like one who cuts is a brother who has determined and prepared to go away.</i> (TA.) [Hence,] لَا عَبَابَ وَ لَا أَبَابَ, [or لا عَبَابِ ولا أَبَابِ,] a prov. [which see explained in art. عب]. (TA.) [And hence the saying,] هُوَ فِى أَبَابِهِ, (S, M, K,) and أَبَابَتِهِ, and إِبَابَتِهِ, (M,) <i>He is in his</i> [<i>state of,</i> or <i>he is engaged in his,</i>] <i>preparation</i> or <i>equipment</i> [for departing or journeying]. (S, M, K.) The hemzeh in أَبَّ is sometimes changed into و; and thus وَبَّ, inf. n. وَبٌّ, signifies <i>He prepared himself to assault,</i> or <i>charge, in battle.</i> (T, TA.) === أَبَّتْ أَبَابَتُهُ, and إِبَابَتُهُ, <i>His way,</i> or <i>course, of acting,</i> or <i>conduct,</i> or <i>the like, was,</i> or <i>became, rightly directed,</i> or <i>ordered.</i> (M, K.) === أَبَّ 🡻أَبَّهُ <i>i. q.</i> قَصَدَ قَصْدَهُ, (K,) which signifies <i>He tended, repaired, betook himself,</i> or <i>directed his course, towards him,</i> or <i>it:</i> (S and Msb in art. قصد:) and also, <i>he pursued his</i> (another‘s) <i>course, doing as he</i> (the latter) <i>did.</i> (L in art. وكد.) === أَبَّدِ إِلَى وَطَنِهِ, (M, K,) aor. 
ﹻ (IDrd, M, K) and ﹹ, (K,) inf. n. أَبٌّ (AA, S, M, K) and إِبَابَةٌ and أَبَابَةٌ (M, K,) and أَبَابٌ, (TA,) <i>He yearned for, longed for,</i> or <i>longed to see, his home.</i> (AA, S, M, K.)"

print(str_up_new.encode("unicode_escape"))

print()

print("Processed (new):")
str_p_new = "<h3>أَبَّ 1</h3> , (T, S, M, &c.,) aor. ﹻ, (M, K,) agreeably with analogy in the case of an intrans. verb of this class, (TA,) and ﹹ, (AZ, T, S, M, K,) contr. to analogy, (TA,) inf. n. أَبٌّ (T, S, M, K) and أَبِيبٌ (M, K) and أَبَابٌ and أَبَابَةٌ (S, M, K) and إِبَابَةٌ; (M;) and <b>↓</b>ٱئْتَبَّ [written with the disjunctive alif اِيتَبَّ]; (T, K;) <i>He prepared himself,</i> (AZ, S, M, A, K,) and <i>equipped himself,</i> (AZ, S, A,) for (لِ) departing, or going away, (AZ, S,) or for journeying: (M, A, K:) or <i>he determined upon journeying, and prepared himself.</i> (T.) El-Aashà says, <center> <h3> صَرَمْتُ وَلَمْ أَصْرِمْكُمُ وَكَصَارِمٍ </h3> <h3> أَخٌ قَدْ طَوَى كَشْحًا وَأَبَّ لِيَذْهَبَا </h3> </center> (T, S, M, TA,) i. e. <i>I cut</i> [in effect, <i>while I did not really cut</i>] <i> you: for like one who cuts is a brother who has determined and prepared to go away.</i> (TA.) [Hence,] لَا عَبَابَ وَ لَا أَبَابَ, [or لا عَبَابِ ولا أَبَابِ,] a prov. [which see explained in art. عب]. (TA.) [And hence the saying,] هُوَ فِى أَبَابِهِ, (S, M, K,) and أَبَابَتِهِ, and إِبَابَتِهِ, (M,) <i>He is in his</i> [<i>state of,</i> or <i>he is engaged in his,</i>] <i>preparation</i> or <i>equipment</i> [for departing or journeying]. (S, M, K.) The hemzeh in أَبَّ is sometimes changed into و; and thus وَبَّ, inf. n. وَبٌّ, signifies <i>He prepared himself to assault,</i> or <i>charge, in battle.</i> (T, TA.) <br /><b>SIGNIFICATION ___</b> أَبَّتْ أَبَابَتُهُ, and إِبَابَتُهُ, <i>His way,</i> or <i>course, of acting,</i> or <i>conduct,</i> or <i>the like, was,</i> or <i>became, rightly directed,</i> or <i>ordered.</i> (M, K.) <br /><b>SIGNIFICATION ___</b> أَبَّ <b>↓</b>أَبَّهُ <i>i. q.</i> قَصَدَ قَصْدَهُ, (K,) which signifies <i>He tended, repaired, betook himself,</i> or <i>directed his course, towards him,</i> or <i>it:</i> (S and Msb in art. 
قصد:) and also, <i>he pursued his</i> (another‘s) <i>course, doing as he</i> (the latter) <i>did.</i> (L in art. وكد.) <br /><b>SIGNIFICATION ___</b> أَبَّدِ إِلَى وَطَنِهِ, (M, K,) aor. ﹻ (IDrd, M, K) and ﹹ, (K,) inf. n. أَبٌّ (AA, S, M, K) and إِبَابَةٌ and أَبَابَةٌ (M, K,) and أَبَابٌ, (TA,) <i>He yearned for, longed for,</i> or <i>longed to see, his home.</i> (AA, S, M, K.)"

print(str_p_new.encode("unicode_escape"))

print()

print("Processed (new, final):")
str_p_final = "<h3>أَبَّ | 1: فَعَل/فَعُل/فَعِل</h3> , (T, S, M, &c.,) aor. ﹻ, (M, K,) agreeably with analogy in the case of an intrans. verb of this class, (TA,) and ﹹ, (AZ, T, S, M, K,) contr. to analogy, (TA,) inf. n. أَبٌّ (T, S, M, K) and أَبِيبٌ (M, K) and أَبَابٌ and أَبَابَةٌ (S, M, K) and إِبَابَةٌ; (M;) and <b>↓</b>ٱئْتَبَّ [written with the disjunctive alif اِيتَبَّ]; (T, K;) <i>He prepared himself,</i> (AZ, S, M, A, K,) and <i>equipped himself,</i> (AZ, S, A,) for (لِ) departing, or going away, (AZ, S,) or for journeying: (M, A, K:) or <i>he determined upon journeying, and prepared himself.</i> (T.) El-Aashà says, <center> <h3> صَرَمْتُ وَلَمْ أَصْرِمْكُمُ وَكَصَارِمٍ </h3> <h3> أَخٌ قَدْ طَوَى كَشْحًا وَأَبَّ لِيَذْهَبَا </h3> </center> (T, S, M, TA,) i. e. <i>I cut</i> [in effect, <i>while I did not really cut</i>] <i> you: for like one who cuts is a brother who has determined and prepared to go away.</i> (TA.) [Hence,] لَا عَبَابَ وَ لَا أَبَابَ, [or لا عَبَابِ ولا أَبَابِ,] a prov. [which see explained in art. عب]. (TA.) [And hence the saying,] هُوَ فِى أَبَابِهِ, (S, M, K,) and أَبَابَتِهِ, and إِبَابَتِهِ, (M,) <i>He is in his</i> [<i>state of,</i> or <i>he is engaged in his,</i>] <i>preparation</i> or <i>equipment</i> [for departing or journeying]. (S, M, K.) The hemzeh in أَبَّ is sometimes changed into و; and thus وَبَّ, inf. n. وَبٌّ, signifies <i>He prepared himself to assault,</i> or <i>charge, in battle.</i> (T, TA.) <br /><b>SIGNIFICATION ___</b> أَبَّتْ أَبَابَتُهُ, and إِبَابَتُهُ, <i>His way,</i> or <i>course, of acting,</i> or <i>conduct,</i> or <i>the like, was,</i> or <i>became, rightly directed,</i> or <i>ordered.</i> (M, K.) <br /><b>SIGNIFICATION ___</b> أَبَّ <b>↓</b>أَبَّهُ <i>i. q.</i> قَصَدَ قَصْدَهُ, (K,) which signifies <i>He tended, repaired, betook himself,</i> or <i>directed his course, towards him,</i> or <i>it:</i> (S and Msb in art. 
قصد:) and also, <i>he pursued his</i> (another‘s) <i>course, doing as he</i> (the latter) <i>did.</i> (L in art. وكد.) <br /><b>SIGNIFICATION ___</b> أَبَّدِ إِلَى وَطَنِهِ, (M, K,) aor. ﹻ (IDrd, M, K) and ﹹ, (K,) inf. n. أَبٌّ (AA, S, M, K) and إِبَابَةٌ and أَبَابَةٌ (M, K,) and أَبَابٌ, (TA,) <i>He yearned for, longed for,</i> or <i>longed to see, his home.</i> (AA, S, M, K.)"

print(str_p_final.encode("unicode_escape"))

Processed (old, final):
b'<h3>\\u0623\\u064e\\u0628\\u0651\\u064e -> I: \\u0641\\u064e\\u0639\\u064e\\u0644/\\u0641\\u064e\\u0639\\u064f\\u0644/\\u0641\\u064e\\u0639\\u0650\\u0644</h3> , (T, S, M, &c.,) aor. \\ufe7b, (M, K,) agreeably with analogy in the case of an intrans. verb of this class, (TA,) and \\ufe79, (AZ, T, S, M, K,) contr. to analogy, (TA,) inf. n. \\u0623\\u064e\\u0628\\u0651\\u064c (T, S, M, K) and \\u0623\\u064e\\u0628\\u0650\\u064a\\u0628\\u064c (M, K) and \\u0623\\u064e\\u0628\\u064e\\u0627\\u0628\\u064c and \\u0623\\u064e\\u0628\\u064e\\u0627\\u0628\\u064e\\u0629\\u064c (S, M, K) and \\u0625\\u0650\\u0628\\u064e\\u0627\\u0628\\u064e\\u0629\\u064c; (M;) and <b>\\u2193</b>\\u0671\\u0626\\u0652\\u062a\\u064e\\u0628\\u0651\\u064e [written with the disjunctive alif \\u0627\\u0650\\u064a\\u062a\\u064e\\u0628\\u0651\\u064e]; (T, K;) <i>He prepared himself,</i> (AZ, S, M, A, K,) and <i>equipped himself,</i> (AZ, S, A,) for (\\u0644\\u0650) departing, or going away, (AZ, S,)

In [None]:
# download the processed database through the browser (google.colab is imported
# here, so this cell needs a Colab runtime); DB_PATH keeps the file name in one place
from google.colab import files
files.download(DB_PATH)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>