In [None]:
import sqlite3 as sql
from contextlib import closing

import nltk
import nltk.corpus.reader.wordnet as wordnet
from nltk.corpus import wordnet as reader

In [None]:
# Build the WordNet reader (`wn`) used by the cells below, constructed directly
# from the corpus root and the `_omw_reader` of NLTK's `reader` corpus object.
# NOTE(review): `_omw_reader` is an underscore-prefixed (private) attribute —
# confirm it is still available in the installed nltk version.
wn = wordnet.WordNetCorpusReader(reader.root, reader._omw_reader)

In [None]:
# Exploration: list the definition of every sense of 'word', ordered from the
# lowest to the highest total lemma count (sum of count() over the lemmas).
synsets = sorted(
    wn.synsets('word'),
    key=lambda sense: sum(lemma.count() for lemma in sense.lemmas()),
)

for syn in synsets:
    print(syn.definition())

In [None]:
# SQLite file of the existing dictionary; the migration cell reads its `word` table.
OLD_DICT = 'data/output/dictionary.sqlite'
# SQLite file in which the word/definition/example tables are created and filled.
NEW_DICT = 'data/output/dictionary_1.sqlite'

In [None]:
# Create the empty schema of the new dictionary database (NEW_DICT):
#   word       - one row per dictionary entry
#   definition - one row per sense of a word (text, part of speech, frequency)
#   example    - one row per usage example attached to a definition
#
# CREATE TABLE is used without IF NOT EXISTS, so running this cell against a
# file that already contains these tables raises sqlite3.OperationalError
# instead of silently reusing them.
#
# closing() is required because sqlite3's own context manager (`conn` as the
# second item of the `with`) only commits or rolls back; it never closes the
# connection. Only the new database is opened: the old one is not needed here.
with closing(sql.connect(NEW_DICT)) as conn, conn:
    conn.execute("""
        CREATE TABLE word (
            wordId INTEGER NOT NULL PRIMARY KEY,
            word TEXT UNIQUE NOT NULL
        )
    """)
    # Foreign keys must name a real parent column: SQLite does not accept the
    # implicit rowid as a parent key, so reference the INTEGER PRIMARY KEY
    # columns (which alias the rowid) explicitly.
    conn.execute("""
        CREATE TABLE definition (
            definitionId INTEGER NOT NULL PRIMARY KEY,
            wordId INTEGER NOT NULL,
            definition TEXT NOT NULL,
            pos TEXT NOT NULL,
            frequency INTEGER NOT NULL,
            FOREIGN KEY(wordId) REFERENCES word(wordId)
        )
    """)
    conn.execute("""
        CREATE TABLE example (
            exampleId INTEGER NOT NULL PRIMARY KEY,
            example TEXT NOT NULL,
            definitionId INTEGER NOT NULL,
            FOREIGN KEY(definitionId) REFERENCES definition(definitionId)
        )
    """)

    

In [None]:
def process_word(word: str):
    """Return `word` with every underscore replaced by a single space.

    Used to turn the underscore-joined form of a word into the text stored
    in the new `word` table.
    """
    pieces = word.split('_')
    return ' '.join(pieces)

In [None]:
def calculate_frequency(syn):
    """Return the total of `count()` over all lemmas of the synset `syn`.

    A synset without lemmas yields 0.
    """
    return sum(lemma.count() for lemma in syn.lemmas())

In [None]:
# Move the words from the old database to the new one and attach WordNet data.
# For every (rowid, word) row of the old `word` table:
#   1. insert the word into the new `word` table under the same id, with
#      underscores shown as spaces (process_word);
#   2. insert one `definition` row per WordNet synset of the original word;
#   3. insert one `example` row per usage example of that synset.
#
# `conn` is also entered as its own context manager, so all inserts form one
# transaction: committed when the cell finishes, rolled back if it raises.
# closing() then closes both connections, which sqlite3's context manager
# does not do by itself.
#
# NOTE(review): process_word() can map two different old words to the same
# text, which would violate the UNIQUE constraint on word.word — confirm the
# old table cannot contain both the underscore and the space form of a word.
with closing(sql.connect(NEW_DICT)) as conn, \
        closing(sql.connect(OLD_DICT)) as old_conn, \
        conn:
    old_words = old_conn.cursor()
    old_words.execute("SELECT rowid, word FROM word")

    for i, (wordId, word) in enumerate(old_words):
        conn.execute('''
            INSERT INTO word(wordId, word)
            VALUES (:wordId, :word)''',
            {
                'wordId': wordId,
                'word': process_word(word),
            }
        )

        print(f'{i}: {word}')

        # WordNet is queried with the raw stored word; only the text written
        # to the new table goes through process_word().
        for syn in wn.synsets(word):
            defCursor = conn.execute("""
                INSERT INTO definition (wordId, definition, pos, frequency)
                VALUES (:wordId, :definition, :pos, :frequency)
            """, {
                'wordId': wordId,
                'definition': syn.definition(),
                'pos': syn.pos(),
                'frequency': calculate_frequency(syn)
            })
            # Id of the definition row just inserted; its examples point to it.
            defId = defCursor.lastrowid

            conn.executemany("""
                INSERT INTO example (example, definitionId)
                VALUES (:example, :definitionId)
            """, (
                {'example': ex, 'definitionId': defId}
                for ex in syn.examples()
            ))

In [None]:
# Spot-check: for each sense of '.22-caliber', show its computed frequency
# followed by its definition, one per line.
for syn in wn.synsets('.22-caliber'):
    print(calculate_frequency(syn), syn.definition(), sep='\n')
