In [1]:
import sqlite3
conn = sqlite3.connect(':memory:')
cur = conn.cursor()
conn.enable_load_extension(True)

for (val,) in cur.execute('pragma compile_options'): 
    print (val)

COMPILER=clang-4.0.1
ENABLE_COLUMN_METADATA
ENABLE_DBSTAT_VTAB
ENABLE_FTS3_TOKENIZER
ENABLE_FTS4
ENABLE_FTS5
ENABLE_JSON1
ENABLE_RTREE
ENABLE_UNLOCK_NOTIFY
MAX_EXPR_DEPTH=10000
MAX_VARIABLE_NUMBER=250000
SECURE_DELETE
THREADSAFE=1


https://komorandocs.readthedocs.io/ko/latest/pykomoran/tutorial.html

In [3]:
from PyKomoran import *
komoran = Komoran(DEFAULT_MODEL['LIGHT'])
komoran.set_user_dic('dic/user.dic')
komoran.get_nouns("김문근은 천재인가요")

['김문근', '천재']

In [82]:
from peewee import *
from playhouse.sqlite_ext import *
import datetime
from collections import defaultdict
from anytree import Node, RenderTree
from PyKomoran import *
komoran = Komoran(DEFAULT_MODEL['LIGHT'])
komoran.set_user_dic('dic/user.dic')

# pragmas = [
#     ('journal_mode', 'wal'),
#     ('cache_size', -1024 * 32)]
# db = SqliteExtDatabase('blog.db', pragmas=pragmas)
db = SqliteExtDatabase('treememo.db')


class Entry(Model):
    content = TextField()
    create_at = DateTimeField(default=datetime.datetime.now)
    modify_at = DateTimeField(default=datetime.datetime.now)

    class Meta:
        database = db
        
class FTSEntry(FTSModel):
    content = TextField()

    class Meta:
        database = db        
        
class Tag(Model):
    title = TextField()
    create_at = DateTimeField(default=datetime.datetime.now)
    modify_at = DateTimeField(default=datetime.datetime.now)

    class Meta:
        database = db
        
class TagMap(Model):
    tag_id = ForeignKeyField(Tag, backref='map_tag')
    entry_id = ForeignKeyField(Entry, backref='map_entry')

    class Meta:
        database = db
        
class Document(Model):
    title = TextField()
    create_at = DateTimeField(default=datetime.datetime.now)
    modify_at = DateTimeField(default=datetime.datetime.now)

    class Meta:
        database = db
    
class DocumentEntry(Model):
    document_id = ForeignKeyField(Document, backref='map_document')
    entry_id = ForeignKeyField(Entry, backref='map_entry')
    parent_id = ForeignKeyField('self', null=True, backref='map_parent')
    prev_id = ForeignKeyField('self', null=True, backref='map_prev')

    class Meta:
        database = db
        
        
class NoDocumentEntryException(Exception):
    pass
        
    
def add_content(content):
    entry = Entry.create(content=content)
    FTSEntry.create(docid=entry.id, content='\n'.join(komoran.get_nouns(content)))
    return entry
    

def get_tag(title):
    try:
        tag = Tag.get(Tag.title == title)
        print('exist - ', title, tag.title, tag.id)
    except:    
        tag = Tag.create(title=title)
        print('new - ', title, tag.title, tag.id)
    return tag
    
def add_tag(docid, title):
    tag = get_tag(title)
    print(tag)
    TagMap.create(tag_id=tag.id, entry_id=docid)
    
    
def find_entry(content):
    return [entry for entry in Entry.select().join(FTSEntry, on=(Entry.id == FTSEntry.docid)).where(FTSEntry.match(content)).order_by(FTSEntry.rank()).dicts()]

def find_tag(title):
    return [tag for tag in Tag.select().where(Tag.title.startswith(title)).dicts()]

def find_tag_entry(tag_id):
    return [entry for entry in Entry.select().join(TagMap, on=TagMap.entry_id).where(TagMap.tag_id == tag_id).dicts()]

def add_document(title):
    return Document.create(title=title)

def get_document_entry(document_entry_id):
    try:
        return DocumentEntry.select().where(DocumentEntry.id == document_entry_id).get()
    except:
        raise NoDocumentEntryException('No Item #' + str(document_entry_id))

def get_native_document_entry(document_id, parent_id, prev_id):
    try:
        return DocumentEntry.select().where(DocumentEntry.document_id == document_id, DocumentEntry.parent_id == parent_id, DocumentEntry.prev_id == prev_id).get()
    except:
        return None

def add_document_entry(document_id, entry_id, parent_id, prev_id):
    if parent_id != None:
        get_document_entry(parent_id)
    if prev_id != None:
        get_document_entry(prev_id)
    native = get_native_document_entry(document_id, parent_id, prev_id)
    new_doc_entry = DocumentEntry.create(document_id=document_id, entry_id=entry_id, parent_id=parent_id, prev_id=prev_id)
    if native:
        native.prev_id = new_doc_entry.id
        native.save()
    return new_doc_entry
    
def get_document_entries(document_id):
    return [document_entry for document_entry in DocumentEntry.select(DocumentEntry, Entry.content).join(Entry, on=Entry.id == DocumentEntry.entry_id).where(DocumentEntry.document_id == document_id).dicts()]


def add_two_items(lists, prev_item, next_item):
    if not prev_item:
        if next_item not in lists:
            lists.insert(0, next_item)
    else:
        if prev_item not in lists:
            lists.insert(0, prev_item)
            lists.insert(1, next_item)
        else:
            lists.insert(lists.index(prev_item)+1, next_item)
    return lists

        
# Working....
def get_document_tree(document_id):
    parent_map = defaultdict([])
    prev_map = defaultdict([])
    for entry in get_document_entries(document_id):
        ids = parent_map[entry['parent_id']]
        if entry['prev_id'] == None:
            ids.insert(0, entry['id'])
        else:
            if entry['prev_id'] in ids:
                ids.insert(ids.index(entry['prev_id'])+1, entry['id'])
            else:
                prev_map[entry['prev_id']].append(entry['id'])
            
            if entry['id'] in prev_map:
                
        
    return parent_map
    

def find_document(title=None):
    if not title:
        return [document for document in Document.select().dicts()]
    
    return [document for document in Document.select().where(Document.title.startswith(title)).dicts()]
    
db.create_tables([Entry, FTSEntry, Tag, TagMap, Document, DocumentEntry])    

IndentationError: expected an indented block (<ipython-input-82-549ae7df202d>, line 159)

In [7]:
doc = add_content('김문근')
doc = add_content('김문근 천재')
doc = add_content('김문근 천재 인가요?')

In [68]:
add_content('학교 가자')

<Entry: 4>

In [69]:
find_entry('학교')

[{'id': 4,
  'content': '학교 가자',
  'create_at': datetime.datetime(2020, 12, 12, 21, 10, 39, 758467),
  'modify_at': datetime.datetime(2020, 12, 12, 21, 10, 39, 758472)}]

In [11]:
find_tag('인')

[{'id': 1,
  'title': '인물',
  'create_at': datetime.datetime(2020, 12, 12, 20, 5, 52, 957749),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 5, 52, 957752)}]

In [10]:
add_tag(1, '인물')

new -  인물 인물 1
1


In [12]:
add_tag(1, '인사')
add_tag(2, '인사')

new -  인사 인사 2
2
exist -  인사 인사 2
2


In [13]:
find_tag_entry(1)

[{'id': 1,
  'content': '김문근',
  'create_at': datetime.datetime(2020, 12, 12, 20, 5, 48, 884685),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 5, 48, 884688)}]

In [19]:
add_document('연습2')

<Document: 2>

In [45]:
add_document_entry(1, 3, 3, 100)

NoDocumentEntryException: No Item #100

In [81]:
get_document_entries(1)

[{'id': 1,
  'document_id': 1,
  'entry_id': 1,
  'parent_id': None,
  'prev_id': None,
  'content': '김문근'},
 {'id': 2,
  'document_id': 1,
  'entry_id': 2,
  'parent_id': None,
  'prev_id': 4,
  'content': '김문근 천재'},
 {'id': 4,
  'document_id': 1,
  'entry_id': 3,
  'parent_id': None,
  'prev_id': 1,
  'content': '김문근 천재 인가요?'},
 {'id': 5,
  'document_id': 1,
  'entry_id': 4,
  'parent_id': 1,
  'prev_id': None,
  'content': '학교 가자'}]

In [70]:
add_document_entry(1, 4, 1, None)

<DocumentEntry: 5>

In [26]:
get_native_document_entry(1, None, 3)

In [57]:
find_document()

[{'id': 1,
  'title': '연습',
  'create_at': datetime.datetime(2020, 12, 12, 20, 6, 33, 381421),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 6, 33, 381426)},
 {'id': 2,
  'title': '연습2',
  'create_at': datetime.datetime(2020, 12, 12, 20, 10, 41, 509277),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 10, 41, 509280)}]

In [58]:
find_document('연')

[{'id': 1,
  'title': '연습',
  'create_at': datetime.datetime(2020, 12, 12, 20, 6, 33, 381421),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 6, 33, 381426)},
 {'id': 2,
  'title': '연습2',
  'create_at': datetime.datetime(2020, 12, 12, 20, 10, 41, 509277),
  'modify_at': datetime.datetime(2020, 12, 12, 20, 10, 41, 509280)}]

In [80]:
get_document_tree(1)

{None: [4, 2, 1, 4], 1: [5]}

In [74]:
[1, 2, 3].index(None)

ValueError: None is not in list