Merge pull request #44 from word-way/word-way-backend-38

단어가 없는 발음을 기준으로 단어를 추가하는 테스크 추가하기
k-roffle · Oct 11, 2020 · f197198 · f197198
2 parents 08ceeb1 + 1c4abc1
commit f197198
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 4 deletions.
diff --git a/word_way/migrations/versions/6013d366a3a0_add_scrapped_at_to_pronunciation.py b/word_way/migrations/versions/6013d366a3a0_add_scrapped_at_to_pronunciation.py
@@ -0,0 +1,25 @@
+"""Add scrapped_at to Pronunciation
+
+Revision ID: 6013d366a3a0
+Revises: 94abc68d90ce
+Create Date: 2020-10-11 16:39:21.931294
+
+"""
+from alembic import op
+from sqlalchemy import Column, DateTime
+
+# Revision identifiers: this migration's own ID and the ID of the revision
+# it follows (the "Revision ID" / "Revises" values in the module docstring).
+revision = '6013d366a3a0'
+down_revision = '94abc68d90ce'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Add the ``scrapped_at`` column to the ``pronunciation`` table.
+
+    The column is declared as a timezone-aware ``DateTime`` and is nullable.
+
+    """
+    op.add_column(
+        'pronunciation',
+        Column('scrapped_at', DateTime(timezone=True), nullable=True)
+    )
+
+
+def downgrade():
+    """Drop the ``scrapped_at`` column from the ``pronunciation`` table."""
+    op.drop_column('pronunciation', 'scrapped_at')
diff --git a/word_way/models.py b/word_way/models.py
@@ -1,7 +1,9 @@
 import typing
 import uuid
 
-from sqlalchemy import Column, ForeignKey, Integer, Unicode, UniqueConstraint
+from sqlalchemy import (
+    Column, DateTime, ForeignKey, Integer, Unicode, UniqueConstraint
+)
 from sqlalchemy.orm import relationship
 from sqlalchemy_enum34 import EnumType
 from sqlalchemy_utils.types.uuid import UUIDType
@@ -34,6 +36,9 @@ class Pronunciation(Base):
     #: (:class:`str`) 발음
     pronunciation = Column(Unicode, unique=True, nullable=False)
 
+    #: (:class:`datetime.datetime`) 발음에 해당하는 단어들을 스크래핑한 시각
+    scrapped_at = Column(DateTime(timezone=True))
+
     words = relationship('Word', uselist=True, back_populates='pronunciation')
 
     word_relation = relationship(

diff --git a/word_way/scrapping/word.py b/word_way/scrapping/word.py
@@ -17,9 +17,9 @@
 from word_way.models import (IncludeWordRelation, Pronunciation, Sentence,
                              Word, WordSentenceAssoc)
 from word_way.scrapping.word_parser import WordParser
-from word_way.utils import convert_word_part
+from word_way.utils import convert_word_part, utc_now
 
-__all__ = 'save_word', 'save_word_task',
+__all__ = ('save_word', 'save_word_task', 'save_words_task',)
 
 logger = logging.getLogger(__name__)
 
@@ -29,6 +29,21 @@ def save_word_task(target_word: str):
     save_word(target_word, session)
 
 
+@celery.task
+def save_words_task():
+    """Task that fetches pronunciations with no words and saves their words.
+
+    A pronunciation is selected when no ``Word`` row references it and its
+    ``scrapped_at`` is still ``NULL``. The text of each selected
+    pronunciation is passed to :func:`save_word` together with the
+    module-level session.
+
+    """
+    # Words that point at the pronunciation being examined; the query below
+    # negates its EXISTS form to keep only pronunciations without any word.
+    subquery = session.query(Word).filter(
+        Word.pronunciation_id == Pronunciation.id
+    )
+    pronunciations = session.query(Pronunciation).filter(
+        ~subquery.exists(),
+        # Skip pronunciations that already have a scrapped_at timestamp.
+        Pronunciation.scrapped_at.is_(None),
+    ).all()
+    for pronunciation in pronunciations:
+        save_word(pronunciation.pronunciation, session)
+    # NOTE(review): save_word appears to end with its own session.commit(),
+    # so this final commit may be redundant -- confirm before removing.
+    session.commit()
+
+
 def save_word(
     target_word: str, session: Session,
 ) -> typing.Optional[uuid.UUID]:
@@ -38,7 +53,7 @@ def save_word(
     :type target_word: :class:`str`
     :param session: 사용할 세션
     :type session: :class:`sqlalchemy.orm.session.Session`
-    :return: target_word와 발음이 정확히 일치하는 발음 ID
+    :return: target_word와 발음이 정확히 일치하는 발음 ID, 없다면 None을 반환합니다
     :rtype: typing.Optional[uuid.UUID]
 
     """
@@ -93,6 +108,7 @@ def save_word(
             session.flush()
             save_include_word(word, session)
             save_example_sentence(word, session)
+        pronunciation.scrapped_at = utc_now()
         log.info(f'Done saving the word ({pronunciation_word})')
     session.commit()
     return pronunciation_id

diff --git a/word_way/utils.py b/word_way/utils.py
@@ -1,5 +1,9 @@
+import datetime
+
 from word_way.enum import WordPart
 
+__all__ = ('convert_word_part', 'utc_now',)
+
 
 def convert_word_part(part: str):
     return {
@@ -12,3 +16,7 @@ def convert_word_part(part: str):
         '부사': WordPart.adverb,
         '감탄사': WordPart.interjection,
     }.get(part, WordPart.unknown)
+
+
+def utc_now():
+    return datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)