날짜 지정 기능 추가 (Add Date Assignment feature) #26

Merged
merged 9 commits on Mar 27, 2022
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2020 lumyjuwon
Copyright (c) 2022 lumyjuwon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Binary file added dist/KoreaNewsCrawler-1.51-py3-none-any.whl
Binary file not shown.
94 changes: 70 additions & 24 deletions korea_news_crawler/articlecrawler.py
@@ -18,7 +18,7 @@ def __init__(self):
        self.categories = {'정치': 100, '경제': 101, '사회': 102, '생활문화': 103, '세계': 104, 'IT과학': 105, '오피니언': 110,
                           'politics': 100, 'economy': 101, 'society': 102, 'living_culture': 103, 'world': 104, 'IT_science': 105, 'opinion': 110}
        self.selected_categories = []
        self.date = {'start_year': 0, 'start_month': 0, 'end_year': 0, 'end_month': 0}
        self.date = {'start_year': 0, 'start_month': 0, 'start_day': 0, 'end_year': 0, 'end_month': 0, 'end_day': 0}
        self.user_operating_system = str(platform.system())

    def set_category(self, *args):
@@ -27,47 +27,94 @@ def set_category(self, *args):
                raise InvalidCategory(key)
        self.selected_categories = args

    def set_date_range(self, start_year, start_month, end_year, end_month):
        args = [start_year, start_month, end_year, end_month]
    def set_date_range(self, start_date: str, end_date: str):
        start = list(map(int, start_date.split("-")))
        end = list(map(int, end_date.split("-")))

        # Set the start date
        if len(start) == 1:    # year only
            start_year = start[0]
            start_month = 1
            start_day = 1
        elif len(start) == 2:  # year and month
            start_year, start_month = start
            start_day = 1
        elif len(start) == 3:  # year, month, and day
            start_year, start_month, start_day = start
        else:
            raise ValueError(f'{start_date} must be YYYY, YYYY-MM, or YYYY-MM-DD')

        # Set the end date
        if len(end) == 1:      # year only
            end_year = end[0]
            end_month = 12
            end_day = 31
        elif len(end) == 2:    # year and month
            end_year, end_month = end
            end_day = calendar.monthrange(end_year, end_month)[1]
        elif len(end) == 3:    # year, month, and day
            end_year, end_month, end_day = end
        else:
            raise ValueError(f'{end_date} must be YYYY, YYYY-MM, or YYYY-MM-DD')

        args = [start_year, start_month, start_day, end_year, end_month, end_day]

        if start_year > end_year:
            raise InvalidYear(start_year, end_year)
        if start_month < 1 or start_month > 12:
            raise InvalidMonth(start_month)
        if end_month < 1 or end_month > 12:
            raise InvalidMonth(end_month)
        if start_day < 1 or calendar.monthrange(start_year, start_month)[1] < start_day:
            raise InvalidDay(start_day)
        if end_day < 1 or calendar.monthrange(end_year, end_month)[1] < end_day:
            raise InvalidDay(end_day)
        if start_year == end_year and start_month > end_month:
            raise OverbalanceMonth(start_month, end_month)
        if start_year == end_year and start_month == end_month and start_day > end_day:
            raise OverbalanceDay(start_day, end_day)

        for key, value in zip(self.date, args):
            self.date[key] = value
        print(self.date)
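As a quick sanity check of the parsing rules above, the following standalone sketch mirrors the end-date defaults (written for this review; the helper name expand_end is hypothetical and not part of the package):

import calendar

def expand_end(end_date: str):
    # Mirrors set_date_range's end-date defaults: a missing month falls back
    # to December, a missing day to the last day of that month.
    parts = list(map(int, end_date.split('-')))
    year = parts[0]
    month = parts[1] if len(parts) > 1 else 12
    day = parts[2] if len(parts) > 2 else calendar.monthrange(year, month)[1]
    return year, month, day

print(expand_end('2018'))        # (2018, 12, 31)
print(expand_end('2018-02'))     # (2018, 2, 28)
print(expand_end('2018-02-10'))  # (2018, 2, 10)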

    @staticmethod
    def make_news_page_url(category_url, start_year, end_year, start_month, end_month):
    def make_news_page_url(category_url, date):
        made_urls = []
        for year in range(start_year, end_year + 1):
            target_start_month = start_month
            target_end_month = end_month

            if start_year != end_year:
                if year == start_year:
                    target_start_month = start_month
        for year in range(date['start_year'], date['end_year'] + 1):
            if date['start_year'] == date['end_year']:
                target_start_month = date['start_month']
                target_end_month = date['end_month']
            else:
                if year == date['start_year']:
                    target_start_month = date['start_month']
                    target_end_month = 12
                elif year == end_year:
                elif year == date['end_year']:
                    target_start_month = 1
                    target_end_month = end_month
                    target_end_month = date['end_month']
                else:
                    target_start_month = 1
                    target_end_month = 12

            for month in range(target_start_month, target_end_month + 1):
                for month_day in range(1, calendar.monthrange(year, month)[1] + 1):
                if date['start_year'] == date['end_year'] and date['start_month'] == date['end_month']:
                    target_start_day = date['start_day']
                    target_end_day = date['end_day']
                else:
                    if year == date['start_year'] and month == date['start_month']:
                        target_start_day = date['start_day']
                        target_end_day = calendar.monthrange(year, month)[1]
                    elif year == date['end_year'] and month == date['end_month']:
                        target_start_day = 1
                        target_end_day = date['end_day']
                    else:
                        target_start_day = 1
                        target_end_day = calendar.monthrange(year, month)[1]

                for day in range(target_start_day, target_end_day + 1):
                    if len(str(month)) == 1:
                        month = "0" + str(month)
                    if len(str(month_day)) == 1:
                        month_day = "0" + str(month_day)
                    if len(str(day)) == 1:
                        day = "0" + str(day)

                    # Generate one page URL per date
                    url = category_url + str(year) + str(month) + str(month_day)
                    url = category_url + str(year) + str(month) + str(day)

                    # totalpage is discovered by exploiting Naver's page structure: request page=10000
                    # and, since that page does not exist, Naver redirects to page=totalpage
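The page=10000 trick described in the comment above can be sketched as a standalone helper (illustrative only — get_total_pages is a hypothetical name, not the method this diff elides):

import requests

def get_total_pages(list_page_url: str) -> int:
    # Naver redirects the non-existent page=10000 to the real last page,
    # so the final URL after the redirect reveals the total page count.
    response = requests.get(list_page_url + '&page=10000',
                            headers={'User-Agent': 'Mozilla/5.0'})
    return int(response.url.split('page=')[-1])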
@@ -94,12 +141,11 @@ def crawling(self, category_name):
        writer = Writer(category='Article', article_category=category_name, date=self.date)
        # Article URL format
        url_format = f'http://news.naver.com/main/list.nhn?mode=LSD&mid=sec&sid1={self.categories.get(category_name)}&date='
        # Collects articles from start_year/start_month through end_year/end_month.
        target_urls = self.make_news_page_url(url_format, self.date['start_year'], self.date['end_year'], self.date['start_month'], self.date['end_month'])

        print(category_name + " Urls are generated")
        print("The crawler starts")
        # Collects articles from start_year/start_month/start_day through end_year/end_month/end_day.
        target_urls = self.make_news_page_url(url_format, self.date)
        print(f'{category_name} Urls are generated')

        print(f'{category_name} is collecting ...')
        for url in target_urls:
            request = self.get_url_data(url)
            document = BeautifulSoup(request.content, 'html.parser')
@@ -186,5 +232,5 @@ def start(self):
if __name__ == "__main__":
    Crawler = ArticleCrawler()
    Crawler.set_category('생활문화')
    Crawler.set_date_range(2018, 1, 2018, 2)
    Crawler.set_date_range('2018-01', '2018-02')
    Crawler.start()
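With this change, set_date_range accepts date strings at three granularities. A minimal usage sketch (assuming the package is importable as in sample.py; 'politics' is one of the English keys from the categories map):

from korea_news_crawler.articlecrawler import ArticleCrawler

crawler = ArticleCrawler()
crawler.set_category('politics')
# Each call below replaces the previous range; only the last one takes effect.
crawler.set_date_range('2018', '2018')              # 2018-01-01 through 2018-12-31
crawler.set_date_range('2018-01', '2018-02')        # 2018-01-01 through 2018-02-28
crawler.set_date_range('2018-01-05', '2018-02-10')  # exactly these days
crawler.start()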
16 changes: 16 additions & 0 deletions korea_news_crawler/exceptions.py
@@ -53,6 +53,15 @@ def __init__(self, month):
    def __str__(self):
        return self.message

# Raised when the day is invalid
class InvalidDay(Exception):
    def __init__(self, day):
        self.message = f'{day} is an invalid day'

    def __str__(self):
        return self.message



# Raised when the start month and end month are out of order
class OverbalanceMonth(Exception):
@@ -62,6 +71,13 @@ def __init__(self, start_month, end_month):
    def __str__(self):
        return self.message

# Raised when the start day and end day are out of order
class OverbalanceDay(Exception):
    def __init__(self, start_day, end_day):
        self.message = f'{start_day}(start day) is an overbalance with {end_day}(end day)'

    def __str__(self):
        return self.message


# Raised when execution takes too long and no data can be retrieved
class ResponseTimeout(Exception):
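To illustrate the new day validation, a small sketch written for this review (not part of the diff):

from korea_news_crawler.articlecrawler import ArticleCrawler
from korea_news_crawler.exceptions import InvalidDay

crawler = ArticleCrawler()
try:
    crawler.set_date_range('2018-02-30', '2018-03-01')  # February 30 does not exist
except InvalidDay as err:
    print(err)  # prints: 30 is an invalid day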
7 changes: 3 additions & 4 deletions korea_news_crawler/sample.py
@@ -1,9 +1,8 @@
from korea_news_crawler.articlecrawler import ArticleCrawler

if __name__ == "__main__":
    Crawler = ArticleCrawler()
    # Available categories: 정치, 경제, 생활문화, IT과학, 사회, 세계
    Crawler.set_category("IT과학", "경제", "생활문화", "IT과학", "사회", "세계")
    # Start crawling from December 2017 through January 2018
    Crawler.set_date_range(2017, 12, 2018, 1)
    Crawler.set_category("IT과학", "세계")
    # Crawl from December 1, 2017 through January 13, 2018; pass dates in YYYY-MM-DD format
    Crawler.set_date_range('2017-12', '2018-01-13')
    Crawler.start()
5 changes: 3 additions & 2 deletions korea_news_crawler/writer.py
@@ -7,9 +7,10 @@ class Writer(object):
    def __init__(self, category, article_category, date):
        self.start_year = date['start_year']
        self.start_month = f'0{date["start_month"]}' if len(str(date['start_month'])) == 1 else str(date['start_month'])
        self.start_day = f'0{date["start_day"]}' if len(str(date['start_day'])) == 1 else str(date['start_day'])
        self.end_year = date['end_year']
        self.end_month = f'0{date["end_month"]}' if len(str(date['end_month'])) == 1 else str(date['end_month'])

        self.end_day = f'0{date["end_day"]}' if len(str(date['end_day'])) == 1 else str(date['end_day'])
        self.file = None
        self.initialize_file(category, article_category)

@@ -20,7 +21,7 @@ def initialize_file(self, category, article_category):
        if os.path.exists(output_path) is not True:
            os.mkdir(output_path)

        file_name = f'{output_path}/{category}_{article_category}_{self.start_year}{self.start_month}_{self.end_year}{self.end_month}.csv'
        file_name = f'{output_path}/{category}_{article_category}_{self.start_year}{self.start_month}{self.start_day}_{self.end_year}{self.end_month}{self.end_day}.csv'
        if os.path.isfile(file_name):
            raise ExistFile(file_name)

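Since the day now appears in the CSV filename, the new format can be checked with a standalone sketch (dates taken from sample.py; 'politics' is just an illustrative article_category):

# How Writer's zero-padding composes the new filename
date = {'start_year': 2017, 'start_month': 12, 'start_day': 1,
        'end_year': 2018, 'end_month': 1, 'end_day': 13}
start = f"{date['start_year']}{date['start_month']:02d}{date['start_day']:02d}"
end = f"{date['end_year']}{date['end_month']:02d}{date['end_day']:02d}"
print(f"Article_politics_{start}_{end}.csv")  # Article_politics_20171201_20180113.csv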
2 changes: 1 addition & 1 deletion setup.cfg
@@ -1,2 +1,2 @@
[metadata]
description-file = README.md
description_file = README.md
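The underscore spelling matches newer setuptools, which deprecated dash-separated keys such as description-file.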
6 changes: 3 additions & 3 deletions setup.py
@@ -1,16 +1,16 @@
from setuptools import setup

# build package command: python setup.py bdist_wheel
# release package command: twine upload dist/KoreaNewsCrawler-version-py3-none-any.whl
# release package command: twine upload dist/KoreaNewsCrawler-${version}-py3-none-any.whl

setup(
    name = 'KoreaNewsCrawler',
    version = '1.50',
    version = '1.51',
    description = 'Crawl the korean news',
    author = 'lumyjuwon',
    author_email = 'lumyjuwon@gmail.com',
    url = 'https://github.com/lumyjuwon/KoreaNewsCrawler',
    download_url = 'https://github.com/lumyjuwon/KoreaNewsCrawler/archive/1.50.tar.gz',
    download_url = 'https://github.com/lumyjuwon/KoreaNewsCrawler/archive/1.51.tar.gz',
    install_requires = ['requests', 'beautifulsoup4'],
    packages = ['korea_news_crawler'],
    keywords = ['crawl', 'KoreaNews', 'crawler'],