https://www.flowdas.com/blog/iterators-in-python/index.html

http://pythonstudy.xyz/python/article/23-Iterator%EC%99%80-Generator

In [None]:
import abc
class AbstractCrawler(object):
    
    def __init__(self, url=None, default_headers= None, request_class=None, parser_class=None, config=None, **kwargs):
        self.base_url = url
    
    def _crawl(self, param=None, headers=None):
        url = self.make_url(self.base_url, param)
        header = self.__make_headers(headers)
        resp = request_class(url, header=header).get_data()
        pass
    
    @abc.abstractmethod
    def crawl(self, param, headers):
        return self._crawl(param, headers)
    
    def __make_headers(self,headers):
        return self.default_headers.update(headers)
    
    @abc.abstractmethod
    def __make_url(self, base_url, params):
        pass
        
    
    @abc.abstractmethod
    def run(self):
        data = self.crawl()
        self.save(data)
        pass
    
    @abc.abstractmethod
    def save(self):
        # db handling
        pass


In [90]:
import datetime
from datetime import timedelta

In [29]:
# 날짜 다시 입력해야 할 때 에러 발생시키자.
class NotAValidDateError(Exception):
    def __init__(self):
        super().__init__('마지막 검색 날짜를 다시 설정하십시오.')

In [108]:
class DateRange:
    def __init__(self, start_date, end_date):
        self.start_date = datetime.datetime.strptime(str(start_date), "%Y%m%d")
        self.end_date = datetime.datetime.strptime(str(end_date), "%Y%m%d") - timedelta(days=1)
        
        if self.start_date == self.end_date:
            raise NotAValidDateError

        self.size = (self.end_date - self.start_date).days
        self.data = [self.start_date + timedelta(n) for n in range(self.size)]
        
    def __repr__(self):
        return "날짜 설정 : {0} ~ {1}".format(self.start_date, self.end_date)
    
    def __iter__(self):
        self.index = 0
        return self
    
    def __next__(self):
        if self.index >= self.size:
            raise StopIteration

        n = self.data[self.index]
        self.index += 1
        return n

In [109]:
DateRange(20200601, 20200603)

날짜 설정 : 2020-06-01 00:00:00 ~ 2020-06-02 00:00:00

In [110]:
for d in DateRange(20200601, 20200620):
    print(d)

2020-06-01 00:00:00
2020-06-02 00:00:00
2020-06-03 00:00:00
2020-06-04 00:00:00
2020-06-05 00:00:00
2020-06-06 00:00:00
2020-06-07 00:00:00
2020-06-08 00:00:00
2020-06-09 00:00:00
2020-06-10 00:00:00
2020-06-11 00:00:00
2020-06-12 00:00:00
2020-06-13 00:00:00
2020-06-14 00:00:00
2020-06-15 00:00:00
2020-06-16 00:00:00
2020-06-17 00:00:00
2020-06-18 00:00:00


In [111]:
class myTweetCrawler(DateRange):
    pass

연습

- 밑에 함수를 아예 크롤러에 넣는 게 낫나요?

In [80]:
def daterange(start_date, end_date): # setUntil 마지막 날짜 안 포함.
    start_date = datetime.datetime.strptime(str(start_date), "%Y%m%d")
    end_date = datetime.datetime.strptime(str(end_date), "%Y%m%d")
    
    if end_date == start_date:
        raise NotValidEndDateError
    else:
        for n in range(int((end_date - start_date).days)):
            yield start_date + datetime.timedelta(n)

gen = daterange(20200601, 20200610)
for x in gen:
    print(x)

2020-06-01 00:00:00
2020-06-02 00:00:00
2020-06-03 00:00:00
2020-06-04 00:00:00
2020-06-05 00:00:00
2020-06-06 00:00:00
2020-06-07 00:00:00
2020-06-08 00:00:00
2020-06-09 00:00:00
