## 항공가격 배치 크롤링
- 크롤링 대상 사이트, 노선, 날짜 리스트 정보를 이용하여 크롤링 배치 처리
- 실시간 사이트의 경우 대상 항공사 코드 정보 필요
    * 세팅정보 처리 : 크롤링 함수 부분

In [1]:
from collections import namedtuple
from datetime import datetime, timedelta
import requests
import shutil
from common.env_variable import *
from common.batch_util import *
from common.crawl_func import *
from common.log_util import *
from common.util import save_raw_data
## Initialize logging, then the environment variables, each from its own JSON file.
## NOTE(review): both helpers come in through the common.* star imports above
## (presumably log_util and env_variable) — confirm what they configure.
logger_initialize('crawl_logger_setting.json')
init_env_variable('common/env_variable.json')

In [2]:
## Run the crawl for every (route, site) condition in a list.
## crawl_date is the crawl timestamp string, formatted YYYYMMDDHH (year, month, day, hour).
def crawl_by_route_siteinfos(crawl_date, infos):
    """Crawl every condition in ``infos`` and return the ones that failed.

    Args:
        crawl_date: Crawl timestamp string (YYYYMMDDHH); passed through
            unchanged to ``crawl_by_route_site``.
        infos: Sequence of 6-item entries laid out as
            ``(dom_int, dpt, arr, site, airlines, dates)``.

    Returns:
        List of the entries from ``infos`` whose crawl raised an exception,
        in the order they were attempted, so they can be re-run.
    """
    log_msgs = ['start batch job','craw_by_route_siteinfos : {}-{}'.format(crawl_date,len(infos))]
    log(log_msgs,level=logging.INFO)
    start_time = datetime.today()
    error_infos = []
    for info in infos:
        dom_int,dpt,arr,site,airlines,dates = info
        try:
            crawl_by_route_site(crawl_date,dom_int,dpt,arr,site,dates,airlines)
        except Exception as err:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop a long batch.
            # One failing condition must not abort the remaining ones.
            error_infos.append(info)
            log_msgs = ['*** Error occured! when crawling condition - {},{},{},{},{},{}'.format(dom_int,dpt,arr,site,airlines,dates),
                        '{}: {}'.format(type(err).__name__,err)]
            log(log_msgs,level=logging.ERROR)
    end_time = datetime.today()
    log_msgs = ['end batch job','craw_by_route_siteinfos',
                'elapsed -{}'.format(end_time-start_time),'Total infos({})'.format(len(infos))]
    log(log_msgs,level=logging.INFO)
    log_msgs = ['Total {} site info, {} error occured'.format(len(infos),len(error_infos))]
    log(log_msgs,level=logging.INFO)
    return error_infos

## Run the crawl for one route on one site over a list of departure dates.
def crawl_by_route_site(crawl_date,dom_int,dpt,arr,site,dates,airlines=None):
    """Crawl one route on one site for each departure date and save the raw data.

    Args:
        crawl_date: Crawl timestamp string, recorded in each saved file's head info.
        dom_int: Domestic/international flag; used to look up the crawl function.
        dpt: Departure airport code.
        arr: Arrival airport code.
        site: Site code; used to look up the crawl function and to name files.
        dates: Iterable of departure-date strings to crawl.
        airlines: Airline codes to query. Only used when the site is not an
            airline's own site (a "real-time" site). A single code given as a
            string is treated as a one-item list.

    Returns:
        None. Also returns None early, after logging, when no crawl function
        is registered for ``(dom_int, site)``.

    Raises:
        ValueError: If the site is a real-time site and ``airlines`` is None.
    """
    log_msgs = ['start batch job','craw_by_route_site[{},{},{},{},{}]'.format(dpt,arr,site,dates,airlines)]
    log(log_msgs,level=logging.INFO)
    start_time = datetime.today()
    crawl_cnt = 0
    func, isairline = get_crawl_site_func(dom_int,site)
    log([func,isairline],level=logging.INFO)
    if func is None: ## proceed only when a crawl function was found
        # Pass a list and an explicit level like every other log() call here.
        # NOTE(review): log() is defined elsewhere — confirm it expects a list.
        log(['Crawling Func not found!'],level=logging.ERROR)
        return None
    if not isairline:
        if airlines is None:
            # Previously this surfaced as an opaque "'NoneType' is not iterable".
            raise ValueError('airlines is required for real-time site: {}'.format(site))
        if isinstance(airlines,str):
            # A bare 'ZE' would otherwise be iterated as 'Z', 'E'.
            airlines = [airlines]
    for dpt_date in dates:
        if isairline: ## airline's own site: the site code doubles as the airline code
            raw_data = crawling_func(func,dpt,arr,dpt_date)
            head = set_headinfo(site,dom_int,site,dpt,arr,dpt_date,crawl_date=crawl_date)
            file = file_name(site,dpt,arr,dpt_date)
            save_raw_data(file,raw_data,head=head)
            crawl_cnt += 1
        else: ## real-time site: one request and one file per airline
            for airline in airlines:
                raw_data = crawling_func(func,airline,dpt,arr,dpt_date)
                head = set_headinfo(site,dom_int,airline,dpt,arr,dpt_date,crawl_date=crawl_date)
                file = file_name(site,dpt,arr,dpt_date,airline=airline)
                save_raw_data(file,raw_data,head=head)
                crawl_cnt += 1
    end_time = datetime.today()
    log_msgs = ['end batch job','craw_by_route_site',
                'elapsed -{}'.format(end_time-start_time),'Total crawl:{} files saved.'.format(crawl_cnt)]
    log(log_msgs,level=logging.INFO)
## Build a range of date strings from two YYYYMMDD strings (both ends inclusive).
def timeiter(start,end,fmt='%Y%m%d'):
    """Return a generator of formatted dates from ``start`` through ``end``.

    Args:
        start: First date, as a YYYYMMDD string.
        end: Last date (inclusive), as a YYYYMMDD string.
        fmt: strftime format applied to each produced date.

    Returns:
        Generator of date strings; empty when ``end`` is before ``start``.
    """
    first_day = datetime.strptime(start,'%Y%m%d')
    last_day = datetime.strptime(end,'%Y%m%d')
    # +1 so that the end date itself is included.
    day_count = (last_day - first_day).days + 1
    return ((first_day + timedelta(days=offset)).strftime(fmt) for offset in range(day_count))
## Get the date a given number of days after a YYYYMMDD base date.
def get_date(base_date=None,after=0,fmt='%Y%m%d'):
    """Return the date ``after`` days from ``base_date`` as a formatted string.

    Args:
        base_date: Base date as a YYYYMMDD string; the current date/time is
            used when it is None.
        after: Number of days to add (may be negative).
        fmt: strftime format of the returned string.

    Returns:
        The shifted date formatted with ``fmt``.
    """
    if base_date is not None:
        origin = datetime.strptime(base_date,'%Y%m%d')
    else:
        origin = datetime.today()
    shifted = origin + timedelta(after)
    return shifted.strftime(fmt)

In [3]:
## Crawl condition variables. Each entry is a 6-item list laid out as
## [dom_int, dpt, arr, site, airlines, dates], the order crawl_by_route_siteinfos unpacks.
## `end` and `dates` are rebound below: the 60-day entries keep the first `dates` list,
## every later entry shares the 21-day list.
previous_crawled_date = get_date() ## today; if a run fails midway, adjust this value and re-run
start = get_date(base_date=previous_crawled_date,after=1) ## day after the base date (tomorrow)
end = get_date(base_date=previous_crawled_date,after=60)  ## last day of the window
dates = list(timeiter(start,end))
## Per-route, per-window variables
## 60-day window
gmpcju60 = ['0','GMP', 'CJU', 'IP', ['7C','LJ','TW','ZE'],dates]
icnnrt60 = ['1','ICN', 'NRT', 'IP', ['7C','LJ','TW','ZE'],dates]
end = get_date(base_date=previous_crawled_date,after=21)  ## last day of the window
dates = list(timeiter(start,end))
## Domestic routes, 21-day window
gmpcju21 = ['0','GMP', 'CJU', 'IP', ['7C','LJ','TW','ZE'],dates]
cjugmp21 = ['0','CJU', 'GMP', 'IP', ['7C','LJ','TW','ZE'],dates]
gmppus21 = ['0','GMP', 'PUS', 'IP', ['7C','BX','OZ','ZE'],dates]
pusgmp21 = ['0','PUS', 'GMP', 'IP', ['7C','BX','OZ','ZE'],dates]
puscju21 = ['0','PUS', 'CJU', 'IP', ['7C','LJ','OZ','KE','ZE'],dates]
cjupus21 = ['0','CJU', 'PUS', 'IP', ['7C','LJ','OZ','KE','ZE'],dates]
cjjcju21 = ['0','CJJ', 'CJU', 'IP', ['7C','LJ','OZ','KE','ZE'],dates]
cjucjj21 = ['0','CJU', 'CJJ', 'IP', ['7C','LJ','OZ','KE','ZE'],dates]
kuvcju21 = ['0','KUV', 'CJU', 'IP', ['KE','ZE'],dates]
cjukuv21 = ['0','CJU', 'KUV', 'IP', ['KE','ZE'],dates]
## International routes departing Korea, 21-day window, Interpark
icnnrt21 = ['1','ICN', 'NRT', 'IP', ['7C','LJ','TW','ZE'],dates]
icnbkk21 = ['1','ICN', 'BKK', 'IP', ['7C','LJ','TW','ZE'],dates]
icnfuk21 = ['1','ICN', 'FUK', 'IP', ['7C','LJ','TW','ZE'],dates]
## International routes departing overseas, 21-day window, one entry per airline site.
## Here the airlines slot is a plain string equal to the site code, not a list.
## NOTE(review): the LJ entries use 'AIRPORT/REGION' codes (e.g. 'NRT/JPN') — presumably
## the format that site's crawler expects; confirm against common.crawl_func.
ze_nrticn21 = ['1','NRT', 'ICN', 'ZE', 'ZE',dates]
tw_nrticn21 = ['1','NRT', 'ICN', 'TW', 'TW',dates]
c7_nrticn21 = ['1','NRT', 'ICN', '7C', '7C',dates]
lj_nrticn21 = ['1','NRT/JPN', 'ICN/KOR', 'LJ', 'LJ',dates]
ze_bkkicn21 = ['1','BKK', 'ICN', 'ZE', 'ZE',dates]
tw_bkkicn21 = ['1','BKK', 'ICN', 'TW', 'TW',dates]
c7_bkkicn21 = ['1','BKK', 'ICN', '7C', '7C',dates]
lj_bkkicn21 = ['1','BKK/SEA', 'ICN/KOR', 'LJ', 'LJ',dates]
ze_fukicn21 = ['1','FUK', 'ICN', 'ZE', 'ZE',dates]
tw_fukicn21 = ['1','FUK', 'ICN', 'TW', 'TW',dates]
c7_fukicn21 = ['1','FUK', 'ICN', '7C', '7C',dates]
lj_fukicn21 = ['1','FUK/JPN', 'ICN/KOR', 'LJ', 'LJ',dates]

In [None]:
### Run once a day.
### gmpcju21 and icnnrt21 are defined above but not listed here; their routes are
### covered by the 60-day entries instead.
conds = [
    ## Gimpo-Jeju and Incheon-Narita, 60-day data - after arriving at work
     gmpcju60,icnnrt60,
    ## 21-day data - after arriving at work
     cjugmp21,icnbkk21,icnfuk21,
    ## Other domestic routes, 21 days (Gimpo-Busan, Busan-Jeju - both directions), after arriving at work
    gmppus21,pusgmp21,puscju21,
    ## Jeju->Busan is currently disabled:
    #cjupus21,
    ## Other domestic routes, 21 days (Cheongju-Jeju, Gunsan-Jeju - both directions), after arriving at work
    cjjcju21,cjucjj21,kuvcju21,cjukuv21,
    ### 21-day data for flights departing Narita, Bangkok and Fukuoka
    ze_nrticn21,tw_nrticn21,c7_nrticn21,lj_nrticn21,
    ze_bkkicn21,tw_bkkicn21,c7_bkkicn21,lj_bkkicn21,
    ze_fukicn21,tw_fukicn21,c7_fukicn21,lj_fukicn21,
]
## crawl_date is YYYYMMDDHH; the hour part is hard-coded to '08' regardless of the actual run time.
crawl_date = datetime.today().strftime('%Y%m%d') + '08'
## Entries that raised during crawling are returned so they can be retried.
error_infos = crawl_by_route_siteinfos(crawl_date,conds)

2017-08-10 08:04:50,240 root     INFO     start batch job :: craw_by_route_siteinfos : 2017081008-24
2017-08-10 08:04:50,271 root     INFO     start batch job :: craw_by_route_site[GMP,CJU,IP,['20170811', '20170812', '20170813', '20170814', '20170815', '20170816', '20170817', '20170818', '20170819', '20170820', '20170821', '20170822', '20170823', '20170824', '20170825', '20170826', '20170827', '20170828', '20170829', '20170830', '20170831', '20170901', '20170902', '20170903', '20170904', '20170905', '20170906', '20170907', '20170908', '20170909', '20170910', '20170911', '20170912', '20170913', '20170914', '20170915', '20170916', '20170917', '20170918', '20170919', '20170920', '20170921', '20170922', '20170923', '20170924', '20170925', '20170926', '20170927', '20170928', '20170929', '20170930', '20171001', '20171002', '20171003', '20171004', '20171005', '20171006', '20171007', '20171008', '20171009'],['7C', 'LJ', 'TW', 'ZE']]
2017-08-10 08:04:50,275 root     DEBUG    0 :: IP :: crawling

Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170811 , inf:0 , airlineCode:7C , 


2017-08-10 08:04:51,728 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:04:51,878 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170811 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170811 , inf:0 , airlineCode:TW , 


2017-08-10 08:04:52,381 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170811 , inf:0 , airlineCode:ZE , 


2017-08-10 08:04:53,483 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170812 , inf:0 , airlineCode:7C , 


2017-08-10 08:04:54,979 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170812 , inf:0 , airlineCode:LJ , 


2017-08-10 08:04:55,337 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170812 , inf:0 , airlineCode:TW , 


2017-08-10 08:04:55,727 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170812 , inf:0 , airlineCode:ZE , 


2017-08-10 08:04:57,063 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170813 , inf:0 , airlineCode:7C , 


2017-08-10 08:04:59,891 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170813 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:00,216 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170813 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:00,794 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170813 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:03,263 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170814 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:08,200 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170814 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:08,652 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170814 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:09,116 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170814 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:10,373 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170815 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:15,441 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170815 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:15,665 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170815 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:16,252 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170815 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:17,531 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170816 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:19,439 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170816 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:19,694 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170816 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:20,272 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170816 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:21,478 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170817 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:24,058 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170817 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:24,327 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170817 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:24,794 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170817 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:25,994 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170818 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:29,894 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170818 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:30,395 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170818 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:30,964 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170818 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:32,196 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170819 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:34,195 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170819 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:34,426 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170819 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:34,925 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170819 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:36,178 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170820 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:38,362 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:05:38,541 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170820 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170820 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:39,142 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170820 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:40,293 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170821 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:42,026 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170821 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:43,454 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170821 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:43,909 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170821 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:45,005 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170822 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:46,871 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170822 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:47,147 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170822 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:47,780 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170822 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:48,897 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170823 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:50,589 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170823 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:50,954 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170823 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:51,406 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170823 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:52,523 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170824 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:54,357 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170824 , inf:0 , airlineCode:LJ , 


2017-08-10 08:05:54,577 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170824 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:55,168 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170824 , inf:0 , airlineCode:ZE , 


2017-08-10 08:05:56,418 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170825 , inf:0 , airlineCode:7C , 


2017-08-10 08:05:58,531 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:05:58,703 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170825 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170825 , inf:0 , airlineCode:TW , 


2017-08-10 08:05:59,312 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170825 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:00,471 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170826 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:02,402 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170826 , inf:0 , airlineCode:LJ , 


2017-08-10 08:06:02,714 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170826 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:03,289 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170826 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:04,510 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170827 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:06,210 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:06:06,374 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170827 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170827 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:06,992 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170827 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:08,207 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170828 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:10,128 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:06:10,300 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170828 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170828 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:11,020 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170828 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:12,140 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170829 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:14,370 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:06:14,557 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170829 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170829 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:15,195 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170829 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:16,425 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170830 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:18,493 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170830 , inf:0 , airlineCode:LJ , 


2017-08-10 08:06:18,708 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170830 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:19,214 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170830 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:20,682 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170831 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:24,743 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170831 , inf:0 , airlineCode:LJ , 


2017-08-10 08:06:25,178 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170831 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:25,739 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170831 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:26,924 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170901 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:28,988 root     DEBUG    Crawling Interpark domastic schedule site
2017-08-10 08:06:29,171 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170901 , inf:0 , airlineCode:LJ , 
End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170901 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:29,659 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170901 , inf:0 , airlineCode:ZE , 


2017-08-10 08:06:30,858 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170902 , inf:0 , airlineCode:7C , 


2017-08-10 08:06:32,549 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170902 , inf:0 , airlineCode:LJ , 
End Simple crawling

2017-08-10 08:06:32,751 root     DEBUG    Crawling Interpark domastic schedule site



Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170902 , inf:0 , airlineCode:TW , 


2017-08-10 08:06:33,273 root     DEBUG    Crawling Interpark domastic schedule site


End Simple crawling
Start Simple crawling :  http://domair.interpark.com/api/booking/airJourney.do
>> Parameters
chd:0 , adt:1 , format:json , dep:GMP , tripDivi:0 , arr:CJU , depDate:20170902 , inf:0 , airlineCode:ZE , 


In [None]:
### Run twice a day
conds = [
    ## 21-day data, Gimpo <-> Jeju round trip, 10:00 and 14:00
    gmpcju21, cjugmp21,
    ## Other domestic routes, 21 days (Gimpo-Busan, Busan-Jeju, both directions), 10:00 and 14:00
    gmppus21, pusgmp21, puscju21, cjupus21,
    ## Other domestic routes, 21 days (Cheongju-Jeju, Gunsan-Jeju, both directions), 10:00 and 14:00
    cjjcju21, cjucjj21, kuvcju21, cjukuv21,
    ## 21-day data, international routes, 10:00 and 14:00
    ## (the trailing comma after bkkicn21 is required: without it the next
    ##  name is adjacent to bkkicn21 and the whole cell is a SyntaxError)
    icnnrt21, nrticn21, icnbkk21, bkkicn21,
    ### 21-day data for local departures from Narita, Fukuoka and Bangkok
    ze_nrticn21, tw_nrticn21, c7_nrticn21, lj_nrticn21,
    ze_bkkicn21, tw_bkkicn21, c7_bkkicn21, lj_bkkicn21,
    ze_fukicn21, tw_fukicn21, c7_fukicn21, lj_fukicn21,
]
## Crawl timestamp in YYYYMMDDHH form, as expected by crawl_by_route_siteinfos
crawl_date = datetime.today().strftime('%Y%m%d%H')
## Conditions that raised during crawling are returned so they can be retried later
error_infos = crawl_by_route_siteinfos(crawl_date, conds)

In [None]:
### 21-day data for local departures from Narita, Fukuoka and Bangkok
conds = [
    ze_nrticn21, tw_nrticn21, c7_nrticn21, lj_nrticn21,
    ze_bkkicn21, tw_bkkicn21, c7_bkkicn21, lj_bkkicn21,
    ze_fukicn21, tw_fukicn21, c7_fukicn21, lj_fukicn21,
]
## The batch entry point is crawl_by_route_siteinfos(crawl_date, infos); the
## previous call used a misspelled name and omitted the crawl timestamp.
crawl_date = datetime.today().strftime('%Y%m%d%H')  # YYYYMMDDHH
error_infos = crawl_by_route_siteinfos(crawl_date, conds)

In [None]:
### Test run: 21-day data for Fukuoka departures
conds = [icnfuk21, ze_fukicn21, tw_fukicn21, c7_fukicn21, lj_fukicn21]
## Timestamp of this run in YYYYMMDDHH form
crawl_date = datetime.today().strftime('%Y%m%d%H')
## Keep the failed conditions so they can be inspected or retried
error_infos = crawl_by_route_siteinfos(crawl_date=crawl_date, infos=conds)

In [None]:
## Display the conditions that failed in the most recent crawl_by_route_siteinfos run
error_infos

In [None]:
## Retry only the conditions that failed, reusing the crawl_date set by an
## earlier cell; error_infos is overwritten with whatever fails again.
error_infos = crawl_by_route_siteinfos(crawl_date=crawl_date, infos=error_infos)

In [None]:
## Site health check
previous_crawled_date = get_date()  ## today; adjust and re-run if the job failed midway
start = get_date(base_date=previous_crawled_date, after=1)  ## first date: one day after the base date
end = get_date(base_date=previous_crawled_date, after=2)  ## last date: two days after the base date
dates = list(timeiter(start, end))
## Condition layout matches what crawl_by_route_siteinfos unpacks:
## dom_int, dpt, arr, site, airlines, dates
gmpcjutest = ['0', 'GMP', 'CJU', 'IP', ['7C', 'LJ', 'TW', 'ZE'], dates]
## The batch entry point is crawl_by_route_siteinfos(crawl_date, infos); the
## previous call used a misspelled name and omitted the crawl timestamp.
crawl_date = datetime.today().strftime('%Y%m%d%H')  # YYYYMMDDHH
crawl_by_route_siteinfos(crawl_date, [gmpcjutest])