## 김포 제주 노선 가격 정보 읽어오기
- 대상 사이트 : 에어부산 웹사이트

### 에어부산 사이트 웹 크롤링
    - 에어부산 국제선 항공권예매 페이지를 이용한 크롤링
    - 기본 크롤링
    - 필요 데이터 추출 하여 Pandas 의 DataFrame 형태로 생성
    - 생성된 데이터 엑셀 파일로 저장

In [88]:
from IPython.display import display
import requests
import pandas as pd
import numpy as np
from pandas import DataFrame
from bs4 import BeautifulSoup
import time
from datetime import datetime
from datetime import timedelta
from common.crawling_util import simple_crawling
from common.parsing_util import parsing_json_data_to_dict,mining_value_by_last_field_name
from common.parsing_util import r_flatten,stat_fare
from common.util import fill_list

def crawling_BX_data(dpt,arr,dpt_date):
    ##출발지, 도착지, 출발일을 기준으로 국내선(국제선) 편도 가격 읽어오기
    print('Crawling airbusan homepage schedule site')
    url = "https://www.airbusan.com/web/bookingApi/internationalOnewayAvail"
    head = {
        'Referer':'https://www.airbusan.com/web/individual/booking/international',
    }
    param ={## depDate 포맷 체크 필요 YYYY-MM-DD
    'jsonString':'{"bookingCategory":"Individual","tripType":"OW","listItinerary":\
    [{"itinNo":"1","depCity":"'+dpt+'","arrCity":"'+arr+'","depDate":"'+dpt_date+'","itineraryType":"Going"}],\
    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"}'
    }

    return simple_crawling(url,param,head=head,method='post',json=True)

data_heads = ['date','flt','start','end','cls','fare','tax1','tax2','seat']
key_fields = ['flightNo','depTime','arrTime','cls','priceAd','avail']
## Tax Info :  json['pubTaxFuel']['fuelAd'],json['pubTaxFuel']['taxAd']
## raw 데이터로 부터 DataFrame 생성
def raw_to_df(raw_data):
    df_list = []
    for list_flight in raw_data['listFareIntAvail'][0]['listFlight']:
        df_dict = mining_value_by_last_field_name(list_flight,key_fields)
        max_len = max(len(d) for d in df_dict.values())
        for k,v in df_dict.items():
            if len(v) < max_len:
                df_dict[k] = fill_list(v,max_len,method='bfill')
        df = pd.DataFrame(df_dict,columns=key_fields)
        df.columns = ['flt','start','end','cls','fare','seat']
        df_list.append(df)
    result_df = pd.concat(df_list,ignore_index=True)
    result_df['date'] = raw_data['listFareIntAvail'][0]['depDate']
    result_df['tax1'] = raw_data['pubTaxFuel']['fuelAd']
    result_df['tax2'] = raw_data['pubTaxFuel']['taxAd']
    return result_df[data_heads]

## 하루 기준 가격정보, 텍스정보, 최소값, 최대값, 평균 DataFrame 생성
def read_BX_1day_fare(dpt,arr,dpt_date):
    ## 데이터 읽어오기
    raw_data = crawling_BX_data(dpt,arr,dpt_date)
    df=raw_to_df(raw_data)
    if df is None or len(df) == 0: ## 읽는 도중 에러가 나거나 익셉션 발생으로 문제가 있을 경우 처리
        print('********** No Data On raw_to_df **********')
        return None

    ## 최소값 최대값 평균 계산
    stat = stat_fare(df,['fare'])
    ## 요약정보 추가
    df.ix[len(df)] = [dpt_date,'min','max','mean',str(stat[0]),str(stat[1]),str(stat[2]),'','']
    return df

## 정해진 기간의 데이터 읽어오기, 기본 30일
def read_BX_date_range_fare(dpt,arr,start=0,end=31):
    # 기본 30일간 데이터 읽어서 파일로 저장
    date_range = [ (datetime.today()+timedelta(1)*i).strftime('%Y%m%d') for i in range(start,end)]
    df_list = []
    for d in date_range:
        try:
            fare_df = read_BX_1day_fare(dpt,arr,d)
            if fare_df is not None:
                df_list.append(fare_df)
        except Exception as e:
            print('****** Error occured : ',e)
    result = pd.concat(df_list,ignore_index=True)
    print('++++++++++Total : ', len(result))
    ## 파일 저장
    result.to_excel('{}/{}_{}_{}_{}_{}_{}.xls'.format('excel','BX',dpt,arr,start,end,datetime.today().strftime('%Y%m%d%H%M')))
    return result

In [86]:
dpt, arr, dpt_date = 'PUS','NRT','20170520'
json=crawling_BX_data(dpt,arr,dpt_date)
raw_to_df(json)

Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170520","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling


Unnamed: 0,date,flt,start,end,cls,fare,tax1,tax2,seat
0,20170520,BX0112,1100,1300,L,130000.0,0.0,23000.0,41
1,20170520,BX0112,1100,1300,S,200000.0,0.0,23000.0,20
2,20170520,BX0114,1435,1635,A,100000.0,0.0,23000.0,19
3,20170520,BX0114,1435,1635,L,110000.0,0.0,23000.0,59
4,20170520,BX0114,1435,1635,S,200000.0,0.0,23000.0,20


In [89]:
## 하루치 데이터 읽어오기
dpt, arr, dpt_date = 'PUS','NRT','20170520'
#dpt, arr, dpt_date, dom_int = 'ICN','NRT','2017-05-01','I'

read_BX_1day_fare(dpt,arr,dpt_date)

Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170520","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling


  if '' in fare_arr or '0' in fare_arr:


Unnamed: 0,date,flt,start,end,cls,fare,tax1,tax2,seat
0,20170520,BX0112,1100,1300,L,130000.0,0.0,23000.0,41.0
1,20170520,BX0112,1100,1300,S,200000.0,0.0,23000.0,20.0
2,20170520,BX0114,1435,1635,A,100000.0,0.0,23000.0,19.0
3,20170520,BX0114,1435,1635,L,110000.0,0.0,23000.0,59.0
4,20170520,BX0114,1435,1635,S,200000.0,0.0,23000.0,20.0
5,20170520,min,max,mean,100000.0,200000.0,135000.0,,


In [90]:
## 정해진 기간의 데이터 읽어오기
dpt, arr, = 'PUS','NRT'
start,end = 0, 31 ## 읽어올 데이터 기간
read_BX_date_range_fare(dpt,arr,start,end)

Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170508","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
****** Error occured :  No objects to concatenate
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170509","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start S

  if '' in fare_arr or '0' in fare_arr:


End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170511","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170512","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.

Unnamed: 0,date,flt,start,end,cls,fare,tax1,tax2,seat
0,20170509,BX0112,1100,1300,A,100000,0,23000,6
1,20170509,BX0112,1100,1300,L,120000,0,23000,34
2,20170509,BX0112,1100,1300,S,200000,0,23000,20
3,20170509,BX0114,1435,1635,A,90000,0,23000,37
4,20170509,BX0114,1435,1635,L,110000,0,23000,49
5,20170509,BX0114,1435,1635,S,200000,0,23000,20
6,20170509,min,max,mean,90000.0,200000.0,124000.0,,
7,20170510,BX0112,1100,1300,A,80000,0,23000,50
8,20170510,BX0112,1100,1300,L,110000,0,23000,40
9,20170510,BX0112,1100,1300,S,200000,0,23000,40


In [91]:
## 정해진 기간의 데이터 읽어오기
dpt, arr, = 'PUS','NRT'
start,end = 31, 46 ## 읽어올 데이터 기간
read_BX_date_range_fare(dpt,arr,start,end)

Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170608","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170609","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 


  if '' in fare_arr or '0' in fare_arr:


End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170610","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170611","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.

Unnamed: 0,date,flt,start,end,cls,fare,tax1,tax2,seat
0,20170608,BX0112,1100,1300,A,80000,0,23000,52
1,20170608,BX0112,1100,1300,L,110000,0,23000,60
2,20170608,BX0112,1100,1300,S,200000,0,23000,40
3,20170608,BX0114,1435,1635,A,60000,0,23000,79
4,20170608,BX0114,1435,1635,L,110000,0,23000,70
5,20170608,BX0114,1435,1635,S,200000,0,23000,20
6,20170608,min,max,mean,60000.0,200000.0,112500.0,,
7,20170609,BX0112,1100,1300,L,120000,0,23000,35
8,20170609,BX0112,1100,1300,S,200000,0,23000,20
9,20170609,BX0114,1435,1635,A,90000,0,23000,33


In [92]:
## 정해진 기간의 데이터 읽어오기
dpt, arr, = 'PUS','NRT'
start,end = 46, 90 ## 읽어올 데이터 기간
read_BX_date_range_fare(dpt,arr,start,end)

Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170623","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170624","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 


  if '' in fare_arr or '0' in fare_arr:


End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170625","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.airbusan.com/web/bookingApi/internationalOnewayAvail
>> Parameters
jsonString:{"bookingCategory":"Individual","tripType":"OW","listItinerary":    [{"itinNo":"1","depCity":"PUS","arrCity":"NRT","depDate":"20170626","itineraryType":"Going"}],    "focYN":"N","openReturnYN":"","paxCountCorp":0,"paxCountAd":1,"paxCountCh":0,"paxCountIn":0,"itinNo":"1"} , 
End Simple crawling
Crawling airbusan homepage schedule site
Start Simple crawling :  https://www.

Unnamed: 0,date,flt,start,end,cls,fare,tax1,tax2,seat
0,20170623,BX0112,1100,1300,A,100000,0,23000,22
1,20170623,BX0112,1100,1300,L,110000,0,23000,70
2,20170623,BX0112,1100,1300,S,200000,0,23000,20
3,20170623,BX0114,1435,1635,A,80000,0,23000,42
4,20170623,BX0114,1435,1635,L,110000,0,23000,70
5,20170623,BX0114,1435,1635,S,200000,0,23000,20
6,20170623,min,max,mean,80000.0,200000.0,122500.0,,
7,20170624,BX0112,1100,1300,A,100000,0,23000,9
8,20170624,BX0112,1100,1300,L,110000,0,23000,57
9,20170624,BX0112,1100,1300,S,200000,0,23000,20
