# Library

In [1]:
from urllib.request import urlopen
from pandas import json_normalize
from tqdm import tqdm, trange
from datetime import datetime, timedelta
from sqlalchemy import types, create_engine
from tqdm import tqdm 

import pandas as pd
import json
import time
# import cx_Oracle
import os

import warnings
warnings.filterwarnings('ignore')

# Functions

In [2]:
## Get JSON from API url

def get_json(dep_city, arr_city, date, airline):
    url = (
        'https://domair.interpark.com/api/booking/airJourney.do?format=json&dep=' 
        + dep_city
        + '&arr=' 
        + arr_city
        + '&depDate=' 
        + date 
        +'&adt=1&chd=0&inf=0&tripDivi=0&airlineCode=' 
        + airline 
        + '&siteCode='
    )
    
    try:
        json_data = json.load(urlopen(url))

        #Error check
        if json_data == None:
            print("[F1]", dep_city, arr_city, date, airline, " - Null page")

        elif json_data['replyAvailFare'] == None :
            print("[F1]", dep_city, arr_city, date, airline, " - Available ticket is none")

        elif json_data['replyHeader']['errorCode'] != '0': 
            print("[F1]", dep_city, arr_city, date, airline, "Error Code: ", json_data['replyHeader']['errorCode'], " - Try again after a min ...")
            time.sleep(60)
            get_json(dep_city, arr_city, date, airline) # Recursion
            
        else:
            print(dep_city, arr_city, date, airline,"is okay !")
            return json_data
    
    except Exception as ex:

        print("[F1] Error code: ", ex)
        print("Sleep a min ... ")

        time.sleep(60)

        print("Retry !")
        get_json(dep_city, arr_city, date, airline) # Recursion

In [3]:
## JSON parsing and get details

def get_tickets(json_data):
    df = pd.DataFrame(
        columns = ['depart_date', 'depart_time', 'airline', 'flight_no', 'depart', 'arrive', 'class_code', 'class_desc', 'avail_seat', 'fare_origin', 'fare']
    )

    try:
        for i in range(len(json_data['replyAvailFare']['availFareSet'])):
            for j in range(len(json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'])):
                depart_date = json_data['replyAvailFare']['availFareSet'][i]['segFare']['depDate']
                depart_time = json_data['replyAvailFare']['availFareSet'][i]['segFare']['depTime']
                airline_code = json_data['replyAvailFare']['availFareSet'][i]['segFare']['carCode']
                flight_no = json_data['replyAvailFare']['availFareSet'][i]['segFare']['mainFlt']
                depart_code = json_data['replyAvailFare']['availFareSet'][i]['segFare']['depCity']
                arrive_code = json_data['replyAvailFare']['availFareSet'][i]['segFare']['arrCity']
                class_code = json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'][j]['classCode']
                class_desc = json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'][j]['classDesc']
                avail_seat = json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'][j]['noOfAvailSeat']
                fare = json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'][j]['fare']
                fare_origin = json_data['replyAvailFare']['availFareSet'][i]['segFare']['classDetail'][j]['fareOrigin']

                data_list = [depart_date, depart_time, airline_code, flight_no, depart_code, arrive_code, class_code, class_desc, avail_seat, fare_origin, fare]
                data_series = pd.Series(data_list, index = df.columns)
                df = df.append(data_series, ignore_index = True)
                # df = pd.concat([df, data_series], ignore_index = True)

    except Exception as e:
        print("[F2] Error code: " + str(e))
            
    return df

In [4]:
## Data processing

def data_processing(ticket_df):
    
    # Search date & time adding
    ticket_df['search_date'] = datetime.today().strftime('%Y%m%d')
    ticket_df['search_time'] = datetime.today().strftime('%H%M')

    # Data types changing
    ticket_df = ticket_df.astype(
        {
            'depart_date': 'datetime64'
            , 'depart_time': 'string'
            , 'airline': 'string'
            , 'flight_no': 'string'
            , 'depart': 'string'
            , 'arrive': 'string'
            , 'class_code': 'string'
            , 'class_desc': 'string'
            , 'avail_seat': 'int'
            , 'fare_origin': 'int'
            , 'fare': 'int'
            , 'search_date': 'datetime64'
            , 'search_time': 'string'
        }
    )
    
    return ticket_df

In [5]:
## Stack data into Oracle DB

def to_oracle(total_df):
    id = "sample"
    pw = "sample"
    address = "smaple"

    connect = create_engine(f"oracle+cx_oracle://{id}:{pw}@{address}")
    
    try:
        total_df.to_sql("Table name"
            , connect
            , if_exists = 'append'
            , index = False
            , dtype={
                'depart_date': types.Date()
                , 'depart_time': types.VARCHAR()
                , 'airline': types.VARCHAR()
                , 'flight_no': types.VARCHAR()
                , 'depart': types.VARCHAR()
                , 'arrive': types.VARCHAR()
                , 'class_code': types.VARCHAR()
                , 'class_desc': types.VARCHAR()
        #         , 'avail_seat': 'int'
        #         , 'fare_origin': 'int'
                , 'fare': types.VARCHAR()
                , 'search_date': types.Date()
                , 'search_time': types.VARCHAR()
                 }
            )

    except Exception as ex:
        print("[F4] Error code: ", ex)

In [6]:
## Slack notification

def send_slack_message(channel_name, title, file_name, start_time):
    slack_token = "****"
    client = WebClient(token=slack_token)

    content = """
        %s , %s
        """ % (title, start_time)
    file_exist = False
    file_extension = ''

    if os.path.isfile(file_name):
        path, ext= os.path.splitext(file_name)
        file_extension = ext.strip('.')
        print("ext : " +file_extension)
        file_exist = True

    else:
        file_exist = False

    try:
        response = client.chat_postMessage(channel = channel_name, text = content)
        print(response['ok'])
        
        if file_exist:
            response_file_1 = client.files_upload(channels = channel_name
                                                , file = file_name
                                                , filename = file_name
                                                , filetype = file_extension
                                                )
            print(response_file_1['ok'])
        else:
            response_file_error = client.chat_postMessage(channel = channel_name, text = "No files to upload.")
            print(response_file_error['ok'])


    except SlackApiError as e:
        print('Error: {}'.format(e.response['error']))

    except Exception as e:
        print('Error : ' + str(e))

In [7]:
## Stack into Google Cloud Storage

def upload_to_bucket(file_name):
    # os.getcwd() +  
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "JSON key"
    client = storage.Client()
    bucket = client.get_bucket("storage name")
    blob = bucket.blob("storage name" + file_name)
    blob.upload_from_filename(file_name)

In [8]:
## Create BigQuery DB

def insert_to_bigquery_table(dataframe):
    try:
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.getcwd() +  "JSON key"

        # Construct a BigQuery client object.
        client = bigquery.Client()

        table_id = "****"
        table = client.get_table(table_id)  # Make an API request.

        job_config = bigquery.LoadJobConfig(
            schema=[
                ]
        )
        
        job = client.load_table_from_dataframe(
            dataframe, table_id, job_config=job_config
        )  # Make an API request.

        job.result()  
        # Wait for the job to complete.

        table = client.get_table(table_id)  # Make an API request.
        print(
            "Loaded {} rows and {} columns to {}".format(
                table.num_rows, len(table.schema), table_id
            )
        )

    except Exception as e:
        print("Error : " + str(e))

# Main

In [9]:
start_time = datetime.now()
print("Start: ", start_time.strftime('%Y-%m-%d %H:%M'))

# send_slack_message("crawling_logs", "airline_ticket (시작)", "", start_time)

final_df = pd.DataFrame()
line_list = ['GMP', 'CJU'], ['PUS', 'CJU'], ['CJJ', 'CJU'], ['TAE', 'CJU'], ['KWJ', 'CJU'], ['CJU', 'GMP'], ['CJU', 'PUS'], ['CJU', 'CJJ'], ['CJU', 'TAE'], ['CJU', 'KWJ']
airline_list = ['KE', 'OZ', '7C', 'LJ', 'TW', 'BX'] # Airlines code, 'RS' is unstable

for line in tqdm(line_list):
    
    for date in range(1, 61):
        
        date = datetime.today() + timedelta(days = date)
        date = date.strftime('%Y%m%d')
        # print(date)
        
        for airline in airline_list:
            
            json_data = get_json(line[0], line[1], date, airline)
            
            if json_data == None:
                
                continue

            else:

                ticket_df = get_tickets(json_data)

                total_df = data_processing(ticket_df)
                
                final_df = final_df.append(total_df, ignore_index = True)
                # final_df = pd.concat([final_df, total_df], ignore_index = True)
                # to_oracle(total_df)

                # # to parquet
                # end_time = datetime.now()
                # e_time_re = end_time.strftime('%Y%m%d_%H%M')
                # filename = "airline_ticket_" + e_time_re + ".parquet"
                # total_df.to_parquet(filename, engine="pyarrow", compression="gzip")
                # upload_to_bucket(filename)
                # insert_to_bigquery_table(total_df)


end_time = datetime.now()
running_time = end_time - start_time
print("End: ", end_time.strftime('%Y-%m-%d %H:%M'))
print("Running time: ", running_time)

# send_slack_message("crawling_logs", "airline_ticket (종료)", "airline.log", running_time)

Start:  2023-01-02 10:10


  0%|          | 0/10 [00:00<?, ?it/s]

GMP CJU 20230103 KE is okay !
GMP CJU 20230103 OZ is okay !
GMP CJU 20230103 7C is okay !
GMP CJU 20230103 LJ is okay !
GMP CJU 20230103 TW is okay !
GMP CJU 20230103 BX is okay !
GMP CJU 20230104 KE is okay !
GMP CJU 20230104 OZ is okay !
GMP CJU 20230104 7C is okay !
GMP CJU 20230104 LJ is okay !
GMP CJU 20230104 TW is okay !
GMP CJU 20230104 BX is okay !
GMP CJU 20230105 KE is okay !
GMP CJU 20230105 OZ is okay !
GMP CJU 20230105 7C is okay !
GMP CJU 20230105 LJ is okay !
GMP CJU 20230105 TW is okay !
GMP CJU 20230105 BX is okay !
GMP CJU 20230106 KE is okay !
GMP CJU 20230106 OZ is okay !
GMP CJU 20230106 7C is okay !
GMP CJU 20230106 LJ is okay !
GMP CJU 20230106 TW is okay !
GMP CJU 20230106 BX is okay !
GMP CJU 20230107 KE is okay !
GMP CJU 20230107 OZ is okay !
GMP CJU 20230107 7C is okay !
GMP CJU 20230107 LJ is okay !
GMP CJU 20230107 TW is okay !
GMP CJU 20230107 BX is okay !
GMP CJU 20230108 KE is okay !
GMP CJU 20230108 OZ is okay !
GMP CJU 20230108 7C is okay !
GMP CJU 20

 10%|█         | 1/10 [03:32<31:54, 212.71s/it]

GMP CJU 20230303 BX is okay !
PUS CJU 20230103 KE is okay !
PUS CJU 20230103 OZ is okay !
PUS CJU 20230103 7C is okay !
PUS CJU 20230103 LJ is okay !
[F1] PUS CJU 20230103 TW  - Available ticket is none
PUS CJU 20230103 BX is okay !
PUS CJU 20230104 KE is okay !
PUS CJU 20230104 OZ is okay !
PUS CJU 20230104 7C is okay !
PUS CJU 20230104 LJ is okay !
[F1] PUS CJU 20230104 TW  - Available ticket is none
PUS CJU 20230104 BX is okay !
PUS CJU 20230105 KE is okay !
PUS CJU 20230105 OZ is okay !
PUS CJU 20230105 7C is okay !
PUS CJU 20230105 LJ is okay !
[F1] PUS CJU 20230105 TW  - Available ticket is none
PUS CJU 20230105 BX is okay !
PUS CJU 20230106 KE is okay !
[F1] PUS CJU 20230106 OZ  - Available ticket is none
PUS CJU 20230106 7C is okay !
PUS CJU 20230106 LJ is okay !
[F1] PUS CJU 20230106 TW  - Available ticket is none
PUS CJU 20230106 BX is okay !
PUS CJU 20230107 KE is okay !
PUS CJU 20230107 OZ is okay !
PUS CJU 20230107 7C is okay !
PUS CJU 20230107 LJ is okay !
[F1] PUS CJU 20

 20%|██        | 2/10 [08:25<34:38, 259.77s/it]

PUS CJU 20230303 BX is okay !
CJJ CJU 20230103 KE is okay !
CJJ CJU 20230103 OZ is okay !
CJJ CJU 20230103 7C is okay !
CJJ CJU 20230103 LJ is okay !
CJJ CJU 20230103 TW is okay !
[F1] CJJ CJU 20230103 BX  - Available ticket is none
CJJ CJU 20230104 KE is okay !
CJJ CJU 20230104 OZ is okay !
CJJ CJU 20230104 7C is okay !
CJJ CJU 20230104 LJ is okay !
CJJ CJU 20230104 TW is okay !
[F1] CJJ CJU 20230104 BX  - Available ticket is none
CJJ CJU 20230105 KE is okay !
CJJ CJU 20230105 OZ is okay !
CJJ CJU 20230105 7C is okay !
CJJ CJU 20230105 LJ is okay !
CJJ CJU 20230105 TW is okay !
[F1] CJJ CJU 20230105 BX  - Available ticket is none
CJJ CJU 20230106 KE is okay !
CJJ CJU 20230106 OZ is okay !
[F1] CJJ CJU 20230106 7C  - Available ticket is none
CJJ CJU 20230106 LJ is okay !
CJJ CJU 20230106 TW is okay !
[F1] CJJ CJU 20230106 BX  - Available ticket is none
CJJ CJU 20230107 KE is okay !
CJJ CJU 20230107 OZ is okay !
CJJ CJU 20230107 7C is okay !
CJJ CJU 20230107 LJ is okay !
CJJ CJU 2023010

 30%|███       | 3/10 [13:48<33:41, 288.80s/it]

[F1] CJJ CJU 20230303 BX  - Available ticket is none
TAE CJU 20230103 KE is okay !
TAE CJU 20230103 OZ is okay !
TAE CJU 20230103 7C is okay !
TAE CJU 20230103 LJ is okay !
TAE CJU 20230103 TW is okay !
[F1] TAE CJU 20230103 BX  - Available ticket is none
TAE CJU 20230104 KE is okay !
TAE CJU 20230104 OZ is okay !
TAE CJU 20230104 7C is okay !
TAE CJU 20230104 LJ is okay !
TAE CJU 20230104 TW is okay !
[F1] TAE CJU 20230104 BX  - Available ticket is none
TAE CJU 20230105 KE is okay !
[F2] Error code: object of type 'NoneType' has no len()
TAE CJU 20230105 OZ is okay !
TAE CJU 20230105 7C is okay !
TAE CJU 20230105 LJ is okay !
TAE CJU 20230105 TW is okay !
[F1] TAE CJU 20230105 BX  - Available ticket is none
TAE CJU 20230106 KE is okay !
TAE CJU 20230106 OZ is okay !
TAE CJU 20230106 7C is okay !
TAE CJU 20230106 LJ is okay !
TAE CJU 20230106 TW is okay !
[F1] TAE CJU 20230106 BX  - Available ticket is none
TAE CJU 20230107 KE is okay !
TAE CJU 20230107 OZ is okay !
TAE CJU 20230107 7C

 40%|████      | 4/10 [18:32<28:41, 286.91s/it]

[F1] TAE CJU 20230303 BX  - Available ticket is none
KWJ CJU 20230103 KE is okay !
KWJ CJU 20230103 OZ is okay !
KWJ CJU 20230103 7C is okay !
KWJ CJU 20230103 LJ is okay !
KWJ CJU 20230103 TW is okay !
[F1] KWJ CJU 20230103 BX  - Available ticket is none
KWJ CJU 20230104 KE is okay !
KWJ CJU 20230104 OZ is okay !
KWJ CJU 20230104 7C is okay !
KWJ CJU 20230104 LJ is okay !
KWJ CJU 20230104 TW is okay !
[F1] KWJ CJU 20230104 BX  - Available ticket is none
KWJ CJU 20230105 KE is okay !
KWJ CJU 20230105 OZ is okay !
KWJ CJU 20230105 7C is okay !
KWJ CJU 20230105 LJ is okay !
KWJ CJU 20230105 TW is okay !
[F1] KWJ CJU 20230105 BX  - Available ticket is none
KWJ CJU 20230106 KE is okay !
KWJ CJU 20230106 OZ is okay !
KWJ CJU 20230106 7C is okay !
KWJ CJU 20230106 LJ is okay !
KWJ CJU 20230106 TW is okay !
[F1] KWJ CJU 20230106 BX  - Available ticket is none
KWJ CJU 20230107 KE is okay !
KWJ CJU 20230107 OZ is okay !
KWJ CJU 20230107 7C is okay !
KWJ CJU 20230107 LJ is okay !
KWJ CJU 2023010

 50%|█████     | 5/10 [23:47<24:44, 296.96s/it]

[F1] KWJ CJU 20230303 BX  - Available ticket is none
CJU GMP 20230103 KE is okay !
CJU GMP 20230103 OZ is okay !
CJU GMP 20230103 7C is okay !
CJU GMP 20230103 LJ is okay !
CJU GMP 20230103 TW is okay !
CJU GMP 20230103 BX is okay !
CJU GMP 20230104 KE is okay !
CJU GMP 20230104 OZ is okay !
CJU GMP 20230104 7C is okay !
CJU GMP 20230104 LJ is okay !
CJU GMP 20230104 TW is okay !
CJU GMP 20230104 BX is okay !
CJU GMP 20230105 KE is okay !
CJU GMP 20230105 OZ is okay !
CJU GMP 20230105 7C is okay !
CJU GMP 20230105 LJ is okay !
CJU GMP 20230105 TW is okay !
CJU GMP 20230105 BX is okay !
CJU GMP 20230106 KE is okay !
CJU GMP 20230106 OZ is okay !
CJU GMP 20230106 7C is okay !
CJU GMP 20230106 LJ is okay !
CJU GMP 20230106 TW is okay !
CJU GMP 20230106 BX is okay !
CJU GMP 20230107 KE is okay !
CJU GMP 20230107 OZ is okay !
CJU GMP 20230107 7C is okay !
CJU GMP 20230107 LJ is okay !
CJU GMP 20230107 TW is okay !
CJU GMP 20230107 BX is okay !
CJU GMP 20230108 KE is okay !
CJU GMP 20230108 

 60%|██████    | 6/10 [27:39<18:19, 274.88s/it]

CJU GMP 20230303 BX is okay !
[F1] CJU PUS 20230103 KE  - Available ticket is none
[F1] CJU PUS 20230103 OZ  - Available ticket is none
[F1] CJU PUS 20230103 7C  - Available ticket is none
CJU PUS 20230103 LJ is okay !
[F2] Error code: object of type 'NoneType' has no len()
[F1] CJU PUS 20230103 TW  - Available ticket is none
CJU PUS 20230103 BX is okay !
CJU PUS 20230104 KE is okay !
CJU PUS 20230104 OZ is okay !
CJU PUS 20230104 7C is okay !
CJU PUS 20230104 LJ is okay !
[F1] CJU PUS 20230104 TW  - Available ticket is none
CJU PUS 20230104 BX is okay !
CJU PUS 20230105 KE is okay !
CJU PUS 20230105 OZ is okay !
CJU PUS 20230105 7C is okay !
CJU PUS 20230105 LJ is okay !
[F1] CJU PUS 20230105 TW  - Available ticket is none
CJU PUS 20230105 BX is okay !
CJU PUS 20230106 KE is okay !
CJU PUS 20230106 OZ is okay !
CJU PUS 20230106 7C is okay !
CJU PUS 20230106 LJ is okay !
[F1] CJU PUS 20230106 TW  - Available ticket is none
CJU PUS 20230106 BX is okay !
CJU PUS 20230107 KE is okay !
CJU

 70%|███████   | 7/10 [32:37<14:06, 282.29s/it]

CJU PUS 20230303 BX is okay !
CJU CJJ 20230103 KE is okay !
CJU CJJ 20230103 OZ is okay !
CJU CJJ 20230103 7C is okay !
CJU CJJ 20230103 LJ is okay !
CJU CJJ 20230103 TW is okay !
[F1] CJU CJJ 20230103 BX  - Available ticket is none
CJU CJJ 20230104 KE is okay !
CJU CJJ 20230104 OZ is okay !
CJU CJJ 20230104 7C is okay !
CJU CJJ 20230104 LJ is okay !
CJU CJJ 20230104 TW is okay !
[F1] CJU CJJ 20230104 BX  - Available ticket is none
CJU CJJ 20230105 KE is okay !
CJU CJJ 20230105 OZ is okay !
CJU CJJ 20230105 7C is okay !
CJU CJJ 20230105 LJ is okay !
CJU CJJ 20230105 TW is okay !
[F1] CJU CJJ 20230105 BX  - Available ticket is none
CJU CJJ 20230106 KE is okay !
CJU CJJ 20230106 OZ is okay !
CJU CJJ 20230106 7C is okay !
CJU CJJ 20230106 LJ is okay !
CJU CJJ 20230106 TW is okay !
[F1] CJU CJJ 20230106 BX  - Available ticket is none
CJU CJJ 20230107 KE is okay !
CJU CJJ 20230107 OZ is okay !
CJU CJJ 20230107 7C is okay !
CJU CJJ 20230107 LJ is okay !
CJU CJJ 20230107 TW is okay !
[F1] CJU

 80%|████████  | 8/10 [37:09<09:18, 279.22s/it]

[F1] CJU CJJ 20230303 BX  - Available ticket is none
CJU TAE 20230103 KE is okay !
CJU TAE 20230103 OZ is okay !
CJU TAE 20230103 7C is okay !
CJU TAE 20230103 LJ is okay !
CJU TAE 20230103 TW is okay !
[F1] CJU TAE 20230103 BX  - Available ticket is none
CJU TAE 20230104 KE is okay !
CJU TAE 20230104 OZ is okay !
CJU TAE 20230104 7C is okay !
CJU TAE 20230104 LJ is okay !
CJU TAE 20230104 TW is okay !
[F1] CJU TAE 20230104 BX  - Available ticket is none
CJU TAE 20230105 KE is okay !
CJU TAE 20230105 OZ is okay !
CJU TAE 20230105 7C is okay !
CJU TAE 20230105 LJ is okay !
CJU TAE 20230105 TW is okay !
[F1] CJU TAE 20230105 BX  - Available ticket is none
CJU TAE 20230106 KE is okay !
CJU TAE 20230106 OZ is okay !
