In [1]:
## Prerequisites: the `selenium` Python package and a ChromeDriver executable available on PATH
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import pandas as pd
import yaml
import csv

In [2]:
# Parse the local YAML config (it holds the site credentials read further down).
# The context manager closes the file handle as soon as parsing is done instead
# of leaving it open for the lifetime of the kernel.
with open("./.config.yml") as config_file:
    config = yaml.safe_load(config_file)

In [3]:
def fly_login(username, password):
    """Start Chrome, open the fly.alaskasworld.com login page and submit credentials.

    Args:
        username: Text typed into the element with id ``UserName``.
        password: Text typed into the element with id ``Password``.

    Returns:
        The Chrome WebDriver, right after the login button has been clicked.
        No check is made that the login actually succeeded.

    Raises:
        Any exception raised by Selenium while loading the page or locating
        the form elements. The browser is shut down before the exception
        propagates, so a failed login does not leave a stray Chrome process.
    """
    # Called without arguments, Selenium locates "chromedriver" on its own
    # (the same default the explicit "chromedriver" path selected); recent
    # Selenium releases no longer accept a positional driver path.
    driver = webdriver.Chrome()
    try:
        driver.get("https://fly.alaskasworld.com/")
        # "id" is the locator-strategy string behind selenium's By.ID; the
        # generic find_element(by, value) exists in Selenium 3 and 4, whereas
        # the find_element_by_* helpers were removed from Selenium 4.
        driver.find_element("id", "UserName").send_keys(username)
        driver.find_element("id", "Password").send_keys(password)
        driver.find_element("id", "loginsubmit").click()
    except Exception:
        # The caller never receives the driver on failure, so close it here.
        driver.quit()
        raise
    return driver

In [4]:
def connecting_flights(driver):
    """Tell whether the currently loaded page shows any "flight-divider" element.

    Args:
        driver: Object exposing ``find_elements(by, value)`` (a Selenium
            WebDriver) with the search results page already loaded.

    Returns:
        True if at least one element whose class contains "flight-divider"
        is found, otherwise False. The caller treats True as "this route has
        layovers".
    """
    # "xpath" is the locator-strategy string behind selenium's By.XPATH; the
    # generic find_elements(by, value) works on Selenium 3 and 4, while
    # find_elements_by_xpath was removed from Selenium 4.
    dividers = driver.find_elements("xpath", '//*[contains(@class,"flight-divider")]')
    return len(dividers) > 0

In [5]:
def clean_flight_list(flight_list):
    """Split the combined "airport time" columns into airport and 24-hour time columns.

    The first three characters of ``orig_and_time`` / ``dest_and_time`` become
    ``orig`` / ``dest``; the text from the fifth character on is parsed as a
    12-hour clock time (``%I:%M %p``) and re-formatted as ``HH:MM`` into
    ``dep`` / ``arr``.

    Args:
        flight_list: DataFrame with at least the columns ``orig_and_time``,
            ``dest_and_time`` and ``show_details``. It is not modified.

    Returns:
        A new DataFrame with ``orig``, ``dep``, ``dest`` and ``arr`` added and
        ``orig_and_time``, ``dest_and_time`` and ``show_details`` removed.

    Raises:
        ValueError: if a time string does not match ``%I:%M %p`` (raised by
            ``pd.to_datetime``).
    """
    def _to_24h(column):
        # Skip the 3-letter airport code and the separator character.
        parsed = pd.to_datetime(flight_list[column].str[4:], format='%I:%M %p')
        return parsed.dt.strftime('%H:%M')

    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original columns (the previous version added four columns to it).
    enriched = flight_list.assign(
        orig=flight_list['orig_and_time'].str[:3],
        dep=_to_24h('orig_and_time'),
        dest=flight_list['dest_and_time'].str[:3],
        arr=_to_24h('dest_and_time'),
    )
    return enriched.drop(columns=['orig_and_time', 'dest_and_time', 'show_details'])

In [6]:
def get_flight_list(driver):
    """Collect the "flight-details" elements of the loaded page into a cleaned DataFrame.

    Each matching element's text is split on newlines into six fields
    (flight_num, orig_and_time, dest_and_time, config, flight_time,
    show_details) and the resulting table is passed through
    ``clean_flight_list``.

    Args:
        driver: Selenium WebDriver with the search results page loaded.

    Returns:
        The DataFrame returned by ``clean_flight_list``, one row per
        matching element.

    Raises:
        ValueError: if an element's text does not split into exactly six
            lines.
    """
    columns = ['flight_num','orig_and_time','dest_and_time','config','flight_time','show_details']
    rows = []
    # "xpath" is the locator-strategy string behind selenium's By.XPATH;
    # find_elements(by, value) works on Selenium 3 and 4, whereas
    # find_elements_by_xpath was removed from Selenium 4.
    for element in driver.find_elements("xpath", '//*[contains(@class,"flight-details")]'):
        fields = element.text.split('\n')
        if len(fields) != len(columns):
            raise ValueError(
                "expected {0} lines in flight details, got {1}: {2!r}".format(
                    len(columns), len(fields), fields))
        rows.append(fields)
    # Build the frame once instead of enlarging it row by row with .loc.
    return clean_flight_list(pd.DataFrame(rows, columns=columns))

In [7]:
def clean_flight_seats(flight_seat_list):
    """Turn the scraped seat-table rows into one DataFrame row per flight.

    Rows are consumed in order. A row whose first cell is ``'Cnfg'`` starts a
    new flight. ``'Cnfg'``, ``'Auth'``, ``'Cnfg Open'`` and ``'Auth Open'``
    rows store their second and third cells in the ``*_f`` / ``*_y`` columns
    (``cnfg`` is spelled ``config`` in the column names). A ``'Listed'`` row
    stores its second cell in ``listed``. Rows with any other first cell, and
    rows with no cells at all, are ignored.

    Args:
        flight_seat_list: Iterable of lists of cell strings, one list per
            table row.

    Returns:
        DataFrame with columns config_f, config_y, auth_f, auth_y,
        config_open_f, config_open_y, auth_open_f, auth_open_y, listed.
        Values not present for a flight are left missing.

    Raises:
        ValueError: if a data row appears before the first ``'Cnfg'`` row.
        IndexError: if a recognised row has fewer cells than it needs.
    """
    columns = ['config_f','config_y','auth_f','auth_y','config_open_f','config_open_y','auth_open_f','auth_open_y', 'listed']
    paired_labels = ('Cnfg', 'Auth', 'Cnfg Open', 'Auth Open')

    # Plain dicts are collected and converted once at the end. The previous
    # chained assignment (frame.loc[row][col] = value) writes into a temporary
    # object when pandas copy-on-write is active, silently losing the value.
    records = []
    for fs in flight_seat_list:
        if not fs:
            # A <tr> without any td/th cells carries no data.
            continue
        label = fs[0]
        if label == 'Cnfg':
            # Next flight - start a fresh, all-missing record.
            records.append(dict.fromkeys(columns))
        if label not in paired_labels and label != 'Listed':
            continue
        if not records:
            raise ValueError(
                "seat row {0!r} appears before any 'Cnfg' row".format(label))
        current = records[-1]
        if label == 'Listed':
            current['listed'] = fs[1]
        else:
            prefix = label.lower().replace('cnfg', 'config').replace(' ', '_')
            current[prefix + '_f'] = fs[1]
            current[prefix + '_y'] = fs[2]
    return pd.DataFrame(records, columns=columns)

In [8]:
def get_flight_seats(driver):
    """Scrape the seat-availability tables of the loaded page into a DataFrame.

    Every ``<tr>`` under an element whose id contains "flightSeatDetails" is
    read as a list of the ``innerHTML`` of its ``td``/``th`` cells; the list
    of rows is then handed to ``clean_flight_seats``.

    Args:
        driver: Selenium WebDriver with the search results page loaded.

    Returns:
        The DataFrame produced by ``clean_flight_seats``.
    """
    # "xpath" is the locator-strategy string behind selenium's By.XPATH;
    # find_elements(by, value) is available on both drivers and elements in
    # Selenium 3 and 4, whereas find_elements_by_xpath was removed from
    # Selenium 4.
    seat_rows = driver.find_elements("xpath", '//*[contains(@id,"flightSeatDetails")]//tr')
    flight_seat_list = [
        [cell.get_attribute("innerHTML")
         for cell in row.find_elements("xpath", ".//*[self::td or self::th]")]
        for row in seat_rows
    ]
    return clean_flight_seats(flight_seat_list)

In [9]:
def get_flight_details(orig, dest, flight_date, driver=None):
    """Load the search page for one route/date and return its flights with seat counts.

    Args:
        orig: Origin value placed in the search URL (e.g. ``'SFO'``).
        dest: Destination value placed in the search URL.
        flight_date: Date value placed in the search URL (e.g. ``'06Mar'``).
        driver: Selenium WebDriver to use. When omitted, the module-level
            ``driver`` variable is used, which is how existing three-argument
            calls keep working.

    Returns:
        None when ``connecting_flights(driver)`` is True for the loaded page;
        otherwise the flight list and the seat table concatenated side by
        side (``axis=1``) into one DataFrame.

    Raises:
        NameError: if no driver is passed and no module-level ``driver``
            exists.
    """
    if driver is None:
        # Backward-compatible fallback to the notebook-global driver; passing
        # the driver explicitly avoids depending on cell execution order.
        try:
            driver = globals()["driver"]
        except KeyError:
            raise NameError("name 'driver' is not defined") from None

    url = "https://fly.alaskasworld.com/search/{0}/{1}/{2}".format(orig, dest, flight_date)
    driver.get(url)
    if connecting_flights(driver):
        return None

    flight_list = get_flight_list(driver)
    flight_seats = get_flight_seats(driver)
    return pd.concat([flight_list, flight_seats], axis=1)

In [10]:
def get_sfo_dest(filename):
    """Read a CSV file from the current directory and return its non-empty rows.

    Args:
        filename: Name of the CSV file, resolved relative to ``./``.

    Returns:
        A list of rows, each row being a list of string fields. Blank lines
        in the file (which ``csv.reader`` yields as empty lists) are skipped,
        so every returned row has at least one field.

    Raises:
        OSError: if the file cannot be opened.
    """
    # The csv module expects files opened with newline='' so that it can do
    # its own newline handling (e.g. for newlines inside quoted fields).
    with open("./" + filename, newline='') as f:
        return [row for row in csv.reader(f) if row]

In [11]:
# Date segment used in the search URL and in the output file name.
flight_date = '06Mar'
output_file = 'flight_results_{0}.csv'.format(flight_date)

username = config['fly']['username']
pw = config['fly']['pw']
driver = fly_login(username, pw)
try:
    sfo_dest = get_sfo_dest("sfo_dest.csv")
    # Collect the per-destination frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and re-copied the
    # accumulated frame on every iteration.
    result_frames = []
    for d in sfo_dest:
        if not d:
            # Blank line in the CSV - nothing to look up.
            continue
        dest = d[0]
        results = get_flight_details('SFO', dest, flight_date)
        print(results)
        # get_flight_details returns None for routes it skips.
        if results is not None:
            result_frames.append(results)
finally:
    # Always shut the browser down, even if a lookup fails part-way through.
    driver.quit()

# pd.concat raises on an empty list, so fall back to an empty frame.
final_results = pd.concat(result_frames) if result_frames else pd.DataFrame()
final_results.to_csv(output_file)

  flight_num   config flight_time orig    dep dest    arr config_f config_y  \
0       2078  12F/64Y      3h 30m  SFO  10:05  AUS  15:35       12       64   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     11     63             1            30           0          29      1  
  flight_num   config flight_time orig    dep dest    arr config_f config_y  \
0       3358  12F/64Y      1h 40m  SFO  13:50  BOI  16:30       12       64   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     11     63             8            38           7          37      0  
None
None
  flight_num   config flight_time orig    dep dest    arr config_f config_y  \
0       3451  12F/64Y      3h 38m  SFO  11:00  DAL  16:38       12       64   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     11     63             4            44           3          43      0  
None
None
None
  flight_num   config flight_time or

  flight_num   config flight_time orig    dep dest    arr config_f config_y  \
0       3390  12F/64Y      1h 55m  SFO  14:15  GEG  16:10       12       64   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     11     63             6            34           5          33      0  
  flight_num    config flight_time orig    dep dest    arr config_f config_y  \
0        608  16F/162Y      4h 55m  SFO  13:15  TPA  21:10       16      162   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     16    153             6           125           6         116      0  
None
None
None
  flight_num    config flight_time orig    dep dest    arr config_f config_y  \
0        346  16F/174Y      5h 20m  SFO  09:30  FLL  17:50       16      174   

  auth_f auth_y config_open_f config_open_y auth_open_f auth_open_y listed  
0     15    157             4           106           3          89      0  
None
