# Scrapping Javascript

In this Notebooks we will scrap the website **Avianca**.

<img src="https://logodownload.org/wp-content/uploads/2017/05/avianca-logo-1-1.png"
     alt="LATAM"
     style="margin: auto;" />

We will obtain:
> - Available prices
> - Departure time and arrival time 
> - Information about the stops

In [64]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time

In [80]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

class Website:
    
    def __init__(self, url):
        self._exec_path = 'driver/msedgedriver'
        self._desired_capabilities = {
            "os" : "OS X",
            "os_version" : "Big Sur",
            "browser" : "Edge",
        }
        self.driver = webdriver.Edge(executable_path=self._exec_path, capabilities=self._desired_capabilities)
        self.url = url
    
    def browse(self):
        self.driver.get(self.url)
    
    def close(self):
        self.driver.close()

class Latam(Website):
    
    def __init__(self):
        self.url = 'https://www.latam.com/es_mx/apps/personas/booking?fecha1_dia=13&fecha1_anomes=2021-04&auAvailability=1&ida_vuelta=ida&vuelos_origen=Monterrey&from_city1=MEX&vuelos_destino=Madrid&to_city1=BUE&flex=1&vuelos_fecha_salida_ddmmaaaa=06/03/2021&cabina=Y&nadults=1&nchildren=0&ninfants=0&cod_promo=&stopover_outbound_days=0&stopover_inbound_days=0&application=#/'
        super().__init__(self.url)
        
        # Variables for the elements in the website
        self.flights_xpath = '//li[@class="flight"]'
        self.departure_xpath = './/div[@class="departure"]/time'
        self.arrival_xpath = './/div[@class="arrival"]/time'
        self.duration_xpath = './/span[@class="duration"]/time'
        self.stops_xpath = './/div[@class="flight-summary-stops-description"]/button'
        
        self.information_flights = self._get_information_flights()
    
    def _get_information_flights(self):
        self.browse()
        delay = 20
        try:
            dialog = WebDriverWait(self.driver, delay).until(EC.presence_of_element_located((By.XPATH, '//div[@class="onesignal-slidedown-dialog"]')))
            print('The page has been fully loaded')
            dialog_button = self.driver.find_element_by_xpath('//div[@class="onesignal-slidedown-dialog"]//button[@class="align-right secondary slidedown-button"]')
            dialog_button.click()
            flights = self.driver.find_elements_by_xpath(self.flights_xpath)
            print(f'{len(flights)} were found.')
            print('Scrapping the flights...')

            information_flights = []
            for flight in flights:
                information_flight = self._get_information_flight(flight)
                button = flight.find_element_by_xpath(self.stops_xpath)
                button.click()
                information_stops = self._get_information_stops(flight)
                close_button = flight.find_element_by_xpath('//button[@class="close"]')
                close_button.click()
                flight.click()
                information_prices = self._get_prices(flight)
                flight.click()
                information_flights.append({
                    'prices' : information_prices,
                    'times' : information_flight,
                    'stops' : information_stops
                })
                
            self.close()
            return information_flights
        
        except TimeoutException:
            print('The Page took so long to load.')
            self.close()        
    
    
    def _get_information_flight(self, flight):
        """This method obtains the general information of the flight
        """
        departure_time = flight.find_element_by_xpath(self.departure_xpath).get_attribute('datetime')
        arrival_time = flight.find_element_by_xpath(self.arrival_xpath).get_attribute('datetime')
        duration_flight = flight.find_element_by_xpath(self.duration_xpath).get_attribute('datetime')
        information_flight = {
            'departure_time' : departure_time,
            'arrival_time' : arrival_time,
            'duration_flight' : duration_flight,
        }
        return information_flight
    
    
    def _get_information_stops(self, flight):
        """This method obtains the information of the stops
        """
        segments = flight.find_elements_by_xpath('//div[@class="sc-hZSUBg gfeULV"]/div[@class="sc-cLQEGU hyoued"]')
        information_stops = []
        for segment in segments:
            airports = segment.find_elements_by_xpath('.//span[@class="sc-hXRMBi gVvErD"]')
            departure_airport = airports[0].text
            arrival_airport = airports[1].text
            flight_number = segment.find_element_by_xpath('.//div[@class="airline-flight-details"]/b').text
            aircraft = segment.find_element_by_xpath('.//div[@class="airline-flight-details"]/span[@class="sc-gzOgki uTyOl"]').text
            data = {
                'departure_airport' : departure_airport,
                'arrival_airport' : arrival_airport,
                'flight_number' : flight_number,
                'aircraft' : aircraft
            }
            information_stops.append(data)

        return information_stops
    
    
    def _get_prices(self, flight):
        """This method obtains the information of the flight
        """
        fares = flight.find_elements_by_xpath('//div[@class="fares-table-container"]//tfoot//td[contains(@class, "fare-")]')
        prices = []
        for fare in fares:
            category_fare = fare.find_element_by_xpath('.//label').get_attribute('for')
            price_fare = fare.find_element_by_xpath('.//span[@class="price"]/span[@class="value"]').text
            currency_fare = fare.find_element_by_xpath('.//span[@class="price"]/span[@class="currency-symbol"]').text
            prices.append({category_fare : {'currency':currency_fare, 'price':price_fare}})

        return prices

In [81]:
latam = Latam()

The page has been fully loaded
2 were found.
Scrapping the flights...


In [79]:
latam.information_flights

[{'prices': [{'LIGHT': {'currency': 'US$', 'price': '1154'}},
   {'PLUS': {'currency': 'US$', 'price': '1295'}},
   {'TOP': {'currency': 'US$', 'price': '922'}}],
  'times': {'departure_time': '12:15',
   'arrival_time': '11:27',
   'duration_flight': 'PT21H12M'},
  'stops': [{'departure_airport': 'Benito Juárez Intl.',
    'arrival_airport': 'Miami Intl.',
    'flight_number': 'AM422',
    'aircraft': 'Boeing 737-800'},
   {'departure_airport': 'Miami Intl.',
    'arrival_airport': 'A. Merino Benítez Intl.',
    'flight_number': 'LA501',
    'aircraft': 'Boeing 787-9'},
   {'departure_airport': 'A. Merino Benítez Intl.',
    'arrival_airport': 'Ezeiza Intl.',
    'flight_number': 'LA475',
    'aircraft': 'Airbus 320-200'}]},
 {'prices': [{'LIGHT': {'currency': '', 'price': ''}},
   {'PLUS': {'currency': '', 'price': ''}},
   {'TOP': {'currency': '', 'price': ''}},
   {'LIGHT': {'currency': 'US$', 'price': '1154'}},
   {'PLUS': {'currency': 'US$', 'price': '1295'}},
   {'TOP': {'curren