## WebScraping IV: attack of the APIs

An API (Application Programming Interface) is an intermediary layer of software that lets two applications exchange information with each other.

In [1]:
import functools
import os

import requests

# Autosuggest endpoint: looks up the places that match a free-text query.
url = "https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/autosuggest/v1.0/UK/GBP/en-GB/"

params = {"query": "Tokyo"}

# The RapidAPI key is a secret, so it is read from the environment instead of
# being committed with the notebook (set RAPIDAPI_KEY before starting Jupyter).
headers = {'x-rapidapi-host': "skyscanner-skyscanner-flight-search-v1.p.rapidapi.com",
           'x-rapidapi-key': os.environ["RAPIDAPI_KEY"]}

response = requests.get(url, headers=headers, params=params)
response.json()

{'Places': [{'PlaceId': 'TYOA-sky',
   'PlaceName': 'Tokyo',
   'CountryId': 'JP-sky',
   'RegionId': '',
   'CityId': 'TYOA-sky',
   'CountryName': 'Japan'},
  {'PlaceId': 'NRT-sky',
   'PlaceName': 'Tokyo Narita',
   'CountryId': 'JP-sky',
   'RegionId': '',
   'CityId': 'TYOA-sky',
   'CountryName': 'Japan'},
  {'PlaceId': 'HND-sky',
   'PlaceName': 'Tokyo Haneda',
   'CountryId': 'JP-sky',
   'RegionId': '',
   'CityId': 'TYOA-sky',
   'CountryName': 'Japan'},
  {'PlaceId': 'TJH-sky',
   'PlaceName': 'Toyooka',
   'CountryId': 'JP-sky',
   'RegionId': '',
   'CityId': 'JTJH-sky',
   'CountryName': 'Japan'},
  {'PlaceId': 'OOK-sky',
   'PlaceName': 'Toksook Bay',
   'CountryId': 'US-sky',
   'RegionId': 'AK',
   'CityId': 'OOKA-sky',
   'CountryName': 'United States'}]}

In [2]:
# Browse-quotes endpoint: price quotes from SFO to New York for 2021-05-29.
url = "https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/browsequotes/v1.0/US/USD/en-US/SFO-sky/NYCA-sky/2021-05-29"

params = {"inboundpartialdate": "2021-05-29"}

# The RapidAPI key is a secret, so it is read from the environment instead of
# being committed with the notebook (set RAPIDAPI_KEY before starting Jupyter).
headers = {
    'x-rapidapi-host': "skyscanner-skyscanner-flight-search-v1.p.rapidapi.com",
    'x-rapidapi-key': os.environ["RAPIDAPI_KEY"]}

response = requests.get(url, headers=headers, params=params)

response.json()

{'Quotes': [{'QuoteId': 1,
   'MinPrice': 302,
   'Direct': False,
   'OutboundLeg': {'CarrierIds': [1065],
    'OriginId': 81727,
    'DestinationId': 50290,
    'DepartureDate': '2021-05-29T00:00:00'},
   'QuoteDateTime': '2021-05-25T14:29:00'},
  {'QuoteId': 2,
   'MinPrice': 352,
   'Direct': True,
   'OutboundLeg': {'CarrierIds': [1793],
    'OriginId': 81727,
    'DestinationId': 60987,
    'DepartureDate': '2021-05-29T00:00:00'},
   'QuoteDateTime': '2021-05-25T15:17:00'}],
 'Carriers': [{'CarrierId': 1065, 'Name': 'Frontier Airlines'},
  {'CarrierId': 1793, 'Name': 'United'}],
 'Places': [{'Name': 'New York Newark',
   'Type': 'Station',
   'PlaceId': 50290,
   'IataCode': 'EWR',
   'SkyscannerCode': 'EWR',
   'CityName': 'New York',
   'CityId': 'NYCA',
   'CountryName': 'United States'},
  {'Name': 'New York John F. Kennedy',
   'Type': 'Station',
   'PlaceId': 60987,
   'IataCode': 'JFK',
   'SkyscannerCode': 'JFK',
   'CityName': 'New York',
   'CityId': 'NYCA',
   'Country

### Working with JSON

In [3]:
import pandas as pd
import json

# Normalize the response: the top-level JSON object becomes a single-row
# DataFrame with one column per key (Quotes, Carriers, Places, Currencies).
# Each cell still holds the raw list of records for that key.

pd.json_normalize(response.json())

Unnamed: 0,Quotes,Carriers,Places,Currencies
0,"[{'QuoteId': 1, 'MinPrice': 302, 'Direct': Fal...","[{'CarrierId': 1065, 'Name': 'Frontier Airline...","[{'Name': 'New York Newark', 'Type': 'Station'...","[{'Code': 'USD', 'Symbol': '$', 'ThousandsSepa..."


In [4]:
# Parse the response body once and build one table per top-level section.
# Each section is already a list of records, so it can go straight into a DataFrame.
payload = response.json()

quotes = pd.DataFrame(payload["Quotes"])
carriers = pd.DataFrame(payload["Carriers"])
places = pd.DataFrame(payload["Places"])
currencies = pd.DataFrame(payload["Currencies"])

In [5]:
# Show the quotes table; OutboundLeg is still a nested dict in each row.
quotes

Unnamed: 0,QuoteId,MinPrice,Direct,OutboundLeg,QuoteDateTime
0,1,302,False,"{'CarrierIds': [1065], 'OriginId': 81727, 'Des...",2021-05-25T14:29:00
1,2,352,True,"{'CarrierIds': [1793], 'OriginId': 81727, 'Des...",2021-05-25T15:17:00


In [6]:
# Outbound leg of the FIRST quote only, as a one-row table: the scalar fields
# are broadcast against the single-element CarrierIds list.
flights = pd.DataFrame(response.json()["Quotes"][0]["OutboundLeg"])
flights

Unnamed: 0,CarrierIds,OriginId,DestinationId,DepartureDate
0,1065,81727,50290,2021-05-29T00:00:00


#### Some cool functions ~to make things easier~

In [7]:
# Helper functions that automate fetching flight quotes and finding the cheapest day to fly:

def lowest_price(origin, destination, start, end):
    '''
    Find the cheapest day to fly between two cities within a date range.

    Parameters:
        origin, destination: city names, passed through to flight_days.
        start, end: bounds of the date range, passed through to flight_days.

    Returns the date key (as produced by flight_days) whose cheapest quote has
    the lowest "MinPrice". On a tie the first such date in the dictionary wins.

    Raises ValueError if no day in the range has any quotes.
    '''
    flights = flight_days(origin, destination, start, end)

    # One entry per day: the price of that day's cheapest quote.
    # Days with an empty quote list are left out, since min() of nothing is undefined.
    prices = {}
    for date, info in flights.items():
        quotes = info["Quotes"]
        if quotes:
            prices[date] = min(quote["MinPrice"] for quote in quotes)

    if not prices:
        raise ValueError(f"no quotes found between {start} and {end}")
    return min(prices, key=prices.get)


def flight_days(origin, destination, start, end):
    '''
    Collect the flight quotes for every day in a date range.

    Parameters:
        origin, destination: city names, forwarded to flight_prices.
        start, end: first and last day of the range (both included), in any
            form pandas.date_range accepts.

    Returns a dictionary mapping each day, formatted as 'YYYY-MM-DD', to the
    value flight_prices returns for that day.
    '''
    # strftime yields the 'YYYY-MM-DD' strings directly. Index.format() is
    # deprecated in recent pandas and 'D' is the canonical daily alias.
    dates = pd.date_range(start, end, freq='D').strftime('%Y-%m-%d')
    return {date: flight_prices(origin, destination, date) for date in dates}


def flight_prices(departure, arrival, date, timeout=10):
    '''
    Fetch the browse-quotes response for a route on a given day.

    Parameters:
        departure, arrival: city names; each is resolved to a place code with city_code.
        date: outbound date as it should appear in the request URL, e.g. '2021-12-15'.
        timeout: seconds to wait for the API before giving up.

    Returns the decoded JSON body of the response.

    Raises KeyError if the RAPIDAPI_KEY environment variable is not set, and
    requests.HTTPError if the API answers with an error status.
    '''
    departure_code = city_code(departure)
    arrival_code = city_code(arrival)
    url = f"https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/browsequotes/v1.0/US/USD/en-US/{departure_code}/{arrival_code}/{date}"

    # The date is passed as a plain string (not wrapped in a set), so it is
    # sent as one ordinary query value.
    params = {"inboundpartialdate": date}

    # The RapidAPI key is a secret, so it comes from the environment, not the source.
    headers = {
        'x-rapidapi-host': "skyscanner-skyscanner-flight-search-v1.p.rapidapi.com",
        'x-rapidapi-key': os.environ["RAPIDAPI_KEY"]}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error payload to the caller.
    response.raise_for_status()

    return response.json()


@functools.lru_cache(maxsize=None)
def city_code(city_name, timeout=10):
    '''
    Resolve a city name to a place code via the autosuggest endpoint.

    Results are cached per argument set, so repeated lookups of the same city
    (for example one per day of a date range) cost a single request.

    Parameters:
        city_name: free-text name sent as the "query" parameter.
        timeout: seconds to wait for the API before giving up.

    Returns the "PlaceId" of the first place in the response.

    Raises KeyError if the RAPIDAPI_KEY environment variable is not set,
    requests.HTTPError if the API answers with an error status, and
    IndexError if the API returns no place for city_name.
    '''
    url = "https://skyscanner-skyscanner-flight-search-v1.p.rapidapi.com/apiservices/autosuggest/v1.0/US/USD/en-US/"

    params = {"query": city_name}

    # The RapidAPI key is a secret, so it comes from the environment, not the source.
    headers = {
        'x-rapidapi-host': "skyscanner-skyscanner-flight-search-v1.p.rapidapi.com",
        'x-rapidapi-key': os.environ["RAPIDAPI_KEY"]}

    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()

    places = response.json()["Places"]
    if not places:
        # Same exception type as indexing an empty list, with a usable message.
        raise IndexError(f"no place found for {city_name!r}")
    return places[0]["PlaceId"]

In [8]:
# ISO dates are unambiguous. A DD/MM/YYYY string whose day is 12 or less would
# be read month-first by pandas, so '01/12/2021' would silently mean 12 January.
flight_days('Lisbon', 'São Paulo', '2021-12-15', '2021-12-16')

{'2021-12-15': {'Quotes': [{'QuoteId': 1,
    'MinPrice': 445,
    'Direct': False,
    'OutboundLeg': {'CarrierIds': [1816],
     'OriginId': 65747,
     'DestinationId': 44904,
     'DepartureDate': '2021-12-15T00:00:00'},
    'QuoteDateTime': '2021-05-22T14:27:00'},
   {'QuoteId': 2,
    'MinPrice': 562,
    'Direct': True,
    'OutboundLeg': {'CarrierIds': [1760],
     'OriginId': 65747,
     'DestinationId': 54661,
     'DepartureDate': '2021-12-15T00:00:00'},
    'QuoteDateTime': '2021-05-24T14:50:00'}],
  'Carriers': [{'CarrierId': 1760, 'Name': 'TAP Air Portugal'},
   {'CarrierId': 1816, 'Name': 'Air Europa'}],
  'Places': [{'Name': 'Sao Paulo Congonhas',
    'Type': 'Station',
    'PlaceId': 44904,
    'IataCode': 'CGH',
    'SkyscannerCode': 'CGH',
    'CityName': 'Sao Paulo',
    'CityId': 'SAOA',
    'CountryName': 'Brazil'},
   {'Name': 'Sao Paulo Guarulhos',
    'Type': 'Station',
    'PlaceId': 54661,
    'IataCode': 'GRU',
    'SkyscannerCode': 'GRU',
    'CityName': 'S