In [147]:
import pandas as pd
import numpy as np
import json
import urllib.request as ur
import datetime

### Create function that outputs cheapest flights for given parameters

In [110]:
def get_flight_info(FROM = 'BUD', TO = 'EWR', START_DATE = '2021-08-10', END_DATE = '2021-08-16'):
    """Fetch browse quotes for one route / date pair and return them sorted by price.

    Builds a Skyscanner ``browsequotes`` URL (path fixed to ``HU/HUF/en-US``),
    downloads the JSON answer and flattens it into one row per quote, with the
    destination place and the first outbound / inbound carrier resolved to names.

    Parameters
    ----------
    FROM : str
        Origin code inserted into the URL path (e.g. ``'BUD'``).
    TO : str
        Destination code inserted into the URL path (e.g. ``'JFK'``; the
        notebook also passes ``'Anywhere'``).
    START_DATE, END_DATE : str
        Outbound and inbound dates inserted into the URL path as given,
        e.g. ``'2021-08-10'``.

    Returns
    -------
    pandas.DataFrame or None
        Quotes sorted ascending by ``price_HUF``. For the payload shape used in
        this notebook the columns are ``price_HUF``, ``direct``, ``quote_date``,
        ``start_date``, ``end_date``, ``dest_airport``, ``dest_city``,
        ``dest_country``, ``carrier_outbound`` and ``carrier_inbound``.
        ``None`` is returned when the request fails, when the payload contains
        no quotes, or when the payload cannot be processed; the two failure
        cases emit a ``UserWarning`` instead of being swallowed silently.

    Notes
    -----
    The joins against places and carriers are inner joins, so a quote whose
    destination or first carrier id is missing from the payload is dropped.
    """
    # Imported locally so this cell stays runnable without edits to the import cell.
    import os
    import warnings
    from http.client import HTTPException

    base_url = 'https://partners.api.skyscanner.net/apiservices/browsequotes/v1.0/HU/HUF/en-US/'
    # SECURITY: an API key should not live in the notebook. Set SKYSCANNER_API_KEY to
    # override it; the literal is kept only as a backward-compatible fallback.
    api_key = os.environ.get('SKYSCANNER_API_KEY', 'ah395258861593902161819075536914')
    api_link = '?apiKey=' + api_key

    URL = base_url + FROM + '/' + TO + '/' + START_DATE + '/' + END_DATE + api_link

    route = '{} -> {} ({} / {})'.format(FROM, TO, START_DATE, END_DATE)

    # --- download -----------------------------------------------------------
    # Only the network / decoding step is guarded here, and the response is
    # closed before any processing starts.
    try:
        with ur.urlopen(URL) as response:
            data = json.loads(response.read().decode())
    except (OSError, HTTPException, ValueError) as err:
        # Some error messages embed the request URL; never echo the key.
        reason = str(err).replace(api_key, '<redacted>')
        warnings.warn('get_flight_info: request failed for {}: {}: {}'.format(
            route, type(err).__name__, reason), stacklevel = 2)
        return None

    if not isinstance(data, dict) or not data.get('Quotes'):
        return None

    # --- flatten ------------------------------------------------------------
    try:
        # process Places info (keep only the id and the display columns)
        places = pd.json_normalize(data['Places']).drop(
            columns = ['IataCode', 'CityId', 'Type', 'Name'], errors = 'ignore')

        # process Carrier info
        carriers = pd.json_normalize(data['Carriers'])

        # process Quotes info
        # NOTE: `columns=` is required here; passing the axis positionally
        # (`drop(cols, 1)`) raises TypeError on pandas >= 2.0.
        quotes = (
            pd.json_normalize(data['Quotes'])
            .drop(columns = ['QuoteId', 'OutboundLeg.OriginId', 'InboundLeg.DestinationId', 'InboundLeg.OriginId'],
                  errors = 'ignore')
            .rename(columns = {'OutboundLeg.CarrierIds' : 'carrier_outbound',
                               'OutboundLeg.DestinationId' : 'destination',
                               'InboundLeg.CarrierIds' : 'carrier_inbound',
                               'InboundLeg.DepartureDate' : 'end_date',
                               'OutboundLeg.DepartureDate' : 'start_date',
                               'QuoteDateTime' : 'quote_date'})
        )

        for date_column in ('start_date', 'end_date', 'quote_date'):
            quotes[date_column] = pd.to_datetime(quotes[date_column])

        # CarrierIds arrive as lists; only the first carrier of each leg is kept.
        quotes['carrier_outbound'] = quotes['carrier_outbound'].str[0]
        quotes['carrier_inbound'] = quotes['carrier_inbound'].str[0]

        # resolve the destination id to airport / city / country
        quotes = (
            quotes
            .merge(places, left_on = 'destination', right_on = 'PlaceId')
            .drop(columns = ['destination', 'PlaceId'])
            .rename(columns = {'SkyscannerCode' : 'dest_airport', 'CityName' : 'dest_city', 'CountryName' : 'dest_country'})
        )

        # resolve the outbound carrier id to its name
        quotes = (
            quotes
            .merge(carriers, left_on = 'carrier_outbound', right_on = 'CarrierId')
            .drop(columns = ['CarrierId', 'carrier_outbound'])
            .rename(columns = {'Name' : 'carrier_outbound'})
        )

        # resolve the inbound carrier id to its name and apply the final column names
        quotes = (
            quotes
            .merge(carriers, left_on = 'carrier_inbound', right_on = 'CarrierId')
            .drop(columns = ['CarrierId', 'carrier_inbound'])
            .rename(columns = {'Name' : 'carrier_inbound', 'MinPrice' : 'price_HUF', 'Direct' : 'direct'})
        )

        return quotes.sort_values('price_HUF')

    except (KeyError, TypeError, ValueError, AttributeError) as err:
        # The payload did not have the expected shape: stay best-effort (None)
        # for the calling loop, but report it instead of hiding it.
        warnings.warn('get_flight_info: could not process the response for {}: {}: {}'.format(
            route, type(err).__name__, err), stacklevel = 2)
        return None

In [141]:
# Try the helper on a single route and date pair before looping over dates.
data = get_flight_info(
    FROM = 'BUD',
    TO = 'JFK',
    START_DATE = '2021-08-10',
    END_DATE = '2021-08-17',
)
data

Unnamed: 0,price_HUF,direct,quote_date,start_date,end_date,dest_airport,dest_city,dest_country,carrier_outbound,carrier_inbound
0,153639,False,2021-06-27 16:17:00,2021-08-10,2021-08-17,JFK,New York,United States,SWISS,United


### Loop through multiple return dates and combine the flight data

In [177]:
# Trip window: depart on `start_date`, return after between
# `days_length_min` and `days_length_max` days (both inclusive).
start_date = '2021-07-08'
days_length_min = 10
days_length_max = 13

start_date_ = datetime.datetime.strptime(start_date, '%Y-%m-%d')

# One candidate return date per allowed trip length, formatted like `start_date`.
end_dates = [
    (start_date_ + datetime.timedelta(days = offset)).strftime('%Y-%m-%d')
    for offset in range(days_length_min, days_length_max + 1)
]

print('Starting date:', start_date)
print('Ending dates:', ', '.join(end_dates))

Starting date: 2021-07-08
Ending dates: 2021-07-18, 2021-07-19, 2021-07-20, 2021-07-21


In [191]:
# Query every candidate return date and keep only the lookups that returned a table.
data_collector = []

for end_date in end_dates:
    data = get_flight_info(FROM = 'BUD', TO = 'Anywhere', START_DATE = start_date, END_DATE = end_date)
    if data is None:
        continue  # nothing to keep for this return date
    data_collector.append(data)

In [196]:
# Stack the per-date tables and order the result by price.
# `data` is left as it was when nothing was collected.
if data_collector:
    data = pd.concat(data_collector).sort_values('price_HUF')

### Explore final data table

In [198]:
# Display whatever `data` currently holds (the rendered table below is shortened with a `...` row).
data

Unnamed: 0,price_HUF,direct,quote_date,start_date,end_date,dest_airport,dest_city,dest_country,carrier_outbound,carrier_inbound
0,7092,True,2021-06-29 08:57:00,2021-07-08,2021-07-21,BGY,Milan,Italy,Ryanair,Ryanair
0,7113,True,2021-06-29 09:09:00,2021-07-08,2021-07-20,BGY,Milan,Italy,Ryanair,Ryanair
8,7935,True,2021-06-30 09:56:00,2021-07-08,2021-07-20,CRL,Brussels,Belgium,Wizz Air,Ryanair
9,9199,True,2021-06-30 09:56:00,2021-07-08,2021-07-21,CRL,Brussels,Belgium,Wizz Air,Ryanair
0,10452,True,2021-06-29 16:15:00,2021-07-08,2021-07-18,SVQ,Seville,Spain,Ryanair,Ryanair
...,...,...,...,...,...,...,...,...,...,...
32,197914,False,2021-06-29 12:12:00,2021-07-08,2021-07-18,CUN,Cancun,Mexico,KLM,Air France
22,230765,False,2021-06-27 11:52:00,2021-07-08,2021-07-19,ZNZ,Zanzibar,Tanzania,Turkish Airlines,Precision Air
17,234308,False,2021-06-27 11:27:00,2021-07-08,2021-07-21,ZNZ,Zanzibar,Tanzania,Turkish Airlines,Turkish Airlines
18,257771,False,2021-06-27 12:45:00,2021-07-08,2021-07-21,SEZ,Mahe Island,Seychelles,Etihad Airways,Etihad Airways
