In [1]:
import datetime
import json
import os
import urllib.request as ur
import warnings

import numpy as np
import pandas as pd

### Create a function that returns the cheapest flights for the given parameters

In [2]:
def get_flight_info(FROM = 'BUD', TO = 'EWR', START_DATE = '2021-08-15', END_DATE = '2021-08-29', API_KEY = None, TIMEOUT = 30):
    """Fetch round-trip quotes from the Skyscanner browse-quotes endpoint as a price-sorted table.

    Parameters
    ----------
    FROM, TO : str
        Origin and destination codes, inserted verbatim into the request path.
    START_DATE, END_DATE : str
        Outbound and inbound dates, inserted verbatim into the request path
        (the notebook passes 'YYYY-MM-DD' strings).
    API_KEY : str, optional
        API key for the request. When omitted, the ``SKYSCANNER_API_KEY``
        environment variable is used, and only if that is unset does the
        function fall back to the key that was historically embedded here.
    TIMEOUT : float, optional
        Socket timeout in seconds handed to ``urlopen``.

    Returns
    -------
    pandas.DataFrame or None
        One row per quote, sorted ascending by ``price_HUF``, with the columns
        ``price_HUF``, ``direct``, ``quote_date``, ``start_date``, ``end_date``,
        ``dest_airport``, ``dest_city``, ``dest_country``, ``carrier_outbound``
        and ``carrier_inbound``.
        ``None`` when the response holds no quotes, or when the request or the
        parsing of its payload fails (a ``RuntimeWarning`` is emitted in the
        failure case so the problem is visible instead of silently swallowed).
    """
    base_url = 'https://partners.api.skyscanner.net/apiservices/browsequotes/v1.0/HU/HUF/en/'

    # SECURITY NOTE: the literal below is a credential committed to source. It is
    # kept only as a last-resort fallback so existing runs keep working; prefer
    # API_KEY / SKYSCANNER_API_KEY, and rotate and remove the literal.
    api_key = API_KEY or os.environ.get('SKYSCANNER_API_KEY') or 'ah395258861593902161819075536914'

    URL = base_url + FROM + '/' + TO + '/' + START_DATE + '/' + END_DATE + '?apiKey=' + api_key

    try:
        with ur.urlopen(URL, timeout = TIMEOUT) as response:
            data = json.loads(response.read().decode())

        # Error payloads may lack 'Quotes' altogether; treat that like "no quotes".
        if not isinstance(data, dict) or not data.get('Quotes'):
            return None

        return _build_quotes_table(data)

    except (OSError, ValueError, KeyError) as error:
        # OSError covers URLError/HTTPError/timeouts, ValueError covers malformed
        # JSON or text, KeyError covers a payload missing an expected field.
        # Anything else (e.g. a TypeError from a library API change) is a
        # programming error and is allowed to propagate.
        # The URL is deliberately left out of the message: it contains the API key.
        warnings.warn(
            'No flight data for {} -> {} ({} to {}): {!r}'.format(FROM, TO, START_DATE, END_DATE, error),
            RuntimeWarning,
            stacklevel = 2,
        )
        return None


def _build_quotes_table(data):
    """Flatten a browse-quotes payload (Quotes/Places/Carriers) into one row per quote, cheapest first."""
    # Places: keep only the id plus the airport/city/country descriptors.
    places = pd.json_normalize(data['Places']).drop(columns = ['IataCode', 'CityId', 'Type', 'Name'])

    # Carriers: id -> name lookup table.
    carriers = pd.json_normalize(data['Carriers'])

    # Quotes: drop the ids that are not needed and give the rest readable names.
    quotes = (
        pd.json_normalize(data['Quotes'])
        .drop(columns = ['QuoteId', 'OutboundLeg.OriginId', 'InboundLeg.DestinationId', 'InboundLeg.OriginId'])
        .rename(columns = {
            'OutboundLeg.CarrierIds' : 'carrier_outbound',
            'OutboundLeg.DestinationId' : 'destination',
            'InboundLeg.CarrierIds' : 'carrier_inbound',
            'InboundLeg.DepartureDate' : 'end_date',
            'OutboundLeg.DepartureDate' : 'start_date',
            'QuoteDateTime' : 'quote_date',
        })
    )

    for date_column in ('start_date', 'end_date', 'quote_date'):
        quotes[date_column] = pd.to_datetime(quotes[date_column])

    # CarrierIds is a list per leg; only its first entry is used.
    for carrier_column in ('carrier_outbound', 'carrier_inbound'):
        quotes[carrier_column] = quotes[carrier_column].str[0]

    # Replace the destination id with its airport/city/country description.
    quotes = (
        quotes
        .merge(places, left_on = 'destination', right_on = 'PlaceId')
        .drop(columns = ['destination', 'PlaceId'])
        .rename(columns = {'SkyscannerCode' : 'dest_airport', 'CityName' : 'dest_city', 'CountryName' : 'dest_country'})
    )

    # Replace each carrier id with the carrier name (inner joins, as before:
    # quotes whose carrier id is not listed in 'Carriers' are dropped).
    for carrier_column in ('carrier_outbound', 'carrier_inbound'):
        quotes = (
            quotes
            .merge(carriers, left_on = carrier_column, right_on = 'CarrierId')
            .drop(columns = ['CarrierId', carrier_column])
            .rename(columns = {'Name' : carrier_column})
        )

    quotes = quotes.rename(columns = {'MinPrice' : 'price_HUF', 'Direct' : 'direct'})

    return quotes.sort_values('price_HUF')

### Loop through multiple date combinations and combine the flight data

In [3]:
from collections import defaultdict

In [4]:
# Search window: the first departure day, how many later days to also try as
# departures, and the shortest/longest trip length (in days) to consider.
start_date = '2021-08-12'
start_date_window = 10
days_length_min = 7
days_length_max = 15

date_format = '%Y-%m-%d'
start_date_ = datetime.datetime.strptime(start_date, date_format)
trip_lengths = range(days_length_min, days_length_max + 1)

# Maps every candidate departure date to the list of candidate return dates,
# all formatted as 'YYYY-MM-DD' strings.
end_dates = {}

for start_offset in range(start_date_window + 1):
    departure = start_date_ + datetime.timedelta(days = start_offset)
    departure_key = departure.strftime(date_format)

    for trip_length in trip_lengths:
        return_day = departure + datetime.timedelta(days = trip_length)
        # The key is created only once a return date exists, so an empty
        # trip-length range leaves the mapping empty.
        end_dates.setdefault(departure_key, []).append(return_day.strftime(date_format))

In [5]:
# Query every (departure, return) date pair and keep each non-empty result.
data_collector = []

for start_date, return_dates in end_dates.items():
    for end_date in return_dates:
        data = get_flight_info(FROM = 'BUD', TO = 'Mosc', START_DATE = start_date, END_DATE = end_date)

        # get_flight_info returned None for this pair: nothing to keep.
        if data is None:
            continue

        data_collector.append(data)

In [6]:
# Stack the collected per-date-pair tables into one frame, lowest price_HUF first.
# When nothing was collected, `data` is left untouched.
if data_collector:
    data = pd.concat(data_collector).sort_values('price_HUF')

### Explore final data table

In [7]:
# Render the `data` table as this cell's output.
data

Unnamed: 0,price_HUF,direct,quote_date,start_date,end_date,dest_airport,dest_city,dest_country,carrier_outbound,carrier_inbound
0,21291,True,2021-07-19 10:42:00,2021-08-19,2021-08-26,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,21672,True,2021-07-20 23:03:00,2021-08-19,2021-09-02,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,23358,True,2021-07-21 07:04:00,2021-08-15,2021-08-26,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,23358,True,2021-07-21 06:43:00,2021-08-22,2021-09-02,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,23358,True,2021-07-21 06:43:00,2021-08-22,2021-09-05,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,24109,True,2021-07-18 12:12:00,2021-08-15,2021-08-22,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,34253,True,2021-07-20 22:45:00,2021-08-19,2021-08-29,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,34271,True,2021-07-21 06:43:00,2021-08-12,2021-08-19,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,34271,True,2021-07-21 05:38:00,2021-08-12,2021-08-26,VKO,Moscow,Russia,Wizz Air,Wizz Air
0,35939,True,2021-07-21 07:04:00,2021-08-15,2021-08-29,VKO,Moscow,Russia,Wizz Air,Wizz Air
