In [47]:
import requests
import pandas as pd
import globals

In [49]:
# Run the project's `globals` module setup. The header cell below reads
# `globals.my_token` — presumably it is populated by this call; verify in globals.py.
# NOTE(review): `import globals` shadows the builtin `globals()` in this notebook.
globals.initialize()

In [52]:
# The personal API token is read from `globals.my_token`; supply your own credentials there.
# Header format is described at
# https://developer.lufthansa.com/docs/read/api_basics/HTTP_request_headers
head = {'Authorization': f'Bearer {globals.my_token}'}

# Base URL of the Lufthansa Public API; endpoint paths are appended to it.
lh_api_base_url = "https://api.lufthansa.com/"

In [34]:
def list_to_dataframe(lst):
    '''
    DESCRIPTION
        Flatten a list of (possibly nested) records into a table.
    INPUT
        lst: records as accepted by pandas.json_normalize (e.g. a list of dicts)
    OUTPUT
        pandas DataFrame; nested keys become dot-separated column names
    '''
    return pd.json_normalize(lst)

def request_countries(countryCode="", lang="en", limit=100, offset=0):
    '''
    DESCRIPTION
        Query the country reference data of the Lufthansa API.
    INPUT
        countryCode: 2-letter ISO 3166-1 country code; empty (default) requests the full list
        lang: language code sent as the `lang` query parameter
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under CountryResource -> Countries -> Country in the JSON response.
        Raises AttributeError when one of these levels is missing (get_lh_data relies on
        this to stop paging).
        NOTE(review): presumably a list, but a single dict when exactly one record
        matches — verify against the API.
    '''
    # The country code is a path segment of the resource, not a query parameter.
    country_url = f"v1/mds-references/countries/{countryCode or ''}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url + country_url, headers=head, params=my_params).json()

    return r.get('CountryResource').get('Countries').get('Country')

def request_cities(cityCode="", lang="en", limit=100, offset=0):
    '''
    DESCRIPTION
        Query the city reference data of the Lufthansa API.
    INPUT
        cityCode: 3-letter IATA city code; empty (default) requests the full list
        lang: language code sent as the `lang` query parameter
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under CityResource -> Cities -> City in the JSON response.
        Raises AttributeError when one of these levels is missing (get_lh_data relies on
        this to stop paging).
    '''
    # The city code is a path segment of the resource, not a query parameter.
    city_url = f"v1/mds-references/cities/{cityCode or ''}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url + city_url, headers=head, params=my_params).json()

    return r.get('CityResource').get('Cities').get('City')

def request_airports(airportCode="", lang="en", limit=100, offset=0, LHoperated=0, group="AllAirports"):
    '''
    DESCRIPTION
        Query the airport reference data of the Lufthansa API.
    INPUT
        airportCode: 3-letter IATA airport code; empty (default) requests the full list
        lang: language code sent as the `lang` query parameter
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
        LHoperated: forwarded as the `LHoperated` query parameter
            (presumably restricts the result to Lufthansa-operated locations — see API docs)
        group: forwarded as the `group` query parameter (default 'AllAirports')
    OUTPUT
        Value found under AirportResource -> Airports -> Airport in the JSON response.
        Raises AttributeError when one of these levels is missing (get_lh_data relies on
        this to stop paging).
    '''
    # The airport code is a path segment of the resource, not a query parameter.
    airports_url = f"v1/mds-references/airports/{airportCode or ''}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset,
        "LHoperated": LHoperated,
        "group": group
    }

    r = requests.get(lh_api_base_url + airports_url, headers=head, params=my_params).json()

    return r.get('AirportResource').get('Airports').get('Airport')

def request_nearest_airports(latitude, longitude, lang="en"):
    '''
    DESCRIPTION
        Query the airports closest to a geographic position.
    INPUT
        latitude: decimal format to at most 3 decimal places
        longitude: decimal format to at most 3 decimal places
        lang: language code sent as the `lang` query parameter
    OUTPUT
        Value found under NearestAirportResource -> Airports -> Airport in the JSON response.
        Raises AttributeError when one of these levels is missing.
    '''
    # The position is part of the resource path as "{latitude},{longitude}",
    # not a pair of query parameters.
    nearest_airports_url = f"v1/mds-references/airports/nearest/{latitude},{longitude}"

    my_params = {
        "lang": lang
    }

    r = requests.get(lh_api_base_url + nearest_airports_url, headers=head, params=my_params).json()

    return r.get('NearestAirportResource').get('Airports').get('Airport')

def request_airlines(airlineCode="", limit=100, offset=0):
    '''
    DESCRIPTION
        Query the airline reference data of the Lufthansa API.
    INPUT
        airlineCode: 2-character IATA airline/carrier code; empty (default) requests the full list
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under AirlineResource -> Airlines -> Airline in the JSON response.
        Raises AttributeError when one of these levels is missing (get_lh_data relies on
        this to stop paging).
    '''
    # The airline code is a path segment of the resource, not a query parameter.
    airlines_url = f"v1/mds-references/airlines/{airlineCode or ''}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url + airlines_url, headers=head, params=my_params).json()

    return r.get('AirlineResource').get('Airlines').get('Airline')

def request_aircraft(aircraftCode="", limit=100, offset=0):
    '''
    DESCRIPTION
        Query the aircraft reference data of the Lufthansa API.
    INPUT
        aircraftCode: 3-character IATA aircraft code; empty (default) requests the full list
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under AircraftResource -> AircraftSummaries -> AircraftSummary in the
        JSON response. Raises AttributeError when one of these levels is missing
        (get_lh_data relies on this to stop paging).
    '''
    # The aircraft code is a path segment of the resource, not a query parameter.
    aircraft_url = f"v1/mds-references/aircraft/{aircraftCode or ''}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url + aircraft_url, headers=head, params=my_params).json()

    return r.get('AircraftResource').get('AircraftSummaries').get('AircraftSummary')

In [42]:
def request_customer_flight_info(flightNumber, date, limit=100, offset=0):
    '''
    DESCRIPTION
        Look up the status of one flight (boarding, delayed, etc.) on a departure date.
    INPUT
        flightNumber: flight number including carrier code and any suffix (e.g. 'LH400')
        date: departure date (YYYY-MM-DD) in the local time of the departure airport
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under FlightInformation -> Flights -> Flight in the JSON response.
        Raises AttributeError when one of these levels is missing.
    '''
    endpoint = "v1/operations/customerflightinformation/{}/{}".format(flightNumber, date)
    paging = dict(limit=limit, offset=offset)

    response = requests.get(lh_api_base_url + endpoint, headers=head, params=paging)

    # Walk down the nested payload one level at a time.
    node = response.json()
    for key in ('FlightInformation', 'Flights', 'Flight'):
        node = node.get(key)
    return node

def request_customer_flight_info_by_route(origin, destination, date, limit=100, offset=0):
    '''
    DESCRIPTION
        Look up the status of the flights between two airports on a departure date.
    INPUT
        origin: 3-letter IATA airport code (e.g. 'FRA')
        destination: 3-letter IATA airport code (e.g. 'JFK')
        date: departure date (YYYY-MM-DD) in the local time of the departure airport
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under FlightInformation -> Flights -> Flight in the JSON response.
        Raises AttributeError when one of these levels is missing.
    '''
    route_path = "v1/operations/customerflightinformation/route/{}/{}/{}".format(
        origin, destination, date
    )
    query = {"limit": limit, "offset": offset}

    payload = requests.get(lh_api_base_url + route_path, headers=head, params=query).json()

    flight_information = payload.get('FlightInformation')
    flights = flight_information.get('Flights')
    return flights.get('Flight')

def request_customer_flight_info_at_arrival(airportCode, fromDateTime, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of all arrivals at a given airport up to 4 hours from the provided date time.
    INPUT
        airportCode: 3-letter IATA airport code (e.g. 'ZRH')
        fromDateTime: Start of time range in local time of arrival airport (YYYY-MM-DDTHH:mm)
        limit: Number of records returned per request. This function sends 100 by default, which is
            also the maximum (if a value bigger than 100 is given, 100 will be taken)
        offset: Number of records skipped. Defaults to 0
    OUTPUT
        Value found under FlightInformation -> Flights -> Flight in the JSON response.
        Raises AttributeError when one of these levels is missing.
    '''
    # Arrivals live under ".../arrivals/"; the previous ".../departures/" path made this
    # function return the same data as request_customer_flight_info_at_departure.
    cust_flight_info_by_arrival_airport_url = f"v1/operations/customerflightinformation/arrivals/{airportCode}/{fromDateTime}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+cust_flight_info_by_arrival_airport_url, headers=head, params=my_params).json()

    return r.get('FlightInformation').get('Flights').get('Flight')

def request_customer_flight_info_at_departure(airportCode, fromDateTime, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of all departures from a given airport up to 4 hours from the provided date time.
    INPUT
        airportCode: 3-letter IATA airport code (e.g. 'ZRH')
        fromDateTime: start of the time range in local time of the departure airport (YYYY-MM-DDTHH:mm)
        limit: number of records per request; this function sends 100 by default, which is
            also the maximum (larger values are treated as 100)
        offset: number of records skipped, 0 by default
    OUTPUT
        Value found under FlightInformation -> Flights -> Flight in the JSON response.
        Raises AttributeError when one of these levels is missing.
    '''
    departures_path = "v1/operations/customerflightinformation/departures/{}/{}".format(
        airportCode, fromDateTime
    )
    paging = dict(limit=limit, offset=offset)

    payload = requests.get(lh_api_base_url + departures_path, headers=head, params=paging).json()

    # Descend through the nested response one key at a time.
    node = payload
    for key in ('FlightInformation', 'Flights', 'Flight'):
        node = node.get(key)
    return node

def request_flight_schedules(origin, destination, fromDateTime, directFlights=0, limit=100, offset=0):
    '''
    DESCRIPTION
        Scheduled flights between given airports on a given date.
    INPUT
        origin: origin airport code, inserted into the request path
        destination: destination airport code, inserted into the request path
        fromDateTime: departure date/time, inserted into the request path
            (presumably YYYY-MM-DD or YYYY-MM-DDTHH:mm — see API docs)
        directFlights: forwarded as the `directFlights` query parameter (default 0)
        limit: number of records per request (only 100 possible)
        offset: number of records skipped
    OUTPUT
        Value found under ScheduleResource -> Schedule in the JSON response. If the response
        has no ScheduleResource, the former FlightInformation -> Flights -> Flight lookup is
        used, which raises AttributeError when a level is missing.
    '''

    flight_schedules_url = f"v1/operations/schedules/{origin}/{destination}/{fromDateTime}"

    my_params = {
        "directFlights": directFlights,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+flight_schedules_url, headers=head, params=my_params).json()

    # The schedules endpoint wraps its result in ScheduleResource, not FlightInformation.
    schedule_resource = r.get('ScheduleResource')
    if schedule_resource is not None:
        return schedule_resource.get('Schedule')

    # Fallback keeps the previous lookup (and its AttributeError on error payloads).
    return r.get('FlightInformation').get('Flights').get('Flight')

In [31]:
def get_lh_data(function, *args, **kwargs):
    '''
    DESCRIPTION
        Collect all pages of a paginated request function into a single list.
    INPUT
        function: callable that accepts an `offset` keyword (e.g. one of the request_* helpers)
        *args, **kwargs: forwarded unchanged to `function` on every call
            (do not pass `offset`; it is managed here)
    OUTPUT
        List with the records of all pages in the order received.
        Paging ends when `function` raises AttributeError (the request_* helpers do so when
        the expected keys are missing from the response) or returns an empty/None page.
        NOTE(review): an error response in the middle of the data (e.g. a rate limit) is
        indistinguishable from the end of the data here and silently truncates the result.
    '''
    result = []
    offset = 0

    while True:
        try:
            page = function(*args, offset=offset, **kwargs)
        except AttributeError:
            break

        # A page holding a single record may arrive as a bare dict; extending with it
        # would add its keys instead of the record.
        if isinstance(page, dict):
            page = [page]
        if not page:
            break

        result.extend(page)
        # Advance by what was actually received so a caller-supplied `limit`
        # smaller than 100 does not skip records.
        offset += len(page)

    return result

In [32]:
# Download the aircraft reference data page by page and show how many records arrived.
list_aircrafts = get_lh_data(request_aircraft)
len(list_aircrafts)

382

In [35]:
# Download the airline reference data page by page and show how many records arrived.
list_airlines = get_lh_data(request_airlines)
len(list_airlines)

1136

In [37]:
# Download the airport reference data page by page and show how many records arrived.
# NOTE(review): the recorded count (6900) is an exact multiple of the 100-record page size;
# get_lh_data stops on the first AttributeError, so the download may have ended on an
# error response rather than at the true end of the data — verify.
list_airports = get_lh_data(request_airports)
len(list_airports)

6900

In [39]:
# Download the city reference data page by page and show how many records arrived.
# NOTE(review): the recorded count (10700) is an exact multiple of the 100-record page size;
# get_lh_data stops on the first AttributeError, so the download may have ended on an
# error response rather than at the true end of the data — verify.
list_cities = get_lh_data(request_cities)
len(list_cities)

10700

In [41]:
# Download the country reference data page by page and show how many records arrived.
list_countries = get_lh_data(request_countries)
len(list_countries)

239

In [40]:
# Flatten the nested city records into columns, save them as a semicolon-separated CSV
# (without the index) and preview the first rows.
df = pd.json_normalize(list_cities)
df.to_csv('../data/df_cities.csv', sep=";", index=False)
df.head()

Unnamed: 0,CityCode,CountryCode,UtcOffset,TimeZoneId,Names.Name.@LanguageCode,Names.Name.$,Airports.AirportCode
0,AAA,PF,-10:00,Pacific/Tahiti,EN,Anaa,AAA
1,AAB,AU,+10:00,Australia/Brisbane,EN,Arrabury,AAB
2,AAC,EG,+02:00,Africa/Cairo,EN,El Arish,AAC
3,AAD,SO,+03:00,Africa/Mogadishu,EN,Adado,AAD
4,AAE,DZ,+01:00,Africa/Algiers,EN,Annaba,AAE


In [44]:
# Fetch one page of departures from BER for the window starting 2023-03-01 06:00,
# flatten the records, save them as a semicolon-separated CSV and preview the first rows.
# Only a single request is made here (no paging through get_lh_data).
r_BER_20230301 = request_customer_flight_info_at_departure("BER", fromDateTime="2023-03-01T06:00")
df_BER_20230301 = pd.json_normalize(r_BER_20230301)
df_BER_20230301.to_csv('../data/df_BER_20230301.csv', sep=";", index=False)
df_BER_20230301.head()

Unnamed: 0,Departure.AirportCode,Departure.Scheduled.Date,Departure.Scheduled.Time,Departure.Actual.Date,Departure.Actual.Time,Departure.Terminal.Name,Departure.Terminal.Gate,Departure.Status.Code,Departure.Status.Description,Arrival.AirportCode,...,Arrival.Status.Description,OperatingCarrier.AirlineID,OperatingCarrier.FlightNumber,Equipment.AircraftCode,Status.Code,Status.Description,Arrival.Terminal.Gate,MarketingCarrierList.MarketingCarrier,MarketingCarrierList.MarketingCarrier.AirlineID,MarketingCarrierList.MarketingCarrier.FlightNumber
0,BER,2023-03-01,06:20,2023-03-01,06:21,1,B36,DP,Flight Departed,STR,...,Flight Landed,EW,8000,320,LD,Flight Landed,,,,
1,BER,2023-03-01,06:20,2023-03-01,06:22,1,A35,DP,Flight Departed,CGN,...,Flight Landed,EW,8058,320,LD,Flight Landed,B20,,,
2,BER,2023-03-01,06:30,2023-03-01,06:33,1,B10,DP,Flight Departed,FRA,...,Flight Landed,LH,173,32Q,LD,Flight Landed,Area A,"[{'AirlineID': 'AC', 'FlightNumber': '9116'}, ...",,
3,BER,2023-03-01,06:30,2023-03-01,06:26,1,B21,DP,Flight Departed,MUC,...,Flight Landed,LH,1957,32V,LD,Flight Landed,G30,"[{'AirlineID': 'A3', 'FlightNumber': '1590'}, ...",,
4,BER,2023-03-01,06:50,,,1,B14,NO,No Status,BRU,...,No Status,SN,2592,320,CD,Flight Cancelled,,"[{'AirlineID': 'HU', 'FlightNumber': '8552'}, ...",,


In [53]:
# Flatten the downloaded airport records into a DataFrame; the cells below sample
# airport codes from its 'AirportCode' column.
df_airports = pd.json_normalize(list_airports)

In [63]:
# Collect the departures of a random sample of airports for the window starting at from_time.
flights_20230301T0600 = []
# The endpoint expects YYYY-MM-DDTHH:mm (the format used by the working BER request);
# the compact form '20230301T0600' made every request fail and be skipped silently.
from_time = '2023-03-01T06:00'

# Fixed seed so a re-run queries the same airports.
for airport_code in df_airports.sample(2000, random_state=42)['AirportCode'].values:
    try:
        departures = request_customer_flight_info_at_departure(airportCode=airport_code, fromDateTime=from_time)
    except AttributeError:
        # The response carried no flight data for this airport; move on to the next one.
        continue

    # A single flight may arrive as a bare dict; extending with it would add its keys.
    if isinstance(departures, dict):
        departures = [departures]
    flights_20230301T0600.extend(departures or [])

len(flights_20230301T0600)
    

KeyboardInterrupt: 

In [61]:
# Scratch check: peek at 100 randomly sampled airport codes.
# No random_state is set, so the output differs on every run.
df_airports.sample(100)['AirportCode'].values

array(['NAQ', 'DJA', 'PIZ', 'DUA', 'BYH', 'OKL', 'MEI', 'NLF', 'HXX',
       'AJL', 'EWS', 'ALA', 'JPE', 'CEJ', 'FIL', 'BHL', 'GND', 'OBS',
       'HTR', 'INV', 'INP', 'LAF', 'HAD', 'ATX', 'KCN', 'EDF', 'PLB',
       'LSP', 'KLN', 'JMU', 'KXD', 'DMK', 'FOS', 'AAV', 'IZA', 'NAI',
       'AGM', 'NZH', 'BMI', 'HIA', 'OXF', 'MBW', 'ESF', 'BEV', 'MDB',
       'MNG', 'JKL', 'CLE', 'FGU', 'BDD', 'EWE', 'KFA', 'ADF', 'BDG',
       'JSK', 'PAD', 'ISD', 'BVB', 'FOB', 'POZ', 'ORV', 'ELL', 'LAP',
       'GBL', 'NOO', 'MUJ', 'LGL', 'DKS', 'MDA', 'DCI', 'ENU', 'PNQ',
       'ISQ', 'BSK', 'KLP', 'BOJ', 'DWN', 'LKC', 'DGH', 'CFE', 'MSK',
       'KYO', 'EVI', 'OTH', 'OYE', 'NCJ', 'NZA', 'BJT', 'NZC', 'KYT',
       'PHE', 'JIR', 'JGR', 'KAG', 'AUV', 'CLV', 'BVY', 'BAR', 'MTX',
       'ISH'], dtype=object)

In [None]:
# Flatten the collected departure records, save them as a semicolon-separated CSV
# (without the index) and preview the first rows.
# Depends on flights_20230301T0600 filled by the sampling loop cell.
df_flights_20230301T0600 = pd.json_normalize(flights_20230301T0600)
df_flights_20230301T0600.to_csv('../data/df_flights_20230301T0600.csv', sep=";", index=False)
df_flights_20230301T0600.head()