# Load packages

In [1]:
import requests
import pandas as pd
import numpy as np
import globals
import time
from pathlib import Path
from datetime import date

# Working directories

In [2]:
# Resolve the notebook's working directory; the data folder is its sibling `data`.
current_path = Path.cwd()
data_path = current_path.parent / 'data'

# Load global token

In [3]:
# Run the project-local `globals` module's initialize(); later cells read `globals.my_token`.
# NOTE(review): presumably this loads the personal API token -- confirm in globals.py.
globals.initialize()

In [4]:
# Confirm that a token was loaded WITHOUT echoing it: cell outputs are saved with the
# notebook, so displaying the token leaks the credential to everyone who gets the file.
# NOTE: a token that was already displayed in a saved/committed notebook should be rotated.
bool(globals.my_token)

# Define LH API functions

In [5]:
# Base URL of the Lufthansa Public API; every endpoint path below is appended to it.
lh_api_base_url = "https://api.lufthansa.com/"

# Authorization header sent with every request. The bearer token is the personal token
# held in `globals.my_token` (replace it with your own credentials).
# Header format: https://developer.lufthansa.com/docs/read/api_basics/HTTP_request_headers
head = {'Authorization': f'Bearer {globals.my_token}'}

In [6]:
def list_to_dataframe(lst):
    '''
    DESCRIPTION
        Flatten a list of (possibly nested) records into a DataFrame.
        NOTE: a later cell of this notebook defines another function with the
        same name, which replaces this one once that cell has been run.
    INPUT
        lst: records accepted by pd.json_normalize (e.g. a list of dicts)
    OUTPUT
        DataFrame returned by pd.json_normalize(lst); nested keys become
        dot-separated column names
    '''
    return pd.json_normalize(lst)

def request_countries(countryCode="", lang="en", limit=100, offset=0):
    '''
    DESCRIPTION
        Request one page of countries (or one specific country) from the
        reference-data endpoint.
    INPUT
        countryCode: 2-letter ISO 3166-1 country code; "" (default) lists all countries
        lang: language code sent as the `lang` query parameter
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under CountryResource -> Countries -> Country of the
        JSON response.
        NOTE(review): presumably a list of dicts, or a single dict when only one
        country is returned -- confirm against the API.
        Raises AttributeError when one of these keys is missing from the
        response; get_lh_data uses that as its signal to stop paging.
    '''
    # The country code is a path segment of the resource (.../countries/{countryCode}),
    # not a query parameter. With the default "" the URL is the plain list URL.
    country_url = f"v1/mds-references/countries/{countryCode}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+country_url, headers=head, params=my_params).json()

    return r.get('CountryResource').get('Countries').get('Country')

def request_cities(cityCode="", lang="en", limit=100, offset=0):
    '''
    DESCRIPTION
        Request one page of cities (or one specific city) from the
        reference-data endpoint.
    INPUT
        cityCode: 3-letter IATA city code; "" (default) lists all cities
        lang: language code sent as the `lang` query parameter
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under CityResource -> Cities -> City of the JSON response.
        NOTE(review): presumably a list of dicts, or a single dict when only one
        city is returned -- confirm against the API.
        Raises AttributeError when one of these keys is missing from the
        response; get_lh_data uses that as its signal to stop paging.
    '''
    # The city code is a path segment of the resource (.../cities/{cityCode}),
    # not a query parameter. With the default "" the URL is the plain list URL.
    city_url = f"v1/mds-references/cities/{cityCode}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+city_url, headers=head, params=my_params).json()

    return r.get('CityResource').get('Cities').get('City')

def request_airports(airportCode="", lang="en", limit=100, offset=0, LHoperated=0, group="AllAirports"):
    '''
    DESCRIPTION
        Request one page of airports (or one specific airport) from the
        reference-data endpoint.
    INPUT
        airportCode: 3-letter IATA airport code; "" (default) lists all airports
        lang: language code sent as the `lang` query parameter
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
        LHoperated: sent unchanged as the `LHoperated` query parameter
        group: sent unchanged as the `group` query parameter
    OUTPUT
        The value found under AirportResource -> Airports -> Airport of the
        JSON response.
        NOTE(review): presumably a list of dicts, or a single dict when only one
        airport is returned -- confirm against the API.
        Raises AttributeError when one of these keys is missing from the
        response; get_lh_data uses that as its signal to stop paging.
    '''
    # The airport code is a path segment of the resource (.../airports/{airportCode}),
    # not a query parameter. With the default "" the URL is the plain list URL.
    airports_url = f"v1/mds-references/airports/{airportCode}"

    my_params = {
        "lang": lang,
        "limit": limit, # only 100 possible
        "offset": offset,
        "LHoperated": LHoperated,
        "group": group
    }

    r = requests.get(lh_api_base_url+airports_url, headers=head, params=my_params).json()

    return r.get('AirportResource').get('Airports').get('Airport')

def request_nearest_airports(latitude, longitude, lang="en"):
    '''
    DESCRIPTION
        Request the airports nearest to a given position from the
        reference-data endpoint.
    INPUT
        latitude: decimal format to at most 3 decimal places
        longitude: decimal format to at most 3 decimal places
        lang: language code sent as the `lang` query parameter
    OUTPUT
        The value found under NearestAirportResource -> Airports -> Airport of
        the JSON response.
        Raises AttributeError when one of these keys is missing from the response.
    '''
    # The position is part of the resource path (.../airports/nearest/{latitude},{longitude});
    # only the language is a query parameter.
    nearest_airports_url = f"v1/mds-references/airports/nearest/{latitude},{longitude}"

    my_params = {
        "lang": lang
    }

    r = requests.get(lh_api_base_url+nearest_airports_url, headers=head, params=my_params).json()

    return r.get('NearestAirportResource').get('Airports').get('Airport')

def request_airlines(airlineCode="", limit=100, offset=0):
    '''
    DESCRIPTION
        Request one page of airlines (or one specific airline) from the
        reference-data endpoint.
    INPUT
        airlineCode: 2-character IATA airline/carrier code; "" (default) lists all airlines
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under AirlineResource -> Airlines -> Airline of the
        JSON response.
        NOTE(review): presumably a list of dicts, or a single dict when only one
        airline is returned -- confirm against the API.
        Raises AttributeError when one of these keys is missing from the
        response; get_lh_data uses that as its signal to stop paging.
    '''
    # The airline code is a path segment of the resource (.../airlines/{airlineCode}),
    # not a query parameter. With the default "" the URL is the plain list URL.
    airlines_url = f"v1/mds-references/airlines/{airlineCode}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+airlines_url, headers=head, params=my_params).json()

    return r.get('AirlineResource').get('Airlines').get('Airline')

def request_aircrafts(aircraftCode="", limit=100, offset=0):
    '''
    DESCRIPTION
        Request one page of aircraft types (or one specific aircraft type) from
        the reference-data endpoint.
    INPUT
        aircraftCode: 3-character IATA aircraft code; "" (default) lists all aircraft
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under AircraftResource -> AircraftSummaries ->
        AircraftSummary of the JSON response.
        NOTE(review): presumably a list of dicts, or a single dict when only one
        aircraft is returned -- confirm against the API.
        Raises AttributeError when one of these keys is missing from the
        response; get_lh_data uses that as its signal to stop paging.
    '''
    # The aircraft code is a path segment of the resource (.../aircraft/{aircraftCode}),
    # not a query parameter. With the default "" the URL is the plain list URL.
    aircraft_url = f"v1/mds-references/aircraft/{aircraftCode}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+aircraft_url, headers=head, params=my_params).json()

    return r.get('AircraftResource').get('AircraftSummaries').get('AircraftSummary')

In [7]:
def request_customer_flight_info(flightNumber, date, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of a particular flight (boarding, delayed, etc.)
    INPUT
        flightNumber: Flight number including carrier code and any suffix (e.g. 'LH400')
        date: The departure date (YYYY-MM-DD) in the local time of the departure airport
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under FlightInformation -> Flights -> Flight of the
        JSON response. Raises AttributeError when one of these keys is missing.
    '''
    response = requests.get(
        lh_api_base_url + f"v1/operations/customerflightinformation/{flightNumber}/{date}",
        headers=head,
        params=dict(limit=limit, offset=offset),  # only 100 possible for limit
    )
    payload = response.json()

    # Walk the nested payload step by step; a missing level yields None and the
    # following .get() raises AttributeError, exactly like a chained lookup.
    flights = payload.get('FlightInformation').get('Flights')
    return flights.get('Flight')

def request_customer_flight_info_by_route(origin, destination, date, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of flights between a given origin and destination on a given date.
    INPUT
        origin: 3-letter IATA airport code (e.g. 'FRA')
        destination: 3-letter IATA airport code (e.g. 'JFK')
        date: Departure date (YYYY-MM-DD) in local time of departure airport
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under FlightInformation -> Flights -> Flight of the
        JSON response. Raises AttributeError when one of these keys is missing.
    '''
    route_path = "v1/operations/customerflightinformation/route/{}/{}/{}".format(
        origin, destination, date
    )
    paging = {"limit": limit, "offset": offset}  # only 100 possible for limit

    payload = requests.get(lh_api_base_url + route_path, headers=head, params=paging).json()

    node = payload.get('FlightInformation')
    node = node.get('Flights')
    return node.get('Flight')

def request_customer_flight_info_at_arrival(airportCode, fromDateTime, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of all arrivals at a given airport up to 4 hours from the provided date time.
    INPUT
        airportCode: 3-letter IATA airport code (e.g. 'ZRH')
        fromDateTime: Start of time range in local time of arrival airport (YYYY-MM-DDTHH:mm)
        limit: Number of records returned per request. This function sends 100 by default,
               which is also the maximum (if a value bigger than 100 is given, 100 will be taken)
        offset: Number of records skipped. Defaults to 0
    OUTPUT
        The value found under FlightInformation -> Flights -> Flight of the
        JSON response. Raises AttributeError when one of these keys is missing.
    '''
    # Arrivals have their own resource; the previous URL pointed at ".../departures/..."
    # and therefore returned the same data as request_customer_flight_info_at_departure.
    cust_flight_info_by_arrival_airport_url = f"v1/operations/customerflightinformation/arrivals/{airportCode}/{fromDateTime}"

    my_params = {
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+cust_flight_info_by_arrival_airport_url, headers=head, params=my_params).json()

    return r.get('FlightInformation').get('Flights').get('Flight')

def request_customer_flight_info_at_departure(airportCode, fromDateTime, limit=100, offset=0):
    '''
    DESCRIPTION
        Status of all departures from a given airport up to 4 hours from the provided date time.
    INPUT
        airportCode: 3-letter IATA airport code (e.g. 'ZRH')
        fromDateTime: Start of time range (YYYY-MM-DDTHH:mm), in local time of the
                      airport given by airportCode
        limit: Number of records returned per request. This function sends 100 by default,
               which is also the maximum (if a value bigger than 100 is given, 100 will be taken)
        offset: Number of records skipped. Defaults to 0
    OUTPUT
        The value found under FlightInformation -> Flights -> Flight of the
        JSON response. Raises AttributeError when one of these keys is missing.
    '''
    full_url = lh_api_base_url + f"v1/operations/customerflightinformation/departures/{airportCode}/{fromDateTime}"

    body = requests.get(
        full_url,
        headers=head,
        params={"limit": limit, "offset": offset},  # only 100 possible for limit
    ).json()

    flight_information = body.get('FlightInformation')
    return flight_information.get('Flights').get('Flight')

def request_flight_schedules(origin, destination, fromDateTime, directFlights=0, limit=100, offset=0):
    '''
    DESCRIPTION
        Scheduled flights between given airports on a given date.
    INPUT
        origin: inserted into the URL path as the departure location
        destination: inserted into the URL path as the arrival location
        fromDateTime: inserted into the URL path as the start date/time
        directFlights: sent unchanged as the `directFlights` query parameter
        limit: Number of records per request (only 100 possible)
        offset: Number of records skipped
    OUTPUT
        The value found under ScheduleResource -> Schedule of the JSON response
        when that key is present; otherwise the value found under
        FlightInformation -> Flights -> Flight (the lookup this function used
        before), raising AttributeError when those keys are missing.
    '''

    flight_schedules_url = f"v1/operations/schedules/{origin}/{destination}/{fromDateTime}"

    my_params = {
        "directFlights": directFlights,
        "limit": limit, # only 100 possible
        "offset": offset
    }

    r = requests.get(lh_api_base_url+flight_schedules_url, headers=head, params=my_params).json()

    # NOTE(review): the schedules endpoint is expected to answer with
    # {"ScheduleResource": {"Schedule": ...}} rather than the FlightInformation
    # layout of the flight-status endpoints -- confirm against the API reference.
    schedule_resource = r.get('ScheduleResource')
    if schedule_resource is not None:
        return schedule_resource.get('Schedule')

    # Fallback: previous lookup, kept so existing behavior (including the
    # AttributeError on an unexpected payload) is unchanged for other layouts.
    return r.get('FlightInformation').get('Flights').get('Flight')

In [8]:
def get_lh_data(function, *args, sleep_range=(5, 11), **kwargs):
    '''
    DESCRIPTION
        Page through one of the request_* functions in steps of 100 records and
        collect everything into a single list.
    INPUT
        function: callable accepting an `offset` keyword and returning one page
        *args, **kwargs: forwarded unchanged to `function` on every call
        sleep_range: (low, high) passed to np.random.randint; the number of
                     seconds to wait before every request (high is exclusive).
                     Defaults to the previously hard-coded (5, 11).
    OUTPUT
        List with the records of all pages. Paging stops when `function` raises
        AttributeError (the request_* functions do so when the expected keys
        are missing from the response) or returns an empty page / None.
    '''
    result = []
    current_offset = 0

    while True:
        # The airports page at offset 6900 is deliberately skipped.
        # NOTE(review): presumably that page cannot be retrieved and would end
        # the download early -- confirm whether this is still needed.
        if function == request_airports and current_offset == 6900:
            current_offset += 100

        # Random pause before each request, as before.
        time.sleep(np.random.randint(*sleep_range))

        try:
            page = function(*args, offset=current_offset, **kwargs)
        except AttributeError:
            break

        # No records: stop instead of failing on extend(None) or looping forever
        # on empty pages.
        if not page:
            break

        # A single record delivered as a dict must be appended as one record;
        # extending with it would add only its keys.
        if isinstance(page, dict):
            page = [page]

        result.extend(page)
        current_offset += 100

    return result

In [65]:
# Preview: first page of airlines as a flat frame, one row per AirlineID.
(
    pd.json_normalize(request_airlines())
    .drop_duplicates(subset=["AirlineID"])
)

Unnamed: 0,AirlineID,AirlineID_ICAO,Names.Name.@LanguageCode,Names.Name.$
0,0A,GNT,EN,Amber Air
1,0B,BMS,EN,Blue Air
2,0D,DWT,EN,Darwin Airline Sa
3,0J,PJZ,EN,Premium Jet Ag
4,0K,KRT,EN,Aircompany Kokshetau
...,...,...,...,...
97,5D,SLI,EN,Aeromexico Connect
98,5E,JTM,EN,East Coast Flight Services Inc
99,5F,FIA,EN,Flyone
100,5G,CUO,EN,Aerocuahonte


In [66]:
def list_to_dataframe(name):
    '''
    DESCRIPTION
        Download one reference dataset page by page, flatten it into a
        DataFrame, drop rows with a duplicated identifier and export the result
        to `df_<name>.csv` (separator ";") in `data_path`. Shapes before and
        after de-duplication are printed.
        NOTE: this definition replaces the earlier list_to_dataframe(lst)
        defined further up in the notebook.
    INPUT
        name: one of "countries", "cities", "airports", "aircrafts", "airlines"
    OUTPUT
        None (the result is written to disk).
        Raises ValueError for an unknown name.
    '''
    # dataset name -> (request function, identifier column used for de-duplication)
    name_dict = {
        "countries": (request_countries, "CountryCode"),
        "cities": (request_cities, "CityCode"),
        "airports": (request_airports, "AirportCode"),
        "aircrafts": (request_aircrafts, "AircraftCode"),
        "airlines": (request_airlines, "AirlineID")
    }

    # Fail with a readable message instead of "'NoneType' object is not subscriptable".
    if name not in name_dict:
        raise ValueError(f"Unknown dataset name {name!r}; expected one of {sorted(name_dict)}")
    request_function, id_column = name_dict[name]

    lst = get_lh_data(request_function)
    df_raw = pd.json_normalize(lst)
    print(f'Shape of df_{name}: {df_raw.shape}')

    df_dd = df_raw.drop_duplicates(subset=[id_column])
    print(f'Shape of df_{name}_dd: {df_dd.shape}')

    # Make sure the export folder exists so the download is not lost on a fresh checkout.
    data_path.mkdir(parents=True, exist_ok=True)
    df_dd.to_csv(data_path.joinpath(f'df_{name}.csv'), sep=";", index=False)
    print(f'exported as df_{name}.csv')

In [68]:
# Download all cities, de-duplicate on CityCode and export them to df_cities.csv.
list_to_dataframe("cities")

Shape of df_cities: (10700, 7)
Shape of df_cities_dd: (10693, 7)
exported as df_cities.csv


In [57]:
# Download all countries, de-duplicate on CountryCode and export them to df_countries.csv.
list_to_dataframe("countries")

Shape of df_countries: (239, 3)
Shape of df_countries_dd: (238, 3)
exported as df_countries.csv


In [67]:
# Download all airlines, de-duplicate on AirlineID and export them to df_airlines.csv.
list_to_dataframe("airlines")

Shape of df_airlines: (1136, 4)
Shape of df_airlines_dd: (1127, 4)
exported as df_airlines.csv


In [29]:
# Download every aircraft page, flatten it, drop duplicated AircraftCode rows and export.
# The request function defined in this notebook is `request_aircrafts`; the former
# `request_aircraft` does not exist and raised NameError on a fresh kernel.
list_aircrafts = get_lh_data(request_aircrafts)
print("Lenght aircrafts:", len(list_aircrafts))
df_aircrafts = pd.json_normalize(list_aircrafts)
print("Shape of df_aircrafts:", df_aircrafts.shape)
# Keep the first row per AircraftCode; the second shape print shows the de-duplicated size.
df_aircrafts = df_aircrafts.drop_duplicates(subset=["AircraftCode"])
print("Shape of df_aircrafts:", df_aircrafts.shape)
df_aircrafts.to_csv(data_path.joinpath('df_aircrafts.csv'), sep=";", index=False)

Lenght aircrafts: 382
Shape of df_aircrafts: (382, 4)
Shape of df_aircrafts: (380, 4)


In [10]:
# Download all airport pages (get_lh_data pauses a few seconds before every request,
# so this cell takes a while) and show how many records were collected.
list_airports = get_lh_data(request_airports)
len(list_airports)

11681

In [16]:
# Flatten the raw airport records and keep a CSV copy of the (not yet de-duplicated) frame.
df_airports = pd.json_normalize(list_airports)
df_airports.to_csv(data_path / 'df_airports.csv', sep=";", index=False)
print("Shape of df_airports:", df_airports.shape)
df_airports.head(5)

Shape of df_airports: (11681, 10)


Unnamed: 0,AirportCode,CityCode,CountryCode,LocationType,UtcOffset,TimeZoneId,Position.Coordinate.Latitude,Position.Coordinate.Longitude,Names.Name.@LanguageCode,Names.Name.$
0,AAA,AAA,PF,Airport,-10:00,Pacific/Tahiti,-17.3525,-145.51,EN,Anaa
1,AAB,AAB,AU,Airport,+10:00,Australia/Brisbane,-26.6911,141.0472,EN,Arrabury
2,AAC,AAC,EG,Airport,+02:00,Africa/Cairo,31.0733,33.8358,EN,El Arish International
3,AAD,AAD,SO,Airport,+03:00,Africa/Mogadishu,6.0961,46.6375,EN,Adado
4,AAE,AAE,DZ,Airport,+01:00,Africa/Algiers,36.8222,7.8092,EN,Annaba Rabah Bitat


In [20]:
# Rows whose AirportCode already appeared earlier in the frame (first occurrence not shown).
df_airports.loc[df_airports.duplicated(subset=["AirportCode"])]

Unnamed: 0,AirportCode,CityCode,CountryCode,LocationType,UtcOffset,TimeZoneId,Position.Coordinate.Latitude,Position.Coordinate.Longitude,Names.Name.@LanguageCode,Names.Name.$
100,ADX,ADX,GB,Airport,+00:00,Europe/London,56.3667,-2.8667,EN,St Andrews Raf Leuchars
2968,GOM,GOM,CD,Airport,+02:00,Africa/Lubumbashi,-1.6708,29.2383,EN,Goma
2990,GPS,GPS,EC,Airport,-06:00,Pacific/Galapagos,-0.4539,-90.2658,EN,Baltra Island Seymour
3180,HBX,HBX,IN,Airport,+05:30,Asia/Kolkata,15.3617,75.0869,,
3203,HDN,HDN,US,Airport,-07:00,America/Denver,40.4833,-107.2236,,


In [27]:
# Keep the first row per AirportCode and overwrite the earlier export with the clean frame.
df_airports_dd = df_airports.drop_duplicates(subset=["AirportCode"])
print("Shape of df_airports_dd:", df_airports_dd.shape)
df_airports_dd.to_csv(data_path / 'df_airports.csv', sep=";", index=False)

Shape of df_airports_dd: (11676, 10)


# Get all current flights

In [76]:
# Reload the exported airports from disk so this section can run without a new download.
df_airports = pd.read_csv(data_path / 'df_airports.csv', sep=';', header=0)
df_airports.head(5)

Unnamed: 0,AirportCode,CityCode,CountryCode,LocationType,UtcOffset,TimeZoneId,Position.Coordinate.Latitude,Position.Coordinate.Longitude,Names.Name.@LanguageCode,Names.Name.$
0,AAA,AAA,PF,Airport,-10:00,Pacific/Tahiti,-17.3525,-145.51,EN,Anaa
1,AAB,AAB,AU,Airport,+10:00,Australia/Brisbane,-26.6911,141.0472,EN,Arrabury
2,AAC,AAC,EG,Airport,+02:00,Africa/Cairo,31.0733,33.8358,EN,El Arish International
3,AAD,AAD,SO,Airport,+03:00,Africa/Mogadishu,6.0961,46.6375,EN,Adado
4,AAE,AAE,DZ,Airport,+01:00,Africa/Algiers,36.8222,7.8092,EN,Annaba Rabah Bitat


In [10]:
# Collect today's departures for a few airports by querying several start times per day.
flights_sample = []
current_date = date.today().strftime("%Y-%m-%d")
hours = np.arange(6,24,4)  # start hours 6, 10, 14, 18, 22 -> one request per 4-hour window

# NOTE(review): 'PAR' looks like a city code rather than an airport code -- confirm that
# the departures endpoint accepts it; failing requests are skipped silently below.
for airport_code in ['BER', 'PAR', 'MUC']:#df_airports['AirportCode'].values:
    for hour in hours:
        try:
            # Random pause between requests.
            time.sleep(np.random.uniform(3.8,5.0))
            tmp = request_customer_flight_info_at_departure(airportCode=airport_code, fromDateTime=f'{current_date}T{hour:02d}:00')
        except AttributeError:
            # Raised by the request function when the response lacks the expected keys.
            continue

        # Nothing returned for this window: skip instead of failing on extend(None).
        if not tmp:
            continue
        # A single flight delivered as a dict must be added as one record;
        # extending with it would add only its keys.
        if isinstance(tmp, dict):
            tmp = [tmp]
        flights_sample.extend(tmp)

df_flights = pd.json_normalize(flights_sample)
print(df_flights.shape)
#df_flights.to_csv(data_path.joinpath(f'df_flights_{current_date}.csv'), sep=';', index=False)

(278, 30)


In [12]:
df_flights.head()

Unnamed: 0,Departure.AirportCode,Departure.Scheduled.Date,Departure.Scheduled.Time,Departure.Terminal.Name,Departure.Status.Code,Departure.Status.Description,Arrival.AirportCode,Arrival.Scheduled.Date,Arrival.Scheduled.Time,Arrival.Terminal.Name,...,Departure.Terminal.Gate,Arrival.Actual.Date,Arrival.Actual.Time,Arrival.Terminal.Gate,Arrival.Estimated.Date,Arrival.Estimated.Time,MarketingCarrierList.MarketingCarrier.AirlineID,MarketingCarrierList.MarketingCarrier.FlightNumber,Departure.Estimated.Date,Departure.Estimated.Time
0,BER,2023-03-26,06:20,1,NO,No Status,MUC,2023-03-26,07:30,2.0,...,,,,,,,,,,
1,BER,2023-03-26,06:45,1,DP,Flight Departed,FRA,2023-03-26,07:55,1.0,...,B06,2023-03-26,08:12,A16,,,,,,
2,BER,2023-03-26,06:55,1,DP,Flight Departed,BRU,2023-03-26,08:20,,...,B19,2023-03-26,08:14,,,,,,,
3,BER,2023-03-26,07:00,1,DP,Flight Departed,ZRH,2023-03-26,08:25,,...,B10,2023-03-26,08:38,,,,,,,
4,BER,2023-03-26,07:00,1,DP,Flight Departed,VIE,2023-03-26,08:15,3.0,...,A02,2023-03-26,08:07,,,,,,,
