In [1]:
from traffic.data import opensky
import pandas as pd
from datetime import datetime, timedelta
import h5py
import os

  from tqdm.autonotebook import tqdm


In [2]:
def download_month(month: int, year: int, start_day = 1, cached=True):
    """
    gets data from opensky database by month, year, and first relevant day
    and save the data as h5-file
    """

    if month < 10:
        str_month = "0" + str(month)
    else:
        str_month = str(month)

    filename = "Frankfurt_LH_" + str(year)[2:] + str_month + ".h5"


    next_month = month + 1 if month < 12 else  1
    str_next_month = str(next_month) if next_month > 9 else "0" + str(next_month)

    start_day_str = "0" + str(start_day) if start_day < 10 else str(start_day)
    datetime_str_start = str(year) + "-"+ str_month + "-" + start_day_str +  " 00:00:00"
    datetime_str_end = str(year) + "-" + str_next_month + "-01 00:00:00" if next_month != 1 else str(year+1) + "-" + str_next_month + "-01 00:00:00"
    datetime_current = datetime.strptime(datetime_str_start, '%Y-%m-%d %H:%M:%S')
    datetime_end = datetime.strptime(datetime_str_end, '%Y-%m-%d %H:%M:%S')

    while datetime_current < datetime_end:

        print(datetime_current)
        day = datetime_current.day
        if day < 10:
            day_str = "0" + str(day)
        else:
            day_str = str(day)

        h5_key = "LH_" + str(year)[2:] + str_month + day_str
        print(h5_key)
        str_current_day = str(datetime_current)
        str_next_day = str(datetime_current + timedelta(days = 1))
        print("getting data between "+str_current_day+ " and "+ str_next_day)

        trajectories = opensky.history(str_current_day,
                            stop= str_next_day,
                            arrival_airport ="EDDF",
                            cached=cached)
        try:
            callsigns_lh = trajectories.data.callsign.loc[trajectories.data.callsign.str.contains("DLH").astype(bool)]

            callsigns_lh = callsigns_lh.unique()
            callsigns_lh = callsigns_lh[~pd.isnull(callsigns_lh)]
            trajectories_lh = trajectories[callsigns_lh]
            print("add",h5_key,"to file")

            trajectories_lh.to_hdf(filename, key=h5_key, format = 'table')

        except AttributeError as e:
            print(e)

        datetime_current += timedelta(days = 1)


In [None]:
download_month(month=6, year=2023, cached = True, start_day=5)

2023-06-05 00:00:00
LH_230605
getting data between 2023-06-05 00:00:00 and 2023-06-06 00:00:00


  0%|          | 0/24 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])


add LH_230605 to file
2023-06-06 00:00:00
LH_230606
getting data between 2023-06-06 00:00:00 and 2023-06-07 00:00:00


  0%|          | 0/24 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
  df = pd.read_csv(s, dtype={"icao24": str, "callsign": str})
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
  df = pd.read_csv(s, dtype={"icao24": str, "callsign": str})
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
  df = pd.read_csv(s, dtype={"icao24": str, "callsign": str})
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-note

add LH_230606 to file
2023-06-07 00:00:00
LH_230607
getting data between 2023-06-07 00:00:00 and 2023-06-08 00:00:00


  0%|          | 0/24 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
  df = pd.read_csv(s, dtype={"icao24": str, "callsign": str})
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array

add LH_230607 to file
2023-06-08 00:00:00
LH_230608
getting data between 2023-06-08 00:00:00 and 2023-06-09 00:00:00


  0%|          | 0/24 [00:00<?, ?it/s]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common_type = np.find_common_type(


In [3]:
with h5py.File("Frankfurt_LH_2306.h5", 'r') as f:
    keys = print(list(f.keys()))


['LH_230601', 'LH_230602', 'LH_230603', 'LH_230604']


In [3]:
os.getcwd()

'C:\\Users\\dario\\Documents\\Master Data Science\\Lufthansa\\Lufthansa-Arrival-Time-Prediction\\data'

In [7]:
data = pd.read_csv("testcase.csv")

In [11]:
data = data.drop(columns=["Unnamed: 0"])

In [12]:
from random import sample

In [19]:
flight_ids = sample(list(data.callsign), 10)
flight_ids

['DLH507',
 'DLH457',
 'DLH471',
 'DLH471',
 'DLH1307',
 'DLH1TN',
 'DLH507',
 'DLH757',
 'DLH757',
 'DLH401']

In [20]:
data = data[data.callsign.isin(flight_ids)]
data.shape

(89322, 21)

In [21]:
data.to_csv("testcases.csv", index=False)