In [1]:
# Import necessary libraries

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import xarray as xr
import cartopy.crs as ccrs

import random
import time

from pycontrails import Flight
from pycontrails.datalib.ecmwf import ERA5
from pycontrails.models.cocip import Cocip
from pycontrails.models.apcemm import APCEMM
from pycontrails.models.dry_advection import DryAdvection
from pycontrails.core import met_var, GeoVectorDataset, models, vector
from pycontrails.physics import constants, thermo, units
from pycontrails.datalib.ecmwf import ERA5ARCO

from pycontrails import Flight
from pycontrails.datalib.ecmwf import ERA5
from pycontrails.models.cocip import Cocip
from pycontrails.models.dry_advection import DryAdvection
from pycontrails.models.humidity_scaling import ConstantHumidityScaling
from alive_progress import alive_bar

from src.sampling import calcTotalDistance, samplePoint, generateFlight
from src.geodata import open_dataset_from_sample

In [2]:
"""df = pd.read_csv('flight_data/flightlist_20190101_20190131.csv.gz')
df.drop('number', axis=1, inplace=True)
df.drop('registration', axis=1, inplace=True)
df.drop('icao24', axis=1, inplace=True)
df.to_pickle('flight_data/flightlist_20190101_20190131.pkl')"""

"df = pd.read_csv('flight_data/flightlist_20190101_20190131.csv.gz')\ndf.drop('number', axis=1, inplace=True)\ndf.drop('registration', axis=1, inplace=True)\ndf.drop('icao24', axis=1, inplace=True)\ndf.to_pickle('flight_data/flightlist_20190101_20190131.pkl')"

In [3]:
# Run-size settings used by the sampling cell below.
n_flights = 100  # number of flight-list rows converted into flight objects
n_samples = 100  # number of points sampled across those flights

In [4]:
# TODO: read in multiple pickled files and combine them
# NOTE(review): loading a pickle can execute arbitrary code; only load files
# that were generated locally from trusted data.
df = pd.read_pickle('flight_data/flightlist_20190101_20190131.pkl')

In [5]:
# Optional: shuffle the rows so that the first n_flights rows form a random subset of flights.
#df = df.sample(frac=1)

In [6]:
# Each row of `samples` holds a (flight index, point index) pair.
samples = np.empty((n_samples, 2), int)
flights = []

# Build one flight object from each of the first n_flights rows of the flight list.
print("Converting to list of flight objects...")
with alive_bar(n_flights) as bar:
    for row_number in range(n_flights):
        flight_row = df.iloc[row_number]
        flights.append(generateFlight(flight_row))
        bar()

total_distance = calcTotalDistance(flights)

# Draw the sample points; each draw fills one row of `samples`.
print("\nTaking samples...")
with alive_bar(n_samples) as bar:
    for sample_number in range(n_samples):
        samples[sample_number] = samplePoint(flights, total_distance)
        bar()

sample_indices = np.arange(n_samples)

# Per-sample output columns, filled in the loop below.
longitudes = np.empty(n_samples)
latitudes = np.empty(n_samples)
altitudes = np.empty(n_samples)
times = np.empty(n_samples, dtype='datetime64[s]')
aircrafts = np.empty(n_samples, dtype=object)

print("\nDetermining sample characteristics...")
with alive_bar(n_samples) as bar:
    for slot, (flight_idx, point_idx) in enumerate(samples):
        # Look the flight up once, then read every field at the sampled point.
        sampled_flight = flights[flight_idx]
        longitudes[slot] = sampled_flight['longitude'][point_idx]
        latitudes[slot] = sampled_flight['latitude'][point_idx]
        altitudes[slot] = sampled_flight['altitude'][point_idx]
        times[slot] = sampled_flight['time'][point_idx]
        aircrafts[slot] = sampled_flight.attrs['aircraft_type']
        bar()

print(f"\nTotal distance flown in dataset was {total_distance/1000:.2f} km.")

Converting to list of flight objects...


|████████████████████████████████████████| 100/100 [100%] in 0.1s (671.21/s) 

Taking samples...
|████████████████████████████████████████| 100/100 [100%] in 0.8s (121.72/s) 

Determining sample characteristics...
|████████████████████████████████████████| 100/100 [100%] in 0.0s (121828.94/s) 

Total distance flown in dataset was 840606.01 km.


In [7]:
"""fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.coastlines()
ax.set_global()
plt.scatter(longitudes, latitudes, marker='o', transform = ccrs.PlateCarree(), s=5)

plt.show()"""

"fig = plt.figure(figsize=(10, 10))\nax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())\nax.coastlines()\nax.set_global()\nplt.scatter(longitudes, latitudes, marker='o', transform = ccrs.PlateCarree(), s=5)\n\nplt.show()"

In [None]:
# Collect the per-sample characteristics into one table and write it to disk.
# The frame is built from a dict of columns so each array keeps its own dtype
# (integer index, float coordinates/altitude, datetime64 time, object aircraft
# type). Stacking the arrays through a single np.array(...).transpose() would
# coerce every column to object dtype.
df_samples = pd.DataFrame(
    {
        "Index": sample_indices,
        "Longitude": longitudes,
        "Latitude": latitudes,
        "Altitude": altitudes,
        "Time": times,
        "Aircraft Type": aircrafts,
    }
)
# NOTE: the output is tab-separated even though the file name ends in .csv.
df_samples.to_csv('samples/samples.csv', sep='\t')

In [9]:
# Show the sampled points in chronological order (df_samples itself is not modified).
print(df_samples.sort_values(by='Time'))

   Index   Longitude   Latitude Altitude                Time Aircraft Type
44    44  141.958557 -26.066644  10668.0 2018-12-31 03:08:00          A332
32    32  137.200596 -21.033656  10668.0 2018-12-31 04:30:00          B788
17    17  147.272749 -29.931136  10668.0 2018-12-31 07:17:00          A388
64    64  126.614108  -8.500594  10668.0 2018-12-31 07:23:00          B788
92    92  132.458092  48.907544  10668.0 2018-12-31 07:24:00           NaN
..   ...         ...        ...      ...                 ...           ...
93    93  -33.676407  66.198309  10668.0 2019-01-01 07:48:00          B77W
36    36  -18.408629  61.491093  10668.0 2019-01-01 08:28:00          B77L
39    39   -9.966383  42.469372  10668.0 2019-01-01 08:33:00          B789
0      0   -2.934046  48.840488  10668.0 2019-01-01 09:15:00          B772
94    94 -123.614847  49.508365  10668.0 2019-01-01 18:13:00          B77W

[100 rows x 6 columns]
