In [None]:
import sys
sys.path.append('./rtaUtils')

from rtaUtils import data_loading, data_preparation, paths

In [None]:
# Feature configuration shared by the data-preparation steps in this notebook.

# Columns handled as numeric inputs.
numeric_feat = [
    'latitude',
    'longitude',
    'altitude',
    'departureDelay',
    'vspeed',
    'speed',
    'day_of_week',
    'track',
    'wind_dir_degrees',
    'wind_speed_kt',
    'visibility_statute_mi',
    'max_temp',
    'min_temp',
    'clouds',
    'hav_distance',
]

# Columns handled as categorical inputs.
categoric_feat = [
    'time_of_day',
    'operator',
    'aerodromeOfDeparture',
    'sky_status',
]

# Target column(s).
objective = ['RTA']

# Total number of input features (numeric + categorical).
num_features = len(numeric_feat) + len(categoric_feat)

# Split of the input columns into two groups; presumably "time-series"
# (changing along a trajectory) vs. "non-time-series" features — confirm
# against the code that consumes these lists.
ts_features = [
    'latitude',
    'longitude',
    'altitude',
    'vspeed',
    'speed',
    'track',
    'hav_distance',
]
nts_features = [
    'departureDelay',
    'day_of_week',
    'wind_dir_degrees',
    'wind_speed_kt',
    'visibility_statute_mi',
    'max_temp',
    'min_temp',
    'time_of_day',
    'operator',
    'aerodromeOfDeparture',
    'sky_status',
    'clouds',
]

# Bundle of feature groups handed to data_preparation.generate_save_windows.
feat_dict = dict(
    numeric=numeric_feat,
    categoric=categoric_feat,
    objective=objective,
)

# Sampling

In [None]:
# Sampling parameter forwarded to data_preparation.sample_data
# (presumably a period in seconds — TODO confirm against rtaUtils).
sampling = 60

# Months to process as 'YYYYMM' strings; range(10, 11) yields only '202210'.
months = [f'2022{str(x).rjust(2,"0")}' for x in range(10,11)]
# Passed through to the loader; presumably a wildcard selecting every airport — confirm.
airport = '*'

# DataFrame.to_parquet does not create missing parent folders, so make sure the
# per-sampling output directory exists before the first write.
sampled_dir = paths.sampled_data_path / f's{sampling}'
sampled_dir.mkdir(parents=True, exist_ok=True)

for month in months:
    # Load the three splits for this month.
    train_data = data_loading.load_final_data(month, 'train', airport)
    test_data  = data_loading.load_final_data(month, 'test', airport)
    val_data   = data_loading.load_final_data(month, 'val', airport)

    # Row counts before sampling, kept for the summary line below.
    start_train = train_data.shape[0]
    start_test = test_data.shape[0]
    start_val = val_data.shape[0]

    train_data = data_preparation.sample_data(train_data, sampling)
    test_data = data_preparation.sample_data(test_data, sampling)
    val_data = data_preparation.sample_data(val_data, sampling)

    # One summary line per month: rows before -> rows after sampling, per split.
    print(f'{month} | ' + 
          f'train: {start_train:>9,} -> {train_data.shape[0]:>9,} | ' + 
          f'test: {start_test:>7,} -> {test_data.shape[0]:>7,} | ' + 
          f'val: {start_val:>6,} -> {val_data.shape[0]:>6,}')

    # Persist the sampled splits as <sampled_data_path>/s<sampling>/<month>.<split>.parquet
    train_data.to_parquet(sampled_dir / f'{month}.train.parquet')
    test_data.to_parquet(sampled_dir / f'{month}.test.parquet')
    val_data.to_parquet(sampled_dir / f'{month}.val.parquet')

# Windowing

In [None]:
# Parameters forwarded to data_preparation.generate_save_windows.
sampling = 60   # presumably must match the sampling used to produce the sampled files — confirm
lookback = 32   # presumably the window length in samples — confirm

airport = '*'
# 'YYYYMM' strings for January through September 2022 ('202201' .. '202209').
# NOTE(review): the sampling cell in this notebook currently covers only '202210';
# confirm that the sampled data for these months already exists.
months = [f'2022{month_number:02d}' for month_number in range(1, 10)]

# Build and save the windows one month at a time.
for month in months:
    data_preparation.generate_save_windows(
        month, lookback, sampling, feat_dict, airport
    )