## System 4: Combine Operations and Meter Data

_Note: This notebook takes longer to run than the other systems._

This system combines operations and meter data to enable 400Hz analysis.

**Inputs:**

- 1c: (CSV) Cleaned CSV/Pandas dataframe with power for each time period at each gate. Example: ![](screenshots/1c_sample_cleaned_consumption_data_output.png)

- 3b: (CSV) Cleaned operations data produced by System 3


**Output:**

- 4a: Combined data with each operation listed as metadata on each power consumption time block

In [43]:
# Dependencies

# For any missing libraries, just run (remove curly braces):
# !pip install {library_name}
 
## Required
import pandas as pd
import datetime as dt
import numpy as np
from tqdm import tqdm_notebook as tqdm

In [13]:
# Inputs, settings, and toggles
# All paths are relative to the directory the notebook is run from.

# Location of cleaned consumption (meter) input data (input 1c)
input_1c_location = 'sample_data/1c_cleaned_consumption_data.csv'

# Location of cleaned operations input data (input 3b)
input_3b_location = 'sample_data/3b_sample_cleaned_operations_data.csv'


# Location of the combined output data (output 4a); written as CSV by the combination cell
output_4a_location = 'sample_data/4a_combined_operations_and_consumption_data.csv'

In [38]:
# Bring in cleaned consumption data
# Only Real_Timestamp is a point in time, so it is the only column handed to the
# date parser. Row_Time_Delta is a duration: it is read as plain text and converted
# explicitly below. (Date-parsing it could yield datetimes, which pd.to_timedelta
# refuses to convert.)
# infer_datetime_format is intentionally not passed: it is deprecated since
# pandas 2.0, where format inference is already the default.
date_like_gates = ['Real_Timestamp']
gates = pd.read_csv(input_1c_location, parse_dates=date_like_gates)
gates['Row_Time_Delta'] = pd.to_timedelta(gates['Row_Time_Delta'])  # text -> Timedelta
gates.head()

Unnamed: 0,Real_Timestamp,Power_kW,Cumulative_kWh,Row_Time_Delta,Gate
0,2019-09-01 00:10:00,12.96,924225.66,00:05:00,Gate_D50B
1,2019-09-01 00:15:00,13.08,924226.75,00:05:00,Gate_D50B
2,2019-09-01 00:20:00,13.08,924227.84,00:05:00,Gate_D50B
3,2019-09-01 00:25:00,12.84,924228.91,00:05:00,Gate_D50B
4,2019-09-01 00:30:00,12.96,924229.99,00:05:00,Gate_D50B


In [29]:
# Load the cleaned operations table (input 3b).
# The columns listed here hold timestamps and are parsed into datetimes on read;
# every other column keeps the dtype pandas infers for it.
date_like_ops = [
    'Actual Landing Time (Aerobahn)',
    'Scheduled In Block Time (Aerobahn)',
    'Scheduled Off Block Time (Aerobahn)',
    'in_block_time_dt',
    'off_block_time_dt',
]
ops = pd.read_csv(
    filepath_or_buffer=input_3b_location,
    parse_dates=date_like_ops,
)
ops.head()

Unnamed: 0,calculated_gate,Carrier Group,Registration,Model,International or Domestic Indicator,Actual Landing Time (Aerobahn),Scheduled In Block Time (Aerobahn),Origination Airport,Destination Airport,Scheduled Off Block Time (Aerobahn),total_taxi_time_from_arrival,total_taxi_time_from_departure,call_sign_arrival,call_sign_departure,in_block_time_dt,off_block_time_dt,block_time_delta
0,*21A,Alaska Airlines,N191SY,E75,Domestic,2019-10-10 07:33:00,2019-10-10 07:40:00,PSP,DAL,2019-10-10 08:55:00,0:05:59,,SKW3327,SKW3404,2019-10-10 07:41:00,2019-10-10 09:50:00,129.0
1,*21A,Alaska Airlines,N171SY,E75,Domestic,2019-10-11 07:19:00,2019-10-11 07:40:00,PSP,DAL,2019-10-11 08:55:00,0:04:19,,SKW3327,SKW3404,2019-10-11 07:25:00,2019-10-11 08:56:00,91.0
2,*22A,Alaska Airlines,N193SY,E75,Domestic,2019-10-18 21:57:00,2019-10-18 22:10:00,PSP,SNA,2019-10-19 07:00:00,0:08:08,,SKW3395,SKW3370,2019-10-18 22:14:00,2019-10-19 06:58:00,524.0
3,*22A,Alaska Airlines,N405SY,E75,Domestic,2019-10-19 07:30:00,2019-10-19 07:40:00,PSP,DAL,2019-10-19 08:55:00,0:08:26,,SKW3327,SKW3404,2019-10-19 07:40:00,2019-10-19 08:56:00,76.0
4,*22A,Alaska Airlines,N195SY,E75,Domestic,2019-11-07 22:48:00,2019-11-07 21:50:00,PSP,PDX,2019-11-08 09:00:00,0:08:08,,SKW3383,SKW3325,2019-11-07 23:04:00,2019-11-08 08:59:00,595.0


In [44]:
# [Long operation] Combine operations and consumption data
#
# For every operation (one aircraft at one gate) this cell:
#   1. picks the meter rows of that gate whose metering interval overlaps the
#      time between in-block and off-block,
#   2. computes `plane_in_ratio`, the fraction (0..1) of each interval during
#      which the aircraft was on blocks,
#   3. copies the operation's metadata onto those rows.
# The per-operation pieces are then concatenated and written to output 4a.
#
# Runtime grows with the number of operations; the progress bar shows how far along it is.

# Full copy of the meter data. It is not referenced again in this cell; it is
# kept so the name stays defined for anything that relies on it.
gates_ops = gates.copy()

# Operation columns copied onto every matching meter row (same for every operation).
cols_to_add = ['Actual Landing Time (Aerobahn)', 'Carrier Group',
               'Destination Airport', 'International or Domestic Indicator', 'Model',
               'Origination Airport', 'Registration',
               'Scheduled In Block Time (Aerobahn)',
               'Scheduled Off Block Time (Aerobahn)',
               'call_sign_arrival', 'call_sign_departure', 'in_block_time_dt',
               'off_block_time_dt', 'total_taxi_time_from_arrival',
               'total_taxi_time_from_departure', 'block_time_delta']

# Split the meter data by gate once, so each operation only scans the rows of its
# own gate instead of the whole table. Row order and index labels are preserved.
rows_by_gate = {gate: rows for gate, rows in gates.groupby('Gate', sort=False)}
no_rows = gates.iloc[0:0]  # used for operations whose gate has no meter data

ops_chunks = []
print("Starting long combination operation...")
# iterrows() has no length, so pass total= explicitly to get a real progress bar.
for i, op in tqdm(ops.iterrows(), total=len(ops)):
    in_block = op['in_block_time_dt']
    off_block = op['off_block_time_dt']
    gate_rows = rows_by_gate.get(op['calculated_gate'], no_rows)

    # Each meter row is treated as covering the interval
    # (Real_Timestamp - Row_Time_Delta, Real_Timestamp].
    # Keep the rows whose interval overlaps [in_block, off_block).
    interval_end = gate_rows['Real_Timestamp']
    interval_start = interval_end - gate_rows['Row_Time_Delta']
    overlaps = (interval_end >= in_block) & (interval_start < off_block)
    subset = gate_rows[overlaps].copy()

    # Fraction of each interval with the aircraft present: length of the overlap
    # between the interval and the on-block window, divided by the interval length.
    # Taking min(arrival share, departure share) instead would overstate the ratio
    # when the aircraft both arrives and leaves inside one interval.
    row_end = subset['Real_Timestamp']
    row_len = subset['Row_Time_Delta']
    present_from = (row_end - row_len).clip(lower=in_block)
    present_to = row_end.clip(upper=off_block)
    present_seconds = (present_to - present_from).dt.total_seconds()
    # Lower bound 0 covers operations whose off-block time precedes the in-block time.
    subset['plane_in_ratio'] = (present_seconds / row_len.dt.total_seconds()).clip(lower=0, upper=1)

    for col in cols_to_add:
        subset[col] = op[col]

    ops_chunks.append(subset)

print("Combination finished.")
print("Concatenating operations into one dataframe...")
if ops_chunks:
    combined = pd.concat(ops_chunks)
else:
    # pd.concat refuses an empty list; with no operations the result is an
    # empty table that still carries the expected columns.
    combined = pd.DataFrame(columns=[*gates.columns, 'plane_in_ratio', *cols_to_add])
print("Concatenation complete.")
print("Storing file as CSV...")
# to_csv returns None when given a path, so export_csv is always None; the name is
# kept only so the cell keeps defining it.
export_csv = combined.to_csv(output_4a_location, index=False, header=True)
print("File successfully exported.")

# Preview of the combined data (last expression, so the notebook displays it).
combined.head(20)


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


