# Summarize datasets for all operator results
Code author: Sahar H. El Abbadi
Date started: 2023-03-13
Date last edited: 2023-03-13

In [12]:
# Setup

# imports
from methods_source import generate_overpass_summary, load_clean_operator_reports, abbreviate_op_name, load_meter_data


## Generate summary tables

Generate overpass summary tables for each operator. Overpass summary tables extract all relevant information from operator reports and metering data that is needed for downstream analysis. These tables are saved as CSV files in 03_results > overpass_summary, using the format operator_stage_overpasses.csv.

Columns in the overpass summary tables are:
- overpass_id: this is the ID for the specific aircraft overpass, and matches PerformerExperimentID overpass number in the raw operator data
- overpass_datetime: date and time of overpass, in UTC time. This can be generated using the Flightradar GPS timestamp (timekeeper = 'flightradar'), Stanford's on the ground estimate of overhead time (timekeeper = 'stanford'), or using the timestamp according to the operator report (timekeeper = 'team'). The value used for all analysis in the paper is Flightradar.
- zero_release: True if the release by Stanford is 0 kgh, False if greater than 0 kgh
- non_zero_release: True if release by Stanford is greater than 0 kgh, False if equal to 0 kgh
- operator_kept: True if this overpass passed the operator QC criteria
- stanford_kept: True if this overpass passed Stanford's QC criteria
- phase_iii: True if we provided this overpass to the operator during Phase III of unblinding
- pass_all_qc: True if passed both operator and Stanford QC
- fail_all_qc: True if this overpass failed both operator and Stanford QC
- operator_detected: True if operator detected a release. False if they did not
- operator_quantification: operator's quantification estimate as reported in operator report
- operator_lower: lower bound on operator's quantification estimate
- operator_upper: upper bound on operator's quantification estimate
- qc_summary: summarizes results of both operator and Stanford QC. Must be one of the following: 'pass_all', 'fail_stanford', 'fail_operator', 'fail_all'

In [13]:
# Operators and unblinding stages to iterate over when generating summaries.
operators = ['Carbon Mapper', 'GHGSat', 'Kairos LS23', 'Kairos LS25', 'Methane Air']
stages = [1, 2, 3]

# Strict discard flag, passed through to generate_overpass_summary.
strict_discard = False

# Load clean operator data.
# Variable naming format: [operator abbreviation]_[stage]
# Note the return order: the Kairos reports alternate LS23 / LS25 within each stage.
cm_1, cm_2, cm_3, ghg_1, ghg_2, ghg_3, kairos_ls23_1, kairos_ls25_1, kairos_ls23_2, kairos_ls25_2, kairos_ls23_3, kairos_ls25_3 = load_clean_operator_reports()

# Look-up of operator reports keyed by '[operator abbreviation]_[stage]'.
# Every key must map to the variable of the same name.
report_dictionary = {
    'cm_1': cm_1,
    'cm_2': cm_2,
    'cm_3': cm_3,
    'ghg_1': ghg_1,
    'ghg_2': ghg_2,
    'ghg_3': ghg_3,  # fixed: previously pointed at ghg_2, so stage 3 reused stage 2 data
    'kairos_ls23_1': kairos_ls23_1,
    'kairos_ls25_1': kairos_ls25_1,
    'kairos_ls23_2': kairos_ls23_2,
    'kairos_ls25_2': kairos_ls25_2,
    'kairos_ls23_3': kairos_ls23_3,
    'kairos_ls25_3': kairos_ls25_3,
}

# Load meter data using the 'flightradar' timekeeper.
cm_meter, ghg_meter, kairos_meter, mair_meter = load_meter_data('flightradar')

# Look-up of meter data keyed by '[operator abbreviation]_meter'.
meter_dictionary = {
    'cm_meter': cm_meter,
    'ghg_meter': ghg_meter,
    'kairos_meter': kairos_meter,
    'mair_meter': mair_meter,
}


# Generate the overpass summary for every operator / stage combination that has
# a report in report_dictionary. The return value is discarded here: the call is
# made for its side effect (the notebook text describes the tables as being
# saved to CSV -- confirm in methods_source).
for operator in operators:
    # report_dictionary holds no Methane Air reports, so skip that operator.
    if operator == 'Methane Air':
        continue

    op_ab = abbreviate_op_name(operator)

    # Both Kairos entries (LS23 and LS25) use the same 'kairos_meter' dataset;
    # every other operator has its own '[abbreviation]_meter' entry.
    if operator in ('Kairos LS23', 'Kairos LS25'):
        operator_meter = meter_dictionary['kairos_meter']
    else:
        operator_meter = meter_dictionary[f'{op_ab}_meter']

    for stage in stages:
        operator_report = report_dictionary[f'{op_ab}_{stage}']
        generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

## Valid quantification estimates

Number of overpasses that pass both QC criteria and are quantified as "detections" by the operator.

In [14]:
# Carbon Mapper, Stage 1: select the report and meter data to summarize.
operator, stage = 'Carbon Mapper', 1
operator_report = report_dictionary['cm_1']
operator_meter = meter_dictionary['cm_meter']

# Use the loose (non-strict) Stanford discard criteria.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = False
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with non-strict Stanford QC: {length}')


Number of Carbon Mapper detects that pass QC with non-strict Stanford QC: 71


In [15]:
# Re-run the summary with the strict Stanford discard criteria.
# Relies on operator, stage, operator_report and operator_meter still holding
# the values assigned in the preceding cell.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = True
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with strict Stanford QC: {length}')

Number of Carbon Mapper detects that pass QC with strict Stanford QC: 67


In [16]:
# GHGSat, Stage 1: select the report and meter data to summarize.
operator, stage = 'GHGSat', 1
operator_report = report_dictionary['ghg_1']
operator_meter = meter_dictionary['ghg_meter']

# Use the loose (non-strict) Stanford discard criteria.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = False
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with non-strict Stanford QC: {length}')

Number of GHGSat detects that pass QC with non-strict Stanford QC: 115


In [17]:
# Re-run the summary with the strict Stanford discard criteria.
# Relies on operator, stage, operator_report and operator_meter still holding
# the values assigned in the preceding cell.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = True
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with strict Stanford QC: {length}')

Number of GHGSat detects that pass QC with strict Stanford QC: 99


In [18]:
# Kairos LS23, Stage 1: select the report and meter data to summarize.
operator, stage = 'Kairos LS23', 1
operator_report = report_dictionary['kairos_ls23_1']
operator_meter = meter_dictionary['kairos_meter']

# Use the loose (non-strict) Stanford discard criteria.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = False
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with non-strict Stanford QC: {length}')

Number of Kairos LS23 detects that pass QC with non-strict Stanford QC: 118


In [19]:
# Re-run the summary with the strict Stanford discard criteria.
# Relies on operator, stage, operator_report and operator_meter still holding
# the values assigned in the preceding cell.
# NOTE(review): the original explanatory comment was cut off mid-sentence
# ("Stanford allows up to 10% of the ..."); confirm the exact rule in methods_source.
strict_discard = True
operator_summary = generate_overpass_summary(operator, stage, operator_report, operator_meter, strict_discard)

# Keep overpasses that passed both QC checks AND were flagged as detections by the operator.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
was_detected = operator_summary.operator_detected == True
valid_overpasses = operator_summary.loc[passed_all_qc & was_detected]
length = len(valid_overpasses)
print(f'Number of {operator} detects that pass QC with strict Stanford QC: {length}')

Number of Kairos LS23 detects that pass QC with strict Stanford QC: 103


In [20]:
# Overpasses that passed all QC and carry a (non-null) operator quantification estimate.
# operator_summary is whatever the most recently executed cell produced; when the
# notebook is run top to bottom that is Kairos LS23, stage 1, with strict discard.
passed_all_qc = operator_summary.qc_summary == 'pass_all'
has_quantification = operator_summary.operator_quantification.notna()
valid_overpasses = operator_summary.loc[passed_all_qc & has_quantification]
print(valid_overpasses)

     overpass_id    overpass_datetime  zero_release  non_zero_release  \
0            1.0  2022-10-24 16:46:15          True             False   
1            2.0  2022-10-24 16:49:22          True             False   
3            4.0  2022-10-24 16:55:42         False              True   
4            5.0  2022-10-24 16:59:01         False              True   
5            6.0  2022-10-24 17:02:04         False              True   
..           ...                  ...           ...               ...   
327        328.0  2022-10-28 19:31:54         False              True   
333        334.0  2022-10-28 19:51:37         False              True   
336        337.0  2022-10-28 20:01:31         False              True   
343        344.0  2022-10-28 20:23:51         False              True   
346        347.0  2022-10-28 20:33:34         False              True   

     operator_kept  stanford_kept  phase_iii  pass_all_qc  fail_all_qc  \
0             True           True          0     

In [21]:
# Rows of the current operator_summary whose non_zero_release flag is True.
# operator_summary is whatever the most recently executed summary cell produced.
is_non_zero_release = operator_summary.non_zero_release == True
new_summary = operator_summary.loc[is_non_zero_release]
print(new_summary)

     overpass_id    overpass_datetime  zero_release  non_zero_release  \
2            3.0  2022-10-24 16:52:32         False              True   
3            4.0  2022-10-24 16:55:42         False              True   
4            5.0  2022-10-24 16:59:01         False              True   
5            6.0  2022-10-24 17:02:04         False              True   
6            7.0  2022-10-24 17:05:20         False              True   
..           ...                  ...           ...               ...   
344        345.0  2022-10-28 20:27:06         False              True   
345        346.0  2022-10-28 20:30:12         False              True   
346        347.0  2022-10-28 20:33:34         False              True   
347        348.0  2022-10-28 20:36:41         False              True   
348        349.0  2022-10-28 20:39:57         False              True   

     operator_kept  stanford_kept  phase_iii  pass_all_qc  fail_all_qc  \
2            False          False          1     

In [22]:
# Syntax check: select the 'pass_all' rows with DataFrame.query instead of a boolean mask.
# operator_summary is whatever the most recently executed summary cell produced.
pass_all_expression = 'qc_summary == "pass_all"'
new_summary = operator_summary.query(pass_all_expression)
print(new_summary)

     overpass_id    overpass_datetime  zero_release  non_zero_release  \
0            1.0  2022-10-24 16:46:15          True             False   
1            2.0  2022-10-24 16:49:22          True             False   
3            4.0  2022-10-24 16:55:42         False              True   
4            5.0  2022-10-24 16:59:01         False              True   
5            6.0  2022-10-24 17:02:04         False              True   
..           ...                  ...           ...               ...   
327        328.0  2022-10-28 19:31:54         False              True   
333        334.0  2022-10-28 19:51:37         False              True   
336        337.0  2022-10-28 20:01:31         False              True   
343        344.0  2022-10-28 20:23:51         False              True   
346        347.0  2022-10-28 20:33:34         False              True   

     operator_kept  stanford_kept  phase_iii  pass_all_qc  fail_all_qc  \
0             True           True          0     