## Analysis for paper writing
Author: Sahar H. El Abbadi
Date: 2023-04-12


## Summarize distribution of overpasses

Total number of overpasses, missing data, max and min given to each team, number that passed Stanford QC

Missing overpasses: overpasses that Stanford documented but that are not included in the operator's report file

### Carbon Mapper summary for results

In [None]:
from writing_analysis import operator_releases_summary_stats
# Carbon Mapper
# Run the shared summary-statistics helper for this operator. `operator` is a
# notebook-level variable, so it stays set to this value until a later cell
# reassigns it.
operator = 'Carbon Mapper'
operator_releases_summary_stats(operator)

### GHGSat summary for results

In [None]:
from writing_analysis import operator_releases_summary_stats
# GHGSat
# Run the shared summary-statistics helper for this operator. `operator` is a
# notebook-level variable, so it stays set to this value until a later cell
# reassigns it.
operator = 'GHGSat'
operator_releases_summary_stats(operator)

In [None]:
# GHGSat internal QC: count the plumes GHGSat quantified despite flagging the
# conditions as sub-optimal, restricted to overpasses that pass Stanford QC

from methods_source import load_operator_report_dictionary, load_overpass_summary

operator = 'GHGSat'
ghg_report = load_operator_report_dictionary()['ghg_1']

# GHGSat QC flags of interest:
#   GH-2: emissions were quantified despite sub-optimal conditions
#   GH-4: diffuse emissions visible over site
ghg_report_quantified = ghg_report[ghg_report['QuantifiedPlume'] == True]
qc_mask = ghg_report_quantified['QCFlag'].isin(['GH-2', 'GH-4'])
ghg_report_poor_conditions = ghg_report_quantified[qc_mask]

# Overpass IDs that were quantified under poor conditions
poor_condition_overpasses = ghg_report_poor_conditions['overpass_id']

# Look those IDs up in the stage 1 overpass summary ...
overpasses = load_overpass_summary(operator, stage=1)
in_poor_conditions = overpasses['overpass_id'].isin(poor_condition_overpasses)
poor_condition_overpass_summary = overpasses[in_poor_conditions]

# ... and keep only the ones that pass Stanford QC
kept_by_stanford = poor_condition_overpass_summary['stanford_kept'] == True
poor_condition_overpasses_pass_SU = poor_condition_overpass_summary[kept_by_stanford]

print(f'Number of overpasses that pass SU quality control but are flagged by GHGSat as sub-optimal conditions for quantification: {len(poor_condition_overpasses_pass_SU)}')

In [None]:
# Further investigate GHGSat false negatives, as they detect small releases but also had some higher false negatives
from methods_source import load_overpass_summary, classify_confusion_categories

from writing_analysis import print_overpass_info

operator = 'GHGSat'
overpasses = load_overpass_summary(operator, stage=1)
# NOTE(review): other cells in this notebook filter on the boolean `pass_all_qc`
# column rather than `qc_summary == 'pass_all'` — confirm the two are equivalent.
pass_su_qc = overpasses.loc[overpasses.qc_summary == 'pass_all']
true_positives, false_positives, true_negatives, false_negatives = classify_confusion_categories(pass_su_qc)

# Release rate (kgh) used to split the false negatives into "small" and "large"
small_release_threshold_kgh = 5

# How many of the false negatives were releases of less than the threshold?
small_false_negatives = false_negatives.loc[false_negatives['release_rate_kgh'] < small_release_threshold_kgh]
print(f'Number of false negatives that were less than {small_release_threshold_kgh} kgh: {len(small_false_negatives)}\n')

# Which false negatives were at or above the threshold?
# `>=` (not `>`) so that a release of exactly the threshold value is not
# silently left out of both groups.
large_false_negatives = false_negatives.loc[false_negatives['release_rate_kgh'] >= small_release_threshold_kgh]
print(f'False negatives that were at or above {small_release_threshold_kgh} kgh:\n')
print(large_false_negatives['release_rate_kgh'])
print('\n')

# Print details of each of the false negatives that is at or above the threshold
for _idx, row in large_false_negatives.iterrows():
    print_overpass_info(row)

### Kairos summary for results

In [None]:
from writing_analysis import operator_releases_summary_stats
# Kairos
# Run the shared summary-statistics helper for this operator. `operator` is a
# notebook-level variable, so it stays set to this value until a later cell
# reassigns it.
operator = 'Kairos'
operator_releases_summary_stats(operator)

In [None]:
# Additional Kairos analysis: count the low-end releases among the stage 1
# overpasses that pass all QC
from methods_source import load_overpass_summary

operator = 'Kairos'
overpasses = load_overpass_summary(operator, stage=1)

# Keep only the overpasses that pass all QC
pass_all_qc = overpasses[overpasses['pass_all_qc'] == True]

# How many of those releases were below 15 kgh
lower_end = pass_all_qc[pass_all_qc['release_rate_kgh'] < 15]
print(f'Total number of releases for {operator} less than 15 kgh: {len(lower_end)} ')

### MethaneAIR summary for results


In [None]:
### MethaneAIR
# Run the shared summary-statistics helper for this operator. `operator` is a
# notebook-level variable, so it stays set to this value until a later cell
# reassigns it.
from writing_analysis import operator_releases_summary_stats
operator = 'MethaneAIR'
operator_releases_summary_stats(operator)

### Scientific Aviation summary for results


In [None]:
### SciAV
# Run the shared summary-statistics helper for this operator.
# NOTE(review): this is the only summary call in this section that passes
# strict_discard=True; its exact effect is defined in writing_analysis —
# presumably a stricter discard criterion, confirm there.
from writing_analysis import operator_releases_summary_stats
operator = 'Scientific Aviation'
operator_releases_summary_stats(operator, strict_discard=True)

In [None]:
# Additional Scientific Aviation analysis:
# list the overpass datetimes of the points that the operator did not keep
from methods_source import load_overpass_summary

operator = 'Scientific Aviation'
overpasses = load_overpass_summary(operator, strict_discard=True, stage=1)

relevant_col = 'overpass_datetime'
# Rows where operator_kept is False, reduced to the datetime column in one step
not_kept_by_operator = overpasses['operator_kept'] == False
fail_sciav_qc_dates = overpasses.loc[not_kept_by_operator, relevant_col]
print(fail_sciav_qc_dates)

## Analyze Error Bar Profile

What fraction of quantification estimates have upper or lower bounds that cross the parity line? Functions for testing parity apply the following filters to the operator overpasses:
  - Must pass all QC
  - Operator quantification must not be null, so that NaN values are not counted as False (i.e., as not crossing the parity line)

In [4]:
from writing_analysis import test_parity_all_stages

# (operator, strict_discard) pairs, in the order they are reported.
# NOTE(review): strict_discard is assigned for each operator but is never
# passed to test_parity_all_stages — confirm whether it should be forwarded.
parity_operators = (
    ('Carbon Mapper', False),
    ('GHGSat', False),
    ('MethaneAIR', False),
    ('Scientific Aviation', True),
)

# Run the parity test once per operator
for operator, strict_discard in parity_operators:
    test_parity_all_stages(operator)

Fraction of Carbon Mapper Stage 1 overpasses with 95% CI that encompasses parity line: 89%
Fraction of Carbon Mapper Stage 2 overpasses with 95% CI that encompasses parity line: 76%
Fraction of Carbon Mapper Stage 3 overpasses with 95% CI that encompasses parity line: 80%


Fraction of GHGSat Stage 1 overpasses with 95% CI that encompasses parity line: 93%
Fraction of GHGSat Stage 2 overpasses with 95% CI that encompasses parity line: 84%
Fraction of GHGSat Stage 3 overpasses with 95% CI that encompasses parity line: 84%


Fraction of MethaneAIR Stage 1 overpasses with 95% CI that encompasses parity line: 85%


Fraction of Scientific Aviation Stage 1 overpasses with 95% CI that encompasses parity line: 73%




In [3]:
import pathlib

from writing_analysis import calc_parity_intersection

# Compute the Carbon Mapper stage 3 parity comparison and save it as a CSV
operator = 'Carbon Mapper'
cm_3_df = calc_parity_intersection(operator, 3, strict_discard=False)

# Relative output location: 03_results/paper_analysis/cm_3_df_parity_compare.csv
cm_3_csv_path = pathlib.PurePath('03_results') / 'paper_analysis' / 'cm_3_df_parity_compare.csv'
cm_3_df.to_csv(cm_3_csv_path)

Fraction of Carbon Mapper Stage 3 overpasses with 95% CI that encompasses parity line: 80%
