# Generate histogram of releases
Code Author: Sahar El Abbadi
Date started: 2023-03-14
Date last edited: 2023-03-14

Notebook objective: to generate a histogram of Stanford releases, categorizing release by type:
- True Positive
- True Negative
- False Negative
- False Positive
- Missing data - overpass recorded in the field by Stanford but not reported as an overpass by the operator team
- Stanford Filtered
- Operator Filtered

In [9]:
# Setup
# Imports
from methods_source import load_overpass_summary, load_summary_files, clean_meter_column_names
import numpy as np
import pandas as pd



## Evaluate data that passes all QC filters

Select overpasses that pass both Stanford and Operator QC criteria. Generate dataframes for true positive, false positive, true negative, and false negative overpasses.

In [10]:
# Carbon Mapper overpass summary (stage 1, strict_discard switched off)
cm_reported = load_overpass_summary(
    stage=1,
    operator='Carbon Mapper',
    strict_discard=False,
)

# Keep only the overpasses whose qc_summary is "pass_all"
cm_qc_pass = cm_reported.query(expr='qc_summary == "pass_all"')

# Split the QC-passing overpasses into true positive / false positive /
# true negative / false negative frames.
# Assumes the helper returns them in the order TP, FP, TN, FN — TODO confirm.
# NOTE(review): classify_confusion_categories is not among the names imported
# in the setup cell — confirm it is in scope on a fresh kernel.
(cm_tp, cm_fp, cm_tn, cm_fn) = classify_confusion_categories(cm_qc_pass)


In [11]:
# Create bins for plot

# Binning parameters, reused by the later histogram cells.
# The printed cm_count_missing output shows bin medians 2.5 ... 47.5, which is
# consistent with n_bins equal-width bins between 0 and threshold.
# NOTE(review): make_histogram_bins is not among the names imported in the
# setup cell — confirm it is in scope on a fresh kernel.
threshold = 50
n_bins = 10

# Each count frame is built from the category frame of the same name.
# (Previously cm_count_fn was built from cm_tn and cm_count_tn from cm_fn,
# which swapped the false-negative and true-negative histograms.)
cm_count_tp = make_histogram_bins(cm_tp, threshold, n_bins)
cm_count_fp = make_histogram_bins(cm_fp, threshold, n_bins)
cm_count_tn = make_histogram_bins(cm_tn, threshold, n_bins)
cm_count_fn = make_histogram_bins(cm_fn, threshold, n_bins)

In [12]:
# Overpasses filtered out by Stanford: rows where stanford_kept is False
cm_su_qc_fail = cm_reported.query(expr='stanford_kept == False')

# Bin the Stanford-filtered overpasses with the shared threshold / n_bins
cm_count_su_fail = make_histogram_bins(
    cm_su_qc_fail,
    threshold,
    n_bins,
)

In [13]:
# Overpasses filtered out by Carbon Mapper.
# A qc_summary of 'fail_operator' marks an overpass that passed Stanford QC
# but did not pass the operator's QC.
cm_op_qc_fail = cm_reported.query(expr='qc_summary == "fail_operator"')

# Bin the operator-filtered overpasses with the shared threshold / n_bins
cm_count_op_fail = make_histogram_bins(
    cm_op_qc_fail,
    threshold,
    n_bins,
)

In [17]:
# Missing data: overpasses where Stanford conducted a release and a Flightradar
# overpass exists, but no operator overpass was reported.

# Raw meter summary frames (presumably one per operator — TODO confirm);
# only the Carbon Mapper frame is used in this cell.
(cm_meter_raw,
 ghg_meter_raw,
 kairos_meter_raw,
 mair_meter_raw) = load_summary_files()

# Earlier inline selection, kept for reference (presumably what
# find_missing_data now wraps — verify against its definition):
#   cm_missing_raw = cm_meter_raw.query('PerformerOverpassID.isnull() == True & StanfordOverpassID.isnull() == False')
#   cm_missing = clean_meter_column_names(cm_missing_raw, 'FlightradarOverpassID', 'flightradar')
# NOTE(review): find_missing_data is not among the names imported in the setup
# cell — confirm it is in scope on a fresh kernel.
cm_missing = find_missing_data(cm_meter_raw)

# Bin the missing overpasses with the shared threshold / n_bins
cm_count_missing = make_histogram_bins(cm_missing, threshold, n_bins)

In [19]:
# Show the binned counts first, then the underlying missing-overpass rows
print(cm_count_missing, cm_missing, sep='\n')

   bin_median  n_data_points
0         2.5              1
1         7.5              0
2        12.5              0
3        17.5              0
4        22.5              0
5        27.5              0
6        32.5              0
7        37.5              0
8        42.5              0
9        47.5              0
     overpass_id  phase_iii   kgh_gas_30   kgh_gas_60   kgh_gas_90  \
70            71          0     0.000000     0.000000     0.000000   
71            72          1   328.525200   328.525200   328.525200   
112          113          1  1190.360903  1190.360903  1190.360903   

      kgh_ch4_30  release_rate_kgh   kgh_ch4_90  methane_fraction meter  \
70      0.000000          0.000000     0.000000          0.929933  None   
71    305.506534        305.506534   305.506534          0.929933    mc   
112  1142.587752       1142.587752  1142.587752          0.959867    pc   

     qc_su_discard  qc_su_discard_strict  altitude_meters      time  \
70               0          

In [16]:
# Raw meter rows for overpasses that have a Stanford overpass ID but no
# operator (performer) overpass ID.
# cm_missing_raw is not assigned by any other live code in the cells shown, so
# it is built here from cm_meter_raw; otherwise this cell raises NameError on a
# fresh kernel (Restart & Run All).
no_operator_id = cm_meter_raw['PerformerOverpassID'].isna()
has_stanford_id = cm_meter_raw['StanfordOverpassID'].notna()
cm_missing_raw = cm_meter_raw[no_operator_id & has_stanford_id]
print(cm_missing_raw)

           Date Time (UTC) - from Flightradar Time (UTC) - from Stanford  \
70   2022-10-28                      18:52:00                   18:52:00   
71   2022-10-28                      19:04:26                   19:04:26   
112  2022-10-31                      19:10:42                   19:10:49   

    Time (UTC) - from team  FlightradarOverpassID  StanfordOverpassID  \
70                     NaN                     71                  71   
71                     NaN                     72                  72   
112                    NaN                    113                 113   

     PerformerOverpassID  PhaseIII  \
70                   NaN         0   
71                   NaN         1   
112                  NaN         1   

     Last 30s (kg/h) - whole gas measurement - from Flightradar  \
70                                            0.000000            
71                                          328.525200            
112                                        1190.