# Generate Probability of Detection for Operators
Code author: Sahar H. El Abbadi
Date started: 2022-02-23
Date last edited: 2022-02-27

In [1]:
# Setup

# Imports
import pandas as pd
import numpy as np
from data_manipulation_methods import load_clean_data, load_meter_data, merge_meter_and_operator_data

# Load clean operator data
# Naming convention: [operator]_[stage]; the Kairos datasets carry an extra
# ls23 / ls25 suffix. `cm` = Carbon Mapper, `ghg` = GHGSat.
# load_clean_data() returns the ten datasets in exactly this order.
(
    cm_1, cm_2,
    ghg_1, ghg_2,
    kairos_1_ls23, kairos_1_ls25,
    kairos_2_ls23, kairos_2_ls25,
    kairos_3_ls23, kairos_3_ls25,
) = load_clean_data()

# Load meter data (one metered dataset per operator's overpasses)
#   cm_meter     -> metered data for Carbon Mapper overpasses
#   ghg_meter    -> metered data for GHGSat overpasses
#   kairos_meter -> metered data for Kairos overpasses
(
    cm_meter,
    ghg_meter,
    kairos_meter,
) = load_meter_data()

In [20]:
# Carbon Mapper probability of detection
#
# Builds `cm_detection`: one row per Carbon Mapper overpass with a non-zero
# metered release at or below `threshold`, recording whether the operator
# reported a detection.

n_bins = 5 # number of bins
threshold = 50 # highest release rate in kgh to show in detection threshold graph

# Combine the Carbon Mapper stage-1 operator data with the metered data for the
# Carbon Mapper overpasses (how rows are matched is defined by the project helper).
cm_df = merge_meter_and_operator_data(cm_1, cm_meter)

# Make column with easier name for coding for now.
# NOTE(review): presumably the metered release rate over the last 60 s of each
# overpass, in kg/h -- confirm against the meter data documentation.
cm_df['release_rate_kgh'] = cm_df['Last 60s (kg/h) - from Stanford']

# Per-overpass detection table, built in a single constructor call.
cm_detection = pd.DataFrame({
    'overpass_id': cm_df.PerformerExperimentID,
    'non_zero_release': cm_df.release_rate_kgh != 0,  # True if we conducted a release
    'operator_detected': cm_df.Detected,
    'release_rate_kgh': cm_df.release_rate_kgh,
})

# Select overpasses that are at or below the threshold of interest AND where the
# release is non-zero. Both conditions are combined into ONE boolean mask that is
# evaluated on the frame being filtered; chaining `.loc[...]` twice would apply a
# mask built from the unfiltered frame and rely on implicit index re-alignment.
at_or_below_threshold = cm_detection.release_rate_kgh <= threshold
cm_detection = cm_detection.loc[at_or_below_threshold & cm_detection.non_zero_release]

# Create bins for plot: n_bins equal-width bins whose edges run from 0 to threshold
bins = np.linspace(0, threshold, n_bins+1)

# Per-bin result arrays (one entry per bin), filled in by the loop over bins:
#   bin_size              number of overpasses that fall in the bin
#   bin_num_detected      number of those overpasses the operator detected
#   detection_probability detected fraction for the bin
#   bin_median            midpoint of the bin's two edges
#   bin_two_sigma         two standard deviations of the detected fraction
#   two_sigma_lower/upper error-bar lengths below/above the detected fraction
bin_size = np.zeros(n_bins, dtype='int')
bin_num_detected = np.zeros(n_bins, dtype='int')
detection_probability = np.zeros(n_bins)
bin_median = np.zeros(n_bins)
bin_two_sigma = np.zeros(n_bins)
two_sigma_lower = np.zeros(n_bins)
two_sigma_upper = np.zeros(n_bins)

# For each bin, find the number of data points, the detection probability and a
# two-sigma binomial error bar clipped so it never leaves the interval [0, 1].

for i in range(n_bins):

    # Set boundary of bin
    bin_min = bins[i]
    bin_max = bins[i+1]
    bin_median[i] = (bin_min+bin_max)/2  # midpoint of the bin edges

    # Select data within the bin range using a single boolean mask.
    # Bins are half-open, [bin_min, bin_max), except the LAST bin, which also
    # includes its upper edge: the overpass filter keeps release rates
    # `<= threshold`, so a release exactly at the threshold must land in a bin
    # instead of being silently dropped.
    above_min = cm_detection.release_rate_kgh >= bin_min
    if i == n_bins - 1:
        below_max = cm_detection.release_rate_kgh <= bin_max
    else:
        below_max = cm_detection.release_rate_kgh < bin_max
    binned_data = cm_detection.loc[above_min & below_max]

    # Number of overpasses in the bin and how many of them were detected
    n = len(binned_data)
    n_detected = binned_data.operator_detected.sum()
    bin_size[i] = n # this is the y-value for the bin in the plot
    bin_num_detected[i] = n_detected

    if n == 0:
        # Empty bin: the detected fraction is undefined. Record NaN explicitly
        # rather than dividing by zero (which warns, or raises for Python ints).
        p = np.nan
        sigma = np.nan
    else:
        p = n_detected/n
        # Standard deviation of the detected fraction for a binomial distribution
        sigma = np.sqrt(p*(1-p)/n)

    detection_probability[i] = p
    bin_two_sigma[i] = 2*sigma

    # Error-bar LENGTHS below/above p, defined by two sigma. Each is shortened
    # when the bar would otherwise leave [0, 1]. Comparisons with NaN are False,
    # so an empty bin keeps NaN lengths.
    two_sigma_lower[i] = 2*sigma
    two_sigma_upper[i] = 2*sigma
    if 2*sigma + p > 1:
        two_sigma_upper[i] = 1-p # bar ends exactly at probability 1
    if p - 2*sigma < 0:
        two_sigma_lower[i] = p # bar ends exactly at probability 0


# Assemble the per-bin results into one summary table (one row per bin).
# "bin_median" is the midpoint of each bin's edges; the two "two_sigma" columns
# are error-bar lengths below/above "detection_prob_mean".
summary_columns = [
    ("bin_median", bin_median),
    ("detection_prob_mean", detection_probability),
    ("detection_prob_two_sigma_upper", two_sigma_upper),
    ("detection_prob_two_sigma_lower", two_sigma_lower),
    ("n_data_points", bin_size),
    ("n_detected", bin_num_detected),
]
detection_prob = pd.DataFrame(dict(summary_columns))

print(detection_prob)
# TODO: once this cell is turned into a function, it should return
# cm_detection and detection_prob.


   bin_median  detection_prob_mean  detection_prob_two_sigma_upper  \
0         5.0                 0.25                        0.433013   
1        15.0                 1.00                        0.000000   
2        25.0                 1.00                        0.000000   
3        35.0                 1.00                        0.000000   
4        45.0                 1.00                        0.000000   

   detection_prob_two_sigma_lower  n_data_points  n_detected  
0                            0.25              4           1  
1                            0.00              4           4  
2                            0.00              8           8  
3                            0.00              6           6  
4                            0.00              6           6  
