# Logistic Regression for probability of detection

Fit a logistic regression to detection data.


In [1]:
# Example code: exercise the wind-data helpers on a fixed pair of timestamps
import datetime

from methods_source import (
    calc_avg_windspeed,
    summarize_wind_conditions,
)

# Two timestamps on 2022-10-10, one minute apart (18:16:10 and 18:17:10)
test_1 = datetime.datetime(
    year=2022, month=10, day=10, hour=18, minute=16, second=10,
)
test_2 = datetime.datetime(
    year=2022, month=10, day=10, hour=18, minute=17, second=10,
)

# Helper called with both ends of the window
wind_test1 = summarize_wind_conditions(test_1, test_2)

# Helper called with a single timestamp and direction='forward'
wind_test2 = calc_avg_windspeed(test_1, direction='forward')

In [2]:
import pandas as pd

from methods_source import (
    calc_avg_windspeed,
    feet_per_meter,
    load_overpass_summary,
)

# Overpass summary for 'Kairos', loaded with strict_discard turned off
operator = load_overpass_summary('Kairos', 1, strict_discard=False)

# NOTE: from this line on, `feet_per_meter` names the value returned by the
# helper rather than the helper itself (the import above restores the
# function each time this cell is re-run).
feet_per_meter = feet_per_meter()

# Collect every variable used in Conrad et al., 2023, even those that may
# not end up in the final model. One row per overpass.
op_POD_variables = pd.DataFrame({
    'overpass_id': operator['overpass_id'],
    'overpass_datetime': operator['overpass_datetime'],
    # flight altitude in m
    'h': operator['altitude_feet'] / feet_per_meter,
    # windspeed in m/s
    'u': operator['overpass_datetime'].apply(calc_avg_windspeed),
    # release rate in kg / hr
    'Q': operator['release_rate_kgh'],
    # operator detected as 0 or 1
    'D': operator['operator_detected'].apply(int),
})

# Code for Conrad et al., 2023
# def make_predictor_function(h, u, Q):
#     def my_predictor(phi1, phi2, phi3, phi4, phi5, phi6):
#         return (phi1 * (Q - phi2) ** phi3) / (((h/1000)**phi4) * ((u - phi5) ** phi6))
#
#     return my_predictor
# Next:
# Generate "my_predictor" for each overpass
# Generate Di ln(Fi) + (1 - Di) ln(1 - Fi) for each overpass
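# Sum that column over all overpasses; assuming Fi is the predicted detection
# probability, the sum is the log-likelihood of the observed detections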


In [18]:
# Logistic regression of detection (D) on release rate (Q)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Restrict the data to release rates at or below a maximum (kgh).
# NOTE: this rebinds op_POD_variables, so rows above the threshold are no
# longer available to any cell that runs after this one.
threshold = 30  # kgh
op_POD_variables = op_POD_variables[op_POD_variables['Q'] <= threshold]

# The estimator takes a 2-D feature matrix (one column here) and a 1-D
# vector of labels.
x = op_POD_variables['Q'].to_numpy().reshape(-1, 1)
y = op_POD_variables['D'].to_numpy()

# NOTE(review): LogisticRegression applies an L2 penalty (C=1.0) by default,
# so the reported coefficients are regularized estimates - confirm that is
# intended for this fit.
model = LogisticRegression(solver='liblinear', random_state=0).fit(x, y)

print(f'Model intercept (bo): {model.intercept_}\nModel slope (b1): {model.coef_}')

Model intercept (bo): [-2.04562269]
Model slope (b1): [[0.32951666]]


In [10]:
# Display evenly spaced values starting at 0 in steps of 0.1, stopping before 30.
# NOTE(review): presumably a release-rate grid (kgh) for evaluating the fitted
# model over the same range as the 30 kgh threshold - confirm.
np.arange(0, 30, 0.1)

array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
        5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,
        6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,
        7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,
        8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,
        9.9, 10. , 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9,
       11. , 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12. ,
       12.1, 12.2, 12.3, 12.4, 12.5, 12.6, 12.7, 12.8, 12.9, 13. , 13.1,
       13.2, 13.3, 13.4, 13.5, 13.6, 13.7, 13.8, 13.9, 14. , 14.1, 14.2,
       14.3, 14.4, 14.5, 14.6, 14.7, 14.8, 14.9, 15