# Confidence intervals for power from point on ROC curve

## (1) Train a model and obtain scores on a test set


In [1]:
# Load modules
import numpy as np
from sklearn.linear_model import LogisticRegression
from trialML.power import twosided_classification

# Generate data
np.random.seed(1)
n, p = 150, 10
k1, k2 = 50, 100
X, y = np.random.randn(n, p), np.random.binomial(1, 0.5, n)
X_train, y_train, X_test, y_test = X[:k1], y[:k1], X[k1:k2], y[k1:k2]
X_trial, y_trial = X[k1:], y[k1:]
mdl = LogisticRegression(penalty='none', solver='lbfgs')
mdl.fit(X=X_train, y=y_train)
# test set scores
s_test = mdl.predict_proba(X_test)[:,1]
s_test = np.log(s_test / (1-s_test))  # logit transform

## (2) Select a point on the ROC curve when sensitivity equals 50%

In [2]:
m1 = 'sensitivity'
m2 = 'specificity'
alpha = 0.05  # type-I error rate for test
gamma1 = 0.5  # for sensitivity
power_2s = twosided_classification(m1, m2, alpha)
power_2s.set_threshold(y=y_test, s=s_test, gamma1=gamma1)

## (3) Get performance range on test set

In [3]:
df_gamma = power_2s.statistic_CI(y=y_test, s=s_test, threshold=power_2s.threshold)
df_gamma.round(3)

Unnamed: 0,cidx,m,gamma_hat,den,ratio,gamma_lb,gamma_ub
0,0,1,0.522,23,0.46,0.306,0.732
1,1,2,0.481,27,0.54,0.287,0.681


## (4) Estimate power range

In [4]:
n_trial = len(X_trial)
margin = 0.05
df_power = power_2s.get_power(n_trial=n_trial, margin=margin, adjust=True)
df_power.round(3)

Unnamed: 0,cidx,m,gamma_hat,ratio,gamma_lb,gamma_ub,threshold,gamma0,power_point,power_lb,power_ub
0,0,1,0.522,0.46,0.306,0.732,0.448,0.472,0.167,0.0,0.983
1,1,2,0.481,0.54,0.287,0.681,0.448,0.431,0.185,0.0,0.985


## (5) Run trial

In [5]:
gamma0 = df_gamma['gamma_hat'] - margin
s_trial = mdl.predict_proba(X_trial)[:,1]
s_trial = np.log(s_trial / (1-s_trial))  # logit transform
df_trial = power_2s.statistic_pval(y=y_trial, s=s_trial, gamma0=gamma0)
df_trial.round(3)

Unnamed: 0,cidx,m,gamma_hat,den,z,pval,reject
0,0,1,0.532,47,0.826,0.204,False
1,1,2,0.415,53,-0.241,0.595,False
