In [1]:
from evaluate_performance import Evaluate

# Evaluate each result file once; every call yields a pair that is unpacked
# into the matching (sens_*, spec_*) names. Later cells index sens_* as
# {x-axis value: {month: value}}.
(
    (sens_base32, spec_base32),
    (sens_base1536, spec_base1536),
    (sens_gpt1536, spec_gpt1536),
) = (
    Evaluate.sens_spec_range(file_name = result_name)
    for result_name in ('rs_tf_dx', 'rs_tf1536_dx', 'rs_tf_gpt1536_dx')
)


In [2]:
import matplotlib.pyplot as plt
import numpy as np
def plot_sen_spec(test, color, month = '3m', smoothing = False, degree = 2):
    """Scatter one model's values for ``month`` against the keys of ``test``.

    Draws on the current matplotlib figure via ``plt``; nothing is returned.

    Parameters
    ----------
    test : dict
        Mapping whose keys are used as x values and whose values are
        indexable by ``month``; ``value[month]`` is used as the y value.
    color : sequence
        ``color[0]`` colours the scatter points. ``color[1]`` colours the
        fitted curve and is only read when ``smoothing`` is true.
    month : str
        Key selecting which entry of each value in ``test`` is plotted.
    smoothing : bool
        When true, additionally draw a least-squares polynomial fit.
    degree : int
        Degree of the fitted polynomial (defaults to 2, the degree the
        original code used).
    """
    spec = list(test.keys())
    sens = [v[month] for v in test.values()]
    plt.plot(spec, sens, '.', color = color[0])

    if smoothing:
        # Least-squares polynomial fit of the plotted points.
        coefficients = np.polyfit(spec, sens, degree)
        polynomial = np.poly1d(coefficients)

        # `spec` is a plain list, so use the built-in min/max rather than
        # the ndarray methods (list has no .min()/.max()).
        x_smooth = np.linspace(min(spec), max(spec), 500)
        y_smooth = polynomial(x_smooth)
        plt.plot(x_smooth, y_smooth, color=color[1])

# Overlay the 3-month points of the three models on one figure
# (no smoothing, so the second colour entry is never read).
plot_sen_spec(sens_base32, ['gray',''], month = '3m')
plot_sen_spec(sens_base1536, ['k',''], month = '3m')
plot_sen_spec(sens_gpt1536, ['r',''], month = '3m')
plt.xlabel('Specificity', fontsize = 15)
plt.ylabel('Sensitivity', fontsize = 15)
# Legend entries are matched to the plotted lines by position, so this list
# must stay in the same order as the plot_sen_spec calls above.
# The backslash before '_' is written as '\\_' so the label still contains
# a literal backslash-underscore without relying on an invalid escape sequence.
# NOTE(review): '\t' is a tab character here, not a TeX command; it is kept
# as-is so the rendered label is unchanged. Presumably '\text' or '\mathrm'
# was intended - confirm before changing.
plt.legend(labels = ['$Baseline_{\t{fine\\_tune32}}$',
                     '$Baseline_{\t{fine\\_tune1536}}$',
                     '$GPT4_{\t{fine\\_tune1536}}$',],)
plt.show()

### AUC (three decimal places)

In [None]:
import pickle
# Read the three pickled result objects, in the order
# baseline-32, baseline-1536, GPT-1536.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# use it on files produced by this project.
result_paths = (
    'output/rs_tf_dx.pickle',
    'output/rs_tf1536_dx.pickle',
    'output/rs_tf_gpt1536_dx.pickle',
)
loaded_results = []
for result_path in result_paths:
    with open(result_path, 'rb') as handle:
        loaded_results.append(pickle.load(handle))

base32, base1536, gpt1536 = loaded_results

In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve
def roc_pr(res, plot = 'yes'):
    """Compute the ROC AUC of every model at every horizon.

    Parameters
    ----------
    res : dict
        Maps a model name to a per-horizon mapping. For each horizon ``m``
        in ``'3m', '6m', '12m', '36m', '60m'``, ``res[name][m]`` must
        provide ``'labels'`` and ``'raw_scores'``, both convertible to 2-D
        arrays; one column of each is used per horizon (see below).
    plot : str
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    dict
        ``{name: {m: "<name>: <AUC with three decimals>"}}``.
        The ROC curve points themselves are not retained.
    """
    # Horizon code -> horizon name; the column read for a horizon is
    # ``5 - code``, i.e. column 0 for '3m' through column 4 for '60m'.
    label2month = {5:'3m', 4:'6m', 3:'12m', 2:'36m', 1:'60m'}
    roc_collect = {}

    for k, v in res.items():
        roc_collect[k] = {}
        for la_sum, m in label2month.items():
            column = 5 - la_sum
            y_true = np.asarray(v[m]['labels'])[:, column]
            y_score = np.asarray(v[m]['raw_scores'])[:, column]
            fpr, tpr, _ = roc_curve(y_true, y_score)

            roc_auc = auc(fpr, tpr)
            roc_collect[k][m] = k + ': {:.3f}'.format(roc_auc)

    # The original built this mapping but never returned it, so callers
    # always received None.
    return roc_collect

In [None]:
# Display label -> loaded results for each model; the labels double as the
# prefix of the strings produced by roc_pr.
# The backslash before '_' is written as '\\_' so each key still contains a
# literal backslash-underscore without relying on an invalid escape sequence.
# NOTE(review): '\t' is a tab character here, not a TeX command; it is kept
# as-is so the keys are unchanged. Presumably '\text' or '\mathrm' was
# intended - confirm before changing.
res = {'$Baseline_{\t{fine\\_tune32}}$':base32,
       '$Baseline_{\t{fine\\_tune1536}}$':base1536,
       '$GPT_{\t{fine\\_tune1536}}$':gpt1536}

In [None]:
# Run roc_pr over every model in `res` and echo whatever it returns as the
# cell output.
roc = roc_pr(res)
roc

### Baseline vs GPT

In [None]:
# diff[m][spec]: percentage change of the GPT-1536 value relative to the
# baseline-1536 value at the same key `spec`, for horizon `m`.
diff = {horizon: {} for horizon in ['3m', '6m', '12m', '36m', '60m']}
for spec, m_sens in sens_gpt1536.items():
    baseline_at_spec = sens_base1536[spec]
    for m in ['3m', '6m', '12m', '36m', '60m']:
        ratio = m_sens[m] / baseline_at_spec[m]
        diff[m][spec] = (ratio - 1) * 100

In [None]:
# For each horizon, report the mean of the last ten entries of diff[m].
for m in ['3m', '6m', '12m', '36m', '60m']:
    last_ten = list(diff[m].values())[-10:]
    print(m, np.mean(last_ten))

In [None]:
import numpy as np
import scipy.stats as stats

# 95% t-based confidence interval for the mean of the last ten entries of
# diff['3m'].
data = list(diff['3m'].values())[-10:]
n = len(data)

# Sample mean and sample standard deviation (ddof=1 is Bessel's correction).
mean = np.mean(data)
std_dev = np.std(data, ddof=1)

# Standard error of the mean.
sem = std_dev / np.sqrt(n)

# Two-sided Student-t critical value with n - 1 degrees of freedom.
confidence_level = 0.95
degrees_of_freedom = n - 1
upper_tail_probability = (1 + confidence_level) / 2
t_critical = stats.t.ppf(upper_tail_probability, degrees_of_freedom)

# Half-width of the interval, then the interval itself.
margin_of_error = t_critical * sem
lower_bound = mean - margin_of_error
upper_bound = mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)

print("95% Confidence Interval:", confidence_interval)