In [1]:
import numpy as np
import pandas as pd

from scipy.stats import pearsonr, normaltest

import matplotlib.pyplot as plt

In [2]:
# Read the per-study metrics table and preview its first rows.
csv_path = "meta_analysis_data.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,f1,f1_b,f1_t,rob
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,0.0,0.0,0.0,1
1,li,S11-1,9874,93.3,91.6,94.9,0.0,0.0,0.0,0.0,0.0,0.0,0
2,li,S11-2,13524,90.9,88.3,92.6,93.7,91.3,95.6,92.3,90.2,94.1,0
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,0.0,0.0,0.0,0
4,niiya,S14,199,93.5,90.0,96.9,63.5,58.0,69.0,75.6,70.8,80.4,1


# F1-score via Monte Carlo

In [3]:
# The reported 95% CIs are not exactly symmetric, but we treat them as symmetric for simplicity.
# Since n > 100 in all cases, the t distribution is close to normal, so z = 1.96 is used for the 95% CI.
# Under these assumptions: top - bottom = 2 * 1.96 * spread, where "spread" is the standard
# error of the reported metric (i.e. the std of the estimate, not of the raw observations).
# It is stored as *_std and used as the std of the metric's distribution in the simulation.
ci_width_to_std = 2 * 1.96

sens_ci_width = df["sens_t"] - df["sens_b"]
prec_ci_width = df["prec_t"] - df["prec_b"]

df["sens_std"] = sens_ci_width / ci_width_to_std
df["prec_std"] = prec_ci_width / ci_width_to_std

# The CI bounds are no longer needed once the spreads have been derived.
ci_bound_columns = ["sens_b", "sens_t", "prec_b", "prec_t"]
df = df.drop(columns=ci_bound_columns)

df.head()

Unnamed: 0,author,study,n,sens,prec,f1,f1_b,f1_t,rob,sens_std,prec_std
0,kaiume,S10,256,64.5,79.3,0.0,0.0,0.0,1,2.984694,2.806122
1,li,S11-1,9874,93.3,0.0,0.0,0.0,0.0,0,0.841837,0.0
2,li,S11-2,13524,90.9,93.7,92.3,90.2,94.1,0,1.096939,1.096939
3,lin,S12,1037,91.1,89.7,0.0,0.0,0.0,0,3.469388,1.020408
4,niiya,S14,199,93.5,63.5,75.6,70.8,80.4,1,1.760204,2.806122


The function below is applied to each study (one row at a time) to simulate that study's F1-score distribution from its sensitivity and precision.

In [4]:
def get_f1_simulation(row, rng=None):
    """Simulate the F1-score of one study by Monte Carlo and summarise it.

    Draws ``n`` sensitivity samples and ``n`` precision samples from normal
    distributions (``sens``/``sens_std`` and ``prec``/``prec_std``), combines
    each pair into one F1 sample, and stores the mean and a 95% interval of
    those samples. When the study reports an F1-score (``f1 > 0``), the
    relative error (in %, rounded to 2 decimals) of the simulated values
    against the reported ``f1``, ``f1_b`` and ``f1_t`` is stored as well;
    otherwise the three relative errors are left at 0.

    Parameters
    ----------
    row : pandas.Series
        One study, with the fields ``n``, ``sens``, ``sens_std``, ``prec``,
        ``prec_std``, ``f1``, ``f1_b`` and ``f1_t``.
    rng : numpy.random.Generator, optional
        Random generator used for the draws. When omitted, NumPy's global
        generator is used, so ``np.random.seed(...)`` controls the result.

    Returns
    -------
    pandas.Series
        A copy of ``row`` extended with ``sim_f1``, ``sim_f1_b``,
        ``sim_f1_t``, ``relerr``, ``relerr_b`` and ``relerr_t``.
    """
    # Work on a copy: DataFrame.apply does not support functions that mutate
    # the row object they are handed.
    row = row.copy()

    # Fall back to the global NumPy generator to keep the original behaviour.
    sampler = np.random if rng is None else rng

    # Number of samples. Cast to int because a row taken from an all-numeric
    # frame carries n as a float, which NumPy rejects as a sample size.
    n = int(row["n"])

    # Distribution of sensitivity.
    sens_dist = sampler.normal(row["sens"], row["sens_std"], n)

    # Distribution of precision.
    prec_dist = sampler.normal(row["prec"], row["prec_std"], n)

    # Each pair of sensitivity and precision samples yields one F1-score sample
    # (harmonic mean of the two).
    f1_dist = (2 * sens_dist * prec_dist) / (sens_dist + prec_dist)

    # Mean and 95% interval of the simulated F1-score. The spread of the
    # simulated values is used directly as the standard error.
    row["sim_f1"] = np.mean(f1_dist)
    f1_se = np.std(f1_dist)
    row["sim_f1_b"] = row["sim_f1"] - 1.96 * f1_se
    row["sim_f1_t"] = row["sim_f1"] + 1.96 * f1_se

    # Relative errors default to 0 for studies without a reported F1-score.
    row["relerr"] = 0
    row["relerr_b"] = 0
    row["relerr_t"] = 0

    if row["f1"] > 0:
        row["relerr"] = round((row["sim_f1"] - row["f1"]) / row["f1"] * 100, 2)
        row["relerr_b"] = round((row["sim_f1_b"] - row["f1_b"]) / row["f1_b"] * 100, 2)
        row["relerr_t"] = round((row["sim_f1_t"] - row["f1_t"]) / row["f1_t"] * 100, 2)

    return row

In [5]:
# Seed NumPy's global generator so the Monte Carlo draws, and therefore the
# tables below, are reproducible on Restart & Run All.
np.random.seed(42)

# Simulate the F1-score of every study, then drop the helper spread columns.
df = df.apply(get_f1_simulation, axis=1)
df = df.drop(columns=["sens_std", "prec_std"])

In [6]:
# Studies that report an F1-score: used to check the simulation against reported values.
f1_reported = df["f1"] > 0
df_check = df[f1_reported]

# Studies with f1 == 0 but a positive precision: their F1-score comes from the simulation.
f1_missing = df["f1"] == 0
prec_reported = df["prec"] > 0
df_result = df[f1_missing & prec_reported]

In [7]:
# Summary statistics of the relative errors (in %) for the studies with a reported F1-score.
relerr_columns = ["relerr", "relerr_b", "relerr_t"]
df_check[relerr_columns].describe()

Unnamed: 0,relerr,relerr_b,relerr_t
count,12.0,12.0,12.0
mean,0.0075,0.38,0.590833
std,0.114028,0.730442,0.901287
min,-0.13,-0.8,-0.5
25%,-0.05,-0.19,-0.2125
50%,-0.02,0.57,0.55
75%,0.04,0.8525,1.3925
max,0.27,1.31,2.16


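The simulated values closely match the reported ones for the 12 studies that provide an F1-score: in the run shown above, the relative error of the point estimate stays within about ±0.3%, and that of the CI bounds between about −0.8% and +2.2%. Therefore, we use the simulated F1-score values for studies S10 and S12. (Study S11-1 is not included, as precision is not reported for that model.)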

In [8]:
# Display the studies selected into df_result, with their simulated F1-score and interval.
df_result

Unnamed: 0,author,study,n,sens,prec,f1,f1_b,f1_t,rob,sim_f1,sim_f1_b,sim_f1_t,relerr,relerr_b,relerr_t
0,kaiume,S10,256,64.5,79.3,0.0,0.0,0.0,1,71.162416,66.841072,75.48376,0.0,0.0,0.0
3,lin,S12,1037,91.1,89.7,0.0,0.0,0.0,0,90.3702,86.832402,93.907998,0.0,0.0,0.0
