In [1]:
import numpy as np
import pandas as pd

from scipy.stats import ttest_ind

import matplotlib.pyplot as plt

# Seed NumPy's global random state so that the np.random.normal draws used for the
# simulations further down give the same results on every run.
np.random.seed(42)

In [2]:
# Load the per-study metrics (sensitivity, precision, F1-score and their CI bounds).
df = pd.read_csv("data/meta_analysis_data.csv")
# Remember the original column layout so the final output can be restricted to it,
# leaving out the helper columns added during the analysis.
input_columns = df.columns
df.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,f1,f1_b,f1_t,rob
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,0.0,0.0,0.0,1
1,li,S11-1,9874,93.3,91.6,94.9,0.0,0.0,0.0,0.0,0.0,0.0,0
2,li,S11-2,13524,90.9,88.3,92.6,93.7,91.3,95.6,92.3,90.2,94.1,0
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,0.0,0.0,0.0,0
4,niiya,S14,199,93.5,90.0,96.9,63.5,58.0,69.0,75.6,70.8,80.4,1


In [3]:
# Studies that report no precision (stored as 0) cannot have an F1-score simulated.
# Set them aside so they can be re-attached, unchanged, to the final output.
# NOTE: `df` is overwritten below, so re-running this cell on the already filtered
# frame would leave `df_prec_0` empty.
df_prec_0 = df.loc[df["prec"].eq(0)]
df = df.loc[df["prec"].gt(0)]

# "Train": studies that report an F1-score (used to validate the simulation).
# "Test": studies without one (f1 stored as 0), whose F1-score will be simulated.
df_train = df.loc[df["f1"].gt(0)]
df_test = df.loc[df["f1"].eq(0)]

df_train.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,f1,f1_b,f1_t,rob
2,li,S11-2,13524,90.9,88.3,92.6,93.7,91.3,95.6,92.3,90.2,94.1,0
4,niiya,S14,199,93.5,90.0,96.9,63.5,58.0,69.0,75.6,70.8,80.4,1
5,zhou_D,S24-1,480,90.4,89.8,91.9,85.3,80.5,89.3,87.8,84.7,90.6,1
6,zhou_D,S24-2,214,94.9,89.7,97.8,78.4,72.2,83.2,85.9,82.4,88.6,0
7,zhou_D,S24-3,266,86.8,84.1,89.8,92.4,89.5,95.5,89.5,86.8,91.4,1


In [4]:
# The test studies have no reported F1-score (their `f1` column only holds the 0
# placeholder), so drop the F1 columns; they are re-created by the simulation later.
df_test = df_test.drop(columns=["f1", "f1_b", "f1_t"])

df_test.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,rob
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,1
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,0


# F1-score via Monte Carlo

In [5]:
def get_std(df, column):
    """Return a copy of `df` with a `{column}_std` column derived from the 95% CI.

    Simplifying assumptions:
    - The reported 95% CIs are not symmetric, but they are treated as if they were.
    - As n > 100 in all cases, the t distribution is approximated by a normal one,
      so the 95% CI spans 1.96 spreads on each side of the mean:
          {column}_t - {column}_b = 2 * 1.96 * std
    - Strictly speaking the recovered value is the standard error of the reported
      mean (SE = std / sqrt(n)). That is intended: we want the spread of the mean,
      not the spread of the individual observations.

    The input frame itself is left unmodified.
    """
    ci_width = df[f"{column}_t"] - df[f"{column}_b"]
    return df.assign(**{f"{column}_std": ci_width / (2 * 1.96)})

In [6]:
# Derive the CI-based spread of every metric that each frame actually contains.
# The test studies have no F1 columns, so only sensitivity and precision apply there.
df_train = (
    df_train
    .pipe(get_std, "sens")
    .pipe(get_std, "prec")
    .pipe(get_std, "f1")
)
df_test = df_test.pipe(get_std, "sens").pipe(get_std, "prec")

df_train.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,f1,f1_b,f1_t,rob,sens_std,prec_std,f1_std
2,li,S11-2,13524,90.9,88.3,92.6,93.7,91.3,95.6,92.3,90.2,94.1,0,1.096939,1.096939,0.994898
4,niiya,S14,199,93.5,90.0,96.9,63.5,58.0,69.0,75.6,70.8,80.4,1,1.760204,2.806122,2.44898
5,zhou_D,S24-1,480,90.4,89.8,91.9,85.3,80.5,89.3,87.8,84.7,90.6,1,0.535714,2.244898,1.505102
6,zhou_D,S24-2,214,94.9,89.7,97.8,78.4,72.2,83.2,85.9,82.4,88.6,0,2.066327,2.806122,1.581633
7,zhou_D,S24-3,266,86.8,84.1,89.8,92.4,89.5,95.5,89.5,86.8,91.4,1,1.454082,1.530612,1.173469


In [7]:
# Check that the test studies now carry the CI-derived sens_std / prec_std columns.
df_test.head()

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,rob,sens_std,prec_std
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,1,2.984694,2.806122
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,0,3.469388,1.020408


The function below will be used to iteratively generate F1-score simulations for each of the studies.

In [8]:
def get_f1_simulation(row, test):
    """Simulate a study's F1-score distribution from its sensitivity and precision.

    Sensitivity and precision are each sampled `n` times from a normal distribution
    centred on the reported value, using the CI-derived `sens_std` / `prec_std` as
    spread. Each (sensitivity, precision) pair is combined through the harmonic mean
    into one simulated F1-score sample.

    Parameters
    ----------
    row : pandas.Series
        One study. Must provide `n`, `sens`, `sens_std`, `prec` and `prec_std`, plus
        `f1` and `f1_std` when `test` is False.
    test : bool
        False: compare the simulated F1-scores with samples drawn from the reported
        F1-score (Welch's t-test) and store the outcome under "p_value > 0.05"
        (1 if the p-value exceeds 0.05, else 0).
        True: store the simulated mean and its 95% CI under `f1`, `f1_b` and `f1_t`,
        rounded to one decimal.

    Returns
    -------
    pandas.Series
        A copy of `row` with the additional field(s). The input is not modified.
    """
    # Work on a copy: pandas does not support functions that mutate the object
    # passed to them by `DataFrame.apply`.
    row = row.copy()

    # The sample count must be an integer; `n` arrives as a float when the row is
    # taken from an all-numeric frame.
    n_samples = int(row["n"])

    # Distribution of sensitivity.
    sens_dist = np.random.normal(row["sens"], row["sens_std"], n_samples)

    # Distribution of precision.
    prec_dist = np.random.normal(row["prec"], row["prec_std"], n_samples)

    # Each pair of samples of sensitivity and precision generates a sample for the
    # F1-score distribution (harmonic mean of the two).
    sim_f1_dist = (2 * sens_dist * prec_dist) / (sens_dist + prec_dist)

    if not test:
        # Distribution of the reported F1-score.
        f1_dist = np.random.normal(row["f1"], row["f1_std"], n_samples)

        # Welch's t-test (unequal variances).
        p_value = ttest_ind(sim_f1_dist, f1_dist, equal_var=False).pvalue
        # If p_value > 0.05 we cannot reject the null hypothesis that the reported
        # and the simulated F1-score samples have the same mean.
        row["p_value > 0.05"] = 1 if p_value > 0.05 else 0
    else:
        # Mean and 95% CI of the simulated F1-score.
        f1_mean = np.mean(sim_f1_dist)
        # The input spreads were derived from the CIs of the reported means, so the
        # spread of the simulated samples is used directly as the spread of the
        # F1-score mean; it is deliberately not divided by sqrt(n).
        f1_se = np.std(sim_f1_dist)

        # Keep only 1 decimal point.
        row["f1"] = np.round(f1_mean, 1)
        row["f1_b"] = np.round(f1_mean - 1.96 * f1_se, 1)
        row["f1_t"] = np.round(f1_mean + 1.96 * f1_se, 1)

    return row

In [9]:
# Validate the simulation on the studies that report an F1-score: every row gains a
# "p_value > 0.05" flag (extra keyword arguments of `apply` are forwarded to the function).
df_train = df_train.apply(get_f1_simulation, axis=1, test=False)
df_train[["p_value > 0.05"]].describe()

Unnamed: 0,p_value > 0.05
count,12.0
mean,0.916667
std,0.288675
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


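On average, our F1 simulations match the given mean and 95% CI values: for 91.67% of the studies (11 of 12), Welch's t-test does not reject the hypothesis that the simulated and the reported F1-scores have the same mean. However, we made the assumption that the given mean and 95% CI values come from normal distributions.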

In [10]:
# Fill in the missing F1-score (mean and 95% CI) of the remaining studies by simulation
# (extra keyword arguments of `apply` are forwarded to the function).
df_test = df_test.apply(get_f1_simulation, axis=1, test=True)
df_test

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,rob,sens_std,prec_std,f1,f1_b,f1_t
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,1,2.984694,2.806122,71.1,67.1,75.2
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,0,3.469388,1.020408,90.4,86.7,94.0


### Output.

In [11]:
# Re-assemble all studies, restricted to the original columns, and restore the
# original row order through the preserved index.
output = pd.concat(
    [df_prec_0, df_train[input_columns], df_test[input_columns]]
).sort_index()
output

Unnamed: 0,author,study,n,sens,sens_b,sens_t,prec,prec_b,prec_t,f1,f1_b,f1_t,rob
0,kaiume,S10,256,64.5,58.6,70.3,79.3,73.8,84.8,71.1,67.1,75.2,1
1,li,S11-1,9874,93.3,91.6,94.9,0.0,0.0,0.0,0.0,0.0,0.0,0
2,li,S11-2,13524,90.9,88.3,92.6,93.7,91.3,95.6,92.3,90.2,94.1,0
3,lin,S12,1037,91.1,84.3,97.9,89.7,87.7,91.7,90.4,86.7,94.0,0
4,niiya,S14,199,93.5,90.0,96.9,63.5,58.0,69.0,75.6,70.8,80.4,1
5,zhou_D,S24-1,480,90.4,89.8,91.9,85.3,80.5,89.3,87.8,84.7,90.6,1
6,zhou_D,S24-2,214,94.9,89.7,97.8,78.4,72.2,83.2,85.9,82.4,88.6,0
7,zhou_D,S24-3,266,86.8,84.1,89.8,92.4,89.5,95.5,89.5,86.8,91.4,1
8,zhou_D,S24-4,567,85.9,84.6,87.1,81.2,74.6,84.3,83.5,79.8,86.3,0
9,zhou_D,S24-5,270,83.3,81.1,86.6,79.8,72.6,85.2,81.5,78.5,84.4,0


In [12]:
# Persist the completed table; index=False keeps the DataFrame index out of the CSV.
output.to_csv("data/meta_analysis_data_with_sim_f1.csv", index=False)