In [2]:
from rw_ve_plot import *
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pylab import rcParams
plt.style.use('RW_visualization.mplstyle')


In [3]:
# import warnings filter
from pandas.errors import SettingWithCopyWarning
import warnings
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)


warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)


Load data

In [None]:
# Load the per-study table (path is relative to the notebook's working directory).
vaccine_detail = pd.read_excel('vaccine_details.xlsx')

# Normalise the column names: trim, lower-case, spaces -> underscores, drop parentheses.
# regex=False makes every pattern a literal on all pandas versions; a bare '(' is not
# a valid regular expression.
vaccine_detail.columns = (
    vaccine_detail.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Remove the rows labelled 'All vaccine'.
# .copy() makes the result an independent frame, so the column assignment below does not
# write into a slice of the loaded table. reset_index(drop=True) renumbers the rows
# 0..n-1, which the later cells rely on when they index by position.
vaccine_detail = (
    vaccine_detail[~(vaccine_detail['vaccine'] == 'All vaccine')]
    .copy()
    .reset_index(drop=True)
)

# Fold the '1- adj relative risk' label into '1-relative risk'.
vaccine_detail['method_to_calculate_ve'] = vaccine_detail['method_to_calculate_ve'].replace(
    '1- adj relative risk', '1-relative risk')


Calculate vaccine efficacy and CIs by relative risk and Poisson regression with robust error variance if there is no 0 in either group

In [5]:
zero_adjust = True  # Add 0.5 to zero values, based on Pagano & Gauvreau, 2000; Deeks & Higgins, 2010
Z_95 = 1.96  # normal quantile used for the two-sided 95% confidence interval

# The four counts of each study's 2x2 table, in the order they are unpacked below.
COUNT_COLUMNS = [
    'no_of_participants_in_vaccine_group',
    'no_of_participants_in_placebo_groupcontrol_group',
    'no_of_infected_in_vaccine_group',
    'no_of_infected_in_placebo_group',
]

# Non-numeric cells (the sheet marks unavailable counts with 'X') become NaN, so a row
# with ANY missing count is skipped instead of failing on string arithmetic.
counts = vaccine_detail[COUNT_COLUMNS].apply(pd.to_numeric, errors='coerce')
count_matrix = counts.to_numpy(dtype=float)

# Calculate vaccine efficacy and confidence interval.
# Results are stored by row POSITION, so they stay aligned with vaccine_detail
# whatever its index labels are (e.g. after rows have been filtered out).
n_rows = len(vaccine_detail)
RW_vaccine_efficacy = np.full(n_rows, np.nan)
RW_lower = np.full(n_rows, np.nan)
RW_higher = np.full(n_rows, np.nan)

for pos, study_counts in enumerate(count_matrix):
    if np.isnan(study_counts).any():
        continue  # at least one count unavailable: leave VE and CI as NaN
    xv, xp, xve, xpe = study_counts
    if xv <= 0 or xp <= 0:
        continue  # an empty arm has no attack rate

    if zero_adjust and (xve == 0 or xpe == 0):
        # Continuity correction: half an event in each arm, one extra participant in each arm.
        xve += 0.5
        xpe += 0.5
        xv += 1
        xp += 1

    if xpe == 0:
        # Only reachable with zero_adjust=False: the relative risk is undefined
        # (division by zero), so the row stays NaN.
        continue

    # Relative risk of infection (vaccine vs. placebo); VE = 1 - RR.
    theta = (xve / xv) / (xpe / xp)
    RW_vaccine_efficacy[pos] = 1 - theta

    if xve == 0:
        # Only reachable with zero_adjust=False: the variance of log(RR) is infinite.
        # Keep the existing convention of storing 0 for both bounds.
        RW_lower[pos] = 0
        RW_higher[pos] = 0
        continue

    # Standard error of log(RR); the CI is built on the log scale and mapped back to VE.
    se = np.sqrt(1/xve - 1/xv + 1/xpe - 1/xp)
    RW_lower[pos] = 1 - theta*np.exp(Z_95*se)
    RW_higher[pos] = 1 - theta*np.exp(-Z_95*se)

        

In [6]:
# Attach the recomputed efficacy and its bounds to the table as percentages,
# then write the augmented table next to the notebook.
recomputed_columns = {
    'RW_efficacy_in_%': RW_vaccine_efficacy,
    'RW_lower': RW_lower,
    'RW_upper': RW_higher,
}
for column_name, fraction_values in recomputed_columns.items():
    vaccine_detail[column_name] = fraction_values*100

vaccine_detail.to_excel('rw_vaccine_details.xlsx')


In [7]:
# Extract references array
import re


def _format_reference(raw_reference):
    """Turn the part of a reference key before the first '_' into a citation label.

    Every run of ASCII letters is wrapped as " <letters> et al. ", so for example
    'Smith2021_a' becomes ' Smith et al. 2021' (the leading space is kept).
    """
    first_token = raw_reference.split('_')[0]
    return re.sub("[A-Za-z]+", lambda match: " " + match[0] + " et al. ", first_token)


# Build the array in one step; growing it with np.append copies it on every iteration.
references = np.array([_format_reference(raw) for raw in vaccine_detail['reference']])

In [8]:
# Correct article's vaccine efficacy label
def _capitalise_from_first_letter(label):
    """Drop everything before the first alphabetic character and upper-case that character.

    '1-relative risk' becomes 'Relative risk'. Labels without any alphabetic character,
    and non-string cells such as NaN, are returned unchanged.
    """
    if not isinstance(label, str):
        return label
    for pos, char in enumerate(label):
        if char.isalpha():
            return char.upper() + label[pos + 1:]
    return label


# Work on the whole column at once and assign it back with a plain column assignment.
# This does not depend on the index labels being 0..n-1 and avoids chained assignment
# (frame.col.loc[i] = ...), which does not write through under pandas Copy-on-Write.
# 'X' (method not reported) is relabelled in the column as well as in the array, which
# is what the previous in-place edit of the to_numpy() result did wherever that array
# was a writable view of the frame.
ve_method_labels = (
    vaccine_detail['method_to_calculate_ve']
    .map(_capitalise_from_first_letter)
    .replace('X', 'Not specify')
)
vaccine_detail['method_to_calculate_ve'] = ve_method_labels

# Independent copy: later cells filter this array without touching the frame.
paper_ve_methods = ve_method_labels.to_numpy(copy=True)

In [9]:
# Correct article's CI label
def _capitalise_first_character(label):
    """Upper-case the first character of a label; empty and non-string cells pass through."""
    if not isinstance(label, str):
        return label
    return label[:1].upper() + label[1:]


# Column-wise update assigned back in one step: independent of the index labels and
# free of chained assignment (frame.col.loc[i] = ...), which does not write through
# under pandas Copy-on-Write. 'X' (method not reported) is relabelled in the column
# as well as in the array, matching what the previous in-place edit of the to_numpy()
# result did wherever that array was a writable view of the frame.
ci_method_labels = (
    vaccine_detail['method_to_calculate_ci']
    .map(_capitalise_first_character)
    .replace('X', 'Not specify')
)
vaccine_detail['method_to_calculate_ci'] = ci_method_labels

# Independent copy: later cells filter this array without touching the frame.
paper_ci_methods = ci_method_labels.to_numpy(copy=True)
# paper_ci_methods[paper_ci_methods ==
#                  "Stratified Cox proportional-hazards model with Efron's method of tie handling"] = 'Cox proportional-hazards'
# paper_ci_methods[paper_ci_methods == 'Poisson regression with robust error variance'] = 'Poisson regression'

Plot my CIs vs paper's CIs

In [None]:
# Display the column names of the table (normalised at load time, plus the RW_* columns
# appended above) as a reference for the column lookups in the next cell.
vaccine_detail.columns


In [11]:
# Clean nan
# Rows without a recomputed efficacy are dropped from every array used by the plots below.
nan_map = np.isnan(vaccine_detail['RW_efficacy_in_%'].to_numpy())
rows_to_keep = ~nan_map


def _kept_values(column_name):
    """Return one `vaccine_detail` column as an array restricted to the kept rows."""
    return vaccine_detail[column_name].to_numpy()[rows_to_keep]


# Values reported by the papers.
paper_vaccine_efficacy = _kept_values('efficacy_in_%')

# Where a paper gives no bound ('X'), the paper's point estimate is used in its place.
paper_lower_bound = _kept_values('lower')
lower_not_reported = paper_lower_bound == 'X'
paper_lower_bound[lower_not_reported] = paper_vaccine_efficacy[lower_not_reported]

paper_upper_bound = _kept_values('upper')
upper_not_reported = paper_upper_bound == 'X'
paper_upper_bound[upper_not_reported] = paper_vaccine_efficacy[upper_not_reported]

# Values recomputed in this notebook.
RW_vaccine_efficacy = _kept_values('RW_efficacy_in_%')
RW_lower_bound = _kept_values('RW_lower')
RW_upper_bound = _kept_values('RW_upper')

# Label arrays built in the earlier cells, filtered with the same mask.
# NOTE: these three are filtered from themselves, so this cell must run exactly once
# after the cells that create them.
paper_ve_methods = paper_ve_methods[rows_to_keep]
paper_ci_methods = paper_ci_methods[rows_to_keep]
references = references[rows_to_keep]

In [None]:
# Paper-reported vs. recomputed VE and CI bounds. The plotting helper is not defined in
# this notebook (presumably supplied by the rw_ve_plot star import). save_fig=False is passed.
plot_compare_reproduced_vaccine_efficacy_scalar_plot(
    paper_vaccine_efficacy,
    paper_lower_bound,
    paper_upper_bound,
    paper_ve_methods,
    paper_ci_methods,
    RW_vaccine_efficacy,
    RW_lower_bound,
    RW_upper_bound,
    save_fig=False,
)

In [None]:
# Same comparison inputs as the previous plot call, plus the per-study reference labels.
# The plotting helper is not defined in this notebook (presumably supplied by the
# rw_ve_plot star import). save_fig=False is passed.
plot_compare_reproduced_vaccine_efficacy_error_bar_plot(
    paper_vaccine_efficacy,
    paper_lower_bound,
    paper_upper_bound,
    paper_ve_methods,
    paper_ci_methods,
    RW_vaccine_efficacy,
    RW_lower_bound,
    RW_upper_bound,
    references,
    save_fig=False,
)

In [None]:
# Report how many VE values were recomputed, then list the studies whose reported VE
# differs from the recomputed VE by more than 3 (in the units of the efficacy columns).
print(f'Total number of recalibrated VE: {len(paper_vaccine_efficacy)}')
ve_difference = paper_vaccine_efficacy - RW_vaccine_efficacy
# Ordering of the studies by difference; not used by the other cells visible here.
ve_index = np.argsort(ve_difference)

data = dict(
    ve_difference=ve_difference,
    rw_ve=RW_vaccine_efficacy,
    paper_ve_methods=paper_ve_methods,
    paper_ci_methods=paper_ci_methods,
    reference=references,
)
df = pd.DataFrame(data)
exceeds_tolerance = df['ve_difference'].abs() > 3
print(df.loc[exceeds_tolerance])


In [None]:
# Display the distinct VE-method labels that remain after the NaN filtering above.
np.unique(paper_ve_methods)

In [None]:
# Helper not defined in this notebook (presumably supplied by the rw_ve_plot star import).
# It receives the reported VE, the VE-method labels and the recomputed VE, and its
# return value is kept as fig1. save_fig=False is passed.
fig1 = different_between_efficacy(paper_vaccine_efficacy, paper_ve_methods, RW_vaccine_efficacy, save_fig=False)

In [None]:
# Remove the original 'X' cases.
# An unreported lower bound was replaced by the paper's point estimate in the cleaning
# cell, so a lower bound equal to the point estimate marks a study to leave out.
map_list = paper_vaccine_efficacy != paper_lower_bound

# Positional arguments of distance_between_CI, in call order; each is masked the same way.
ci_distance_inputs = (
    paper_vaccine_efficacy,
    paper_lower_bound,
    paper_upper_bound,
    paper_ve_methods,
    paper_ci_methods,
    RW_vaccine_efficacy,
    RW_lower_bound,
    RW_upper_bound,
)
distance_between_CI(*[values[map_list] for values in ci_distance_inputs], save_fig=True)

In [None]:
# Uses map_list from the previous cell, so that cell must have been run first.
# The helper is not defined in this notebook (presumably supplied by the rw_ve_plot
# star import). save_fig=False is passed.
distance_between_VE_to_threshold(paper_vaccine_efficacy[map_list], paper_ve_methods[map_list],
                                     RW_vaccine_efficacy[map_list], save_fig=False)


In [None]:
# Raise negative lower bounds to 0. The arrays are modified in place, so every cell run
# after this one (including re-runs of the plots above) sees the clipped values.
for lower_bounds in (RW_lower_bound, paper_lower_bound):
    lower_bounds[lower_bounds < 0] = 0

# Positional arguments of distance_between_lb_to_threshold, in call order; each is
# restricted with map_list (defined in the distance_between_CI cell).
lb_threshold_inputs = (
    paper_vaccine_efficacy,
    paper_lower_bound,
    paper_upper_bound,
    paper_ve_methods,
    paper_ci_methods,
    RW_vaccine_efficacy,
    RW_lower_bound,
    RW_upper_bound,
)
distance_between_lb_to_threshold(*[values[map_list] for values in lb_threshold_inputs], save_fig=False)

## Funnel plot

In [158]:
# def create_ve_difference_funnel_plot(paper_ve, recalc_ve, sample_sizes, paper_ve_methods=None, save_fig=True):
#     """
#     Create a funnel plot to assess publication bias using differences between reported and recalculated VE.
    
#     Parameters:
#     -----------
#     paper_ve : array-like
#         The original reported vaccine efficacy values
#     recalc_ve : array-like
#         The recalculated vaccine efficacy values
#     sample_sizes : array-like
#         The sample sizes for each study
#     paper_ve_methods : array-like, optional
#         The methods used for calculating VE in each study
#     save_fig : bool, optional
#         Whether to save the figure (default True)
#     """
#     # paper_ve = vaccine_detail['efficacy_in_%']
#     # recalc_ve = vaccine_detail['RW_efficacy_in_%']
#     # sample_sizes = np.array(vaccine_detail['no_of_participants_in_vaccine_group']) + np.array(vaccine_detail['no_of_participants_in_placebo_groupcontrol_group']) + np.array(vaccine_detail['no_of_infected_in_vaccine_group']) + np.array(vaccine_detail['no_of_infected_in_placebo_group'])
#     # # Convert inputs to numpy arrays
#     paper_ve = np.array(paper_ve, dtype=float)
#     recalc_ve = np.array(recalc_ve, dtype=float)
#     sample_sizes = np.array(sample_sizes, dtype=float)
    
#     # Calculate VE differences
#     ve_differences = paper_ve - recalc_ve
    
#     # Calculate standard error for the difference
#     # Using pooled standard error for difference between two proportions
#     # print((paper_ve/100 * (1-paper_ve/100))/sample_sizes)
#     se_paper = np.sqrt((paper_ve/100 * (1-paper_ve/100)) / sample_sizes)
#     se_recalc = np.sqrt((recalc_ve/100 * (1-recalc_ve/100)) / sample_sizes)
#     standard_errors = np.sqrt(se_paper**2 + se_recalc**2)
#     # print(standard_errors)
#     # Create the funnel plot
#     fig, ax = plt.subplots(figsize=(12, 8))
    
#     if paper_ve_methods is not None:
#         # Create color palette for different methods
#         unique_methods = np.unique(paper_ve_methods)
#         palette = sns.color_palette('Set2', n_colors=len(unique_methods))
#         colors = {method: color for method, color in zip(unique_methods, palette)}
        
#         # Plot points with different colors for different methods
#         for method in unique_methods:
#             mask = paper_ve_methods == method
#             ax.scatter(ve_differences[mask], standard_errors[mask], 
#                       alpha=0.6, label=method, color=colors[method])
#         plt.legend(title='VE Methods', bbox_to_anchor=(1.05, 1), loc='upper left')
#     else:
#         # Plot all points in same color if no methods provided
#         ax.scatter(ve_differences, standard_errors, alpha=0.6, color='blue')
    
#     # Calculate and plot mean difference
#     mean_diff = np.mean(ve_differences)
#     ax.axvline(mean_diff, linestyle='--', color='red', alpha=0.5, label='Mean Difference')
    
#     # Add reference line at zero difference
#     ax.axvline(0, linestyle='-', color='gray', alpha=0.3, label='No Difference')
    
#     # Add pseudo confidence intervals
#     se_range = np.linspace(0, max(standard_errors)*1.1, 100)
#     plt.plot(mean_diff - 1.96 * se_range, se_range, 'k--', alpha=0.3)
#     plt.plot(mean_diff + 1.96 * se_range, se_range, 'k--', alpha=0.3)
#     # print(mean_diff - 1.96 * se_range, se_range, mean_diff + 1.96 * se_range)
    
#     # Customize the plot
#     ax.set_title('Funnel Plot of Differences Between Reported and Recalculated VE', fontsize=12)
#     ax.set_xlabel('VE Difference (Reported - Recalculated) (%)', fontsize=10)
#     ax.set_ylabel('Standard Error', fontsize=10)
    
#     # Invert y-axis (standard in funnel plots)
#     ax.invert_yaxis()
    
#     # Add grid
#     ax.grid(True, alpha=0.3)
    
#     # # Add text box with summary statistics
#     # stats_text = (f'Mean Difference: {mean_diff:.1f}%\n'
#                 #  f'Number of Studies: {len(ve_differences)}\n'
#     #              f'Sample Size Range: {min(sample_sizes):,} - {max(sample_sizes):,}\n'
#     #              f'Difference Range: {min(ve_differences):.1f}% to {max(ve_differences):.1f}%')
    
#     # ax.text(0.95, 0.95, stats_text,
#     #         transform=ax.transAxes,
#     #         verticalalignment='top',
#     #         horizontalalignment='right',
#     #         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
    
#     # Adjust layout to prevent text overlap
#     plt.tight_layout()
    
#     if save_fig:
#         plt.savefig('ve_difference_funnel_plot.pdf', bbox_inches='tight', dpi=300)
        
#     # return fig

In [None]:
# vaccine_detail['ave'].unique()

In [None]:
# paper_ve = vaccine_detail['efficacy_in_%']
# recalc_ve = vaccine_detail['RW_efficacy_in_%']
# x_ve = vaccine_detail['no_of_participants_in_vaccine_group']
# x_pe = vaccine_detail['no_of_participants_in_placebo_groupcontrol_group']
# x_vn = vaccine_detail['no_of_infected_in_vaccine_group']
# x_pn = vaccine_detail['no_of_infected_in_placebo_group']
# ave_type = vaccine_detail['ave']
# keep_map = (x_ve != 'X') & (x_pe != 'X') & (x_vn != 'X') & (x_pn != 'X') & (ave_type == 'Symptomatic')

# paper_ve = paper_ve[keep_map]
# recalc_ve = recalc_ve[keep_map]
# x_ve = x_ve[keep_map]
# x_pe = x_pe[keep_map]
# x_vn = x_vn[keep_map]
# x_pn = x_pn[keep_map]

# sample_sizes = x_ve + x_pe + x_vn + x_pn

# paper_ve_methods = vaccine_detail['method_to_calculate_ve'][keep_map]
# print(len(paper_ve_methods))

In [187]:
# create_ve_difference_funnel_plot(paper_ve, recalc_ve, sample_sizes, paper_ve_methods)

In [226]:
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns

# def create_ve_funnel_plot(ve_values, sample_sizes, ve_methods=None, save_fig=True):
#     """
#     Create a funnel plot to assess publication bias in vaccine efficacy studies.
    
#     Parameters:
#     -----------
#     ve_values : array-like
#         The reported vaccine efficacy values as proportions (e.g., 0.95 for 95% VE)
#     sample_sizes : array-like
#         The sample sizes for each study
#     ve_methods : array-like, optional
#         The methods used for calculating VE in each study
#     save_fig : bool, optional
#         Whether to save the figure (default True)
#     """
#     # Convert inputs to numpy arrays
#     ve_values = np.array(ve_values, dtype=float)
#     sample_sizes = np.array(sample_sizes, dtype=float)
    
#     # Calculate standard error for each study
#     # Using SE formula for proportions: sqrt(p*(1-p)/n)
#     standard_errors = np.sqrt((ve_values * (1-ve_values)) / sample_sizes)
    
#     # Create the funnel plot
#     fig, ax = plt.subplots(figsize=(10, 8))
    
#     if ve_methods is not None:
#         # Create color palette for different methods
#         unique_methods = np.unique(ve_methods)
#         palette = sns.color_palette('Set2', n_colors=len(unique_methods))
#         colors = {method: color for method, color in zip(unique_methods, palette)}
        
#         # Plot points with different colors for different methods
#         for method in unique_methods:
#             mask = ve_methods == method
#             ax.scatter(ve_values[mask], standard_errors[mask], 
#                       alpha=0.6, label=method, color=colors[method])
#         plt.legend(title='VE Methods', bbox_to_anchor=(1.05, 1), loc='upper left')
#     else:
#         # Plot all points in same color if no methods provided
#         ax.scatter(ve_values, standard_errors, alpha=0.6, color='blue')
    
#     # Calculate and plot mean effect
#     mean_ve = np.mean(ve_values)
#     ax.axvline(mean_ve, linestyle='--', color='red', alpha=0.5, label='Mean VE')
    
#     # Add pseudo confidence intervals
#     se_range = np.linspace(0, max(standard_errors)*1.1, 100)
#     plt.plot(mean_ve - 1.96 * se_range, se_range, 'k--', alpha=0.3)
#     plt.plot(mean_ve + 1.96 * se_range, se_range, 'k--', alpha=0.3)
    
#     # Customize the plot
#     ax.set_title('Funnel Plot of Vaccine Efficacy Studies', fontsize=12)
#     ax.set_xlabel('Vaccine Efficacy (proportion)', fontsize=10)
#     ax.set_ylabel('Standard Error', fontsize=10)
    
#     # Set reasonable x-axis limits for proportions
#     plt.xlim(0, 1)
    
#     # Invert y-axis (standard in funnel plots)
#     ax.invert_yaxis()
    
#     # Add grid
#     ax.grid(True, alpha=0.3)
    
#     # Add summary statistics
#     stats_text = (f'Mean VE: {mean_ve:.3f}\n'
#                  f'Number of Studies: {len(ve_values)}\n'
#                  f'Sample Size Range: {min(sample_sizes):,.0f} - {max(sample_sizes):,.0f}')
    
#     ax.text(0.95, 0.95, stats_text,
#             transform=ax.transAxes,
#             verticalalignment='top',
#             horizontalalignment='right',
#             bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
    
#     # Adjust layout
#     plt.tight_layout()
    
#     if save_fig:
#         plt.savefig('ve_funnel_plot.pdf', bbox_inches='tight', dpi=300)
    
#     return ax

In [227]:
# create_ve_funnel_plot(paper_ve/100, sample_sizes, paper_ve_methods, save_fig=False)