In [None]:
# from rw_ve_plot import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import PythonMeta as PMA
%load_ext autoreload
%autoreload 2
# Apply the matplotlib style sheet stored next to the notebook; the path is
# relative, so it is resolved against the current working directory.
plt.style.use(r"./RW_visualization.mplstyle")


In [10]:
# Notebook-wide warning configuration.
import warnings
from warnings import simplefilter

# Silence every FutureWarning raised from this point on.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Switch off pandas' chained-assignment (SettingWithCopy) check.
pd.set_option('mode.chained_assignment', None)

In [11]:
# Load the study table. Run vaccine_efficacy_CIs.ipynb first to refresh the data.
vaccine_detail = pd.read_excel('rw_vaccine_details.xlsx')

# Normalise the headers to snake_case: trim, lower-case, spaces -> underscores,
# parentheses removed. regex=False makes every pattern a literal string, so the
# result does not depend on the pandas version's default for `regex` ('(' alone
# is not a valid regular expression).
vaccine_detail.columns = (
    vaccine_detail.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

# Drop the 'All vaccine' rows. The explicit copy makes the result an independent
# frame, so the column assignments in the following cell do not write into a
# slice of the frame that was read from disk.
vaccine_detail = vaccine_detail[vaccine_detail.vaccine != 'All vaccine'].copy()

In [12]:
# Overwrite the efficacy and confidence-interval columns with the re-estimated
# rw_* columns of the same table.
for target, source in (
    ('efficacy_in_%', 'rw_efficacy_in_%'),
    ('lower', 'rw_lower'),
    ('upper', 'rw_upper'),
):
    vaccine_detail[target] = vaccine_detail[source]

In [13]:
# Keep only the rows that carry an efficacy value (NaN efficacy is dropped).
has_efficacy = ~np.isnan(vaccine_detail['efficacy_in_%'])
df = vaccine_detail.loc[has_efficacy]

In [14]:
from tabulate import tabulate  # For nice table formatting

In [15]:
def create_latex_tables(df):
    """Build one LaTeX count table per endpoint (``ave``) value.

    Each table has one row per vaccine and one column per variant; a cell holds
    the number of rows of ``df`` with that (vaccine, variant) pair for the
    endpoint. A trailing ``Total`` column and a final ``Total`` row hold the
    row and column sums.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``ave``, ``vaccine`` and ``variant``. No other
        column is read.

    Returns
    -------
    list of str
        One LaTeX ``table`` environment per distinct ``ave`` value, in sorted
        order. Each string ends with a blank line.

    Notes
    -----
    Labels are written into the LaTeX source verbatim; characters that are
    special in LaTeX (``&``, ``%``, ``_`` ...) are not escaped.
    """
    # Missing labels are dropped before sorting: NaN cannot be ordered against
    # strings, and rows with a missing key never contribute to a count anyway.
    unique_aves = sorted(df['ave'].dropna().unique())
    unique_vaccines = sorted(df['vaccine'].dropna().unique())
    unique_variants = sorted(df['variant'].dropna().unique())

    latex_tables = []

    for ave_type in unique_aves:
        subset = df[df['ave'] == ave_type]

        # Row counts per (vaccine, variant). crosstab only needs the two key
        # columns, so the function does not depend on any helper column (such
        # as a leftover 'unnamed:_0' index column) being present in ``df``.
        counts = pd.crosstab(subset['vaccine'], subset['variant'])

        # Keep only the variants that actually occur for this endpoint.
        variants_with_data = [
            variant for variant in unique_variants
            if variant in counts.columns and counts[variant].sum() > 0
        ]

        # Table preamble and header row.
        latex_table = [
            "\\begin{table}[h]",
            "\\centering",
            f"\\caption{{data counts for {ave_type.lower()} endpoint.}}",
            "\\begin{tabular}{l" + "c" * (len(variants_with_data) + 1) + "}",  # +1 for Total column
            "\\hline",
            "Vaccine & " + " & ".join(variants_with_data) + " & Total \\\\",
            "\\hline"
        ]

        # One row per vaccine, followed by the row total.
        row_has_data = False
        for vaccine in unique_vaccines:
            if vaccine not in counts.index:
                continue
            cells = [int(counts.loc[vaccine, variant]) for variant in variants_with_data]
            row_total = sum(cells)
            if row_total > 0:  # skip vaccines without any data for this endpoint
                row_has_data = True
                latex_table.append(
                    f"{vaccine} & " + " & ".join(str(cell) for cell in cells) + f" & {row_total} \\\\"
                )

        # Column totals and grand total, only when at least one row was written.
        if row_has_data:
            col_totals = [int(counts[variant].sum()) for variant in variants_with_data]
            grand_total = sum(col_totals)
            latex_table.append("\\hline")
            latex_table.append(
                "Total & " + " & ".join(str(total) for total in col_totals) + f" & {grand_total} \\\\"
            )

        # Close the table.
        latex_table.extend([
            "\\hline",
            "\\end{tabular}",
            "\\label{tab:" + ave_type.lower().replace(" ", "_") + "}",
            "\\end{table}",
            "\n"  # blank line between tables
        ])

        latex_tables.append('\n'.join(latex_table))

    return latex_tables

In [16]:
# Render the count tables and write each one to table_<k>.txt (k starts at 1)
# in the current working directory.
latex_tables = create_latex_tables(df)
for table_number, table_text in enumerate(latex_tables, start=1):
    with open(f'table_{table_number}.txt', 'w') as out_file:
        out_file.write(table_text)



# Meta-analysis

In [17]:
# Source column in `df` -> short column name used in the meta-analysis frame.
meta_columns = {
    'reference': 'study',                                            # study identifier
    'no_of_participants_in_vaccine_group': 'vax_n',                  # vaccine group size
    'no_of_infected_in_vaccine_group': 'vax_cases',                  # cases in vaccine group
    'no_of_participants_in_placebo_groupcontrol_group': 'ctrl_n',    # control group size
    'no_of_infected_in_placebo_group': 'ctrl_cases',                 # cases in control group
    'vaccine': 'vaccine',                                            # vaccine type
    'variant': 'variant',                                            # virus variant
    'ave': 'ave',                                                    # endpoint the efficacy refers to
}
# Select the columns in that order and rename them; the row index of `df` is kept.
meta_df = df[list(meta_columns)].rename(columns=meta_columns)

In [None]:
# Inspect the assembled meta-analysis input frame.
meta_df

In [19]:
# Coerce the four count columns to numeric dtype; values that cannot be parsed
# become NaN (errors='coerce').
numeric_cols = ['vax_n', 'vax_cases', 'ctrl_n', 'ctrl_cases']
meta_df[numeric_cols] = meta_df[numeric_cols].apply(pd.to_numeric, errors='coerce')

In [None]:
# Inspect the frame again after the count columns were coerced to numeric.
meta_df

In [21]:
def showstudies(studies, dtype):
    """Format the loaded studies as a fixed-width text table.

    Parameters
    ----------
    studies : sequence
        One indexable record per study. For continuous data (``dtype`` equal to
        "CONT", case-insensitive) positions 0-5 are m1, sd1, n1, m2, sd2, n2 and
        position 6 is the study ID. Otherwise positions 0-3 are e1, n1, e2, n2
        and position 4 is the study ID.
    dtype : str
        Data type name; only "CONT" selects the continuous layout.

    Returns
    -------
    str
        Two header lines followed by one line per study.
    """
    if dtype.upper() == "CONT":
        # Continuous data: six value columns, study ID stored at index 6.
        id_index, value_count = 6, 6
        pieces = [
            "%-10s %-30s %-30s \n" % ("Study ID", "Experiment Group", "Control Group"),
            "%-10s %-10s %-10s %-10s %-10s %-10s %-10s \n" % (" ", "m1", "sd1", "n1", "m2", "sd2", "n2"),
        ]
        row_format = "%-10s %-10s %-10s %-10s %-10s  %-10s %-10s \n"
    else:
        # Dichotomous data: four value columns, study ID stored at index 4.
        id_index, value_count = 4, 4
        pieces = [
            "%-10s %-20s %-20s \n" % ("Study ID", "Experiment Group", "Control Group"),
            "%-10s %-10s %-10s %-10s %-10s \n" % (" ", "e1", "n1", "e2", "n2"),
        ]
        row_format = "%-10s %-10s %-10s %-10s %-10s \n"

    for record in studies:
        values = tuple(str(record[position]) for position in range(value_count))
        pieces.append(row_format % ((record[id_index],) + values))

    return "".join(pieces)

def showresults(rults):
    """Format meta-analysis results as a plain-text report.

    Parameters
    ----------
    rults : sequence
        ``rults[0]`` is the overall row, every later entry is one study.
        Positions read from each row: 0 name, 1 effect size, 2 weight,
        3 lower CI bound, 4 upper CI bound, 5 sample size. The overall row is
        additionally read at 7 (Q), 8 (p of Q), 9 (I-square), 10 (z),
        11 (p of z) and 12 (Tau-square, may be None).

    Returns
    -------
    str
        Header, one line per study with its weight in percent of the overall
        weight, the overall line, and the heterogeneity / overall-effect lines.
    """
    overall = rults[0]
    pieces = ["%-10s %-6s  %-18s %-10s" % ("Study ID", "n", "ES[95% CI]", "Weight(%)\n")]

    # One line per study; the weight is shown relative to the overall weight.
    for study in rults[1:]:
        weight_percent = 100 * (study[2] / overall[2])
        pieces.append(
            "%-10s %-6d  %-4.2f[%.2f %.2f]   %6.2f\n"
            % (study[0], study[5], study[1], study[3], study[4], weight_percent)
        )

    # Overall effect line (weight fixed at 100).
    pieces.append(
        "%-10s %-6d  %-4.2f[%.2f %.2f]   %6d\n"
        % (overall[0], overall[5], overall[1], overall[3], overall[4], 100)
    )
    pieces.append("%d studies included (N=%d)\n" % (len(rults) - 1, overall[5]))

    # Tau-square is only printed when the overall row provides it.
    tau_squared = overall[12]
    if tau_squared is None:
        pieces.append("Heterogeneity: ")
    else:
        pieces.append("Heterogeneity: Tau\u00b2=%.3f " % tau_squared)

    i_squared_text = str(round(overall[9], 2)) + "%"
    pieces.append("Q(Chisquare)=%.2f(p=%s); I\u00b2=%s\n" % (overall[7], overall[8], i_squared_text))
    pieces.append("Overall effect test: z=%.2f, p=%s\n" % (overall[10], overall[11]))

    return "".join(pieces)

In [22]:
def prepare_data_for_pythonmeta(excel_file, ave, variant='SARS-CoV-2'):
    """Read the study table and format matching studies as PythonMeta input strings.

    Parameters
    ----------
    excel_file : str
        Path of the Excel workbook to read.
    ave : str
        Endpoint to keep (value of the ``ave`` column).
    variant : str, default 'SARS-CoV-2'
        Variant to keep (value of the ``variant`` column).

    Returns
    -------
    list of str
        One string per study in the form
        ``"<reference>, <vaccine cases>, <vaccine n>, <control cases>, <control n>"``.
        A study is included only when all four counts are present and numeric.

    Notes
    -----
    NOTE(review): the fields are joined with ", ", so a reference that itself
    contains a comma would presumably confuse a comma-based parser - confirm
    against the input format PythonMeta expects.
    NOTE(review): unlike the notebook's loading cell, 'All vaccine' rows are not
    removed here - confirm that this is intended.
    """
    table = pd.read_excel(excel_file)

    # Apply the same header normalisation as the notebook's loading cell, so the
    # column names used below resolve whether or not the workbook was saved with
    # normalised headers (a no-op when it was).
    table.columns = (
        table.columns.str.strip()
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )

    # 'X' marks a missing entry in the workbook.
    table = table.replace('X', np.nan)

    # Explicit copy: the selection is modified below and must not be a view.
    selected = table[(table['ave'] == ave) & (table['variant'] == variant)].copy()

    # Listed in the order in which they appear in the output string.
    count_cols = [
        'no_of_infected_in_vaccine_group',
        'no_of_participants_in_vaccine_group',
        'no_of_infected_in_placebo_group',
        'no_of_participants_in_placebo_groupcontrol_group',
    ]
    selected[count_cols] = selected[count_cols].apply(pd.to_numeric, errors='coerce')

    # int() cannot convert NaN, so a study needs all four counts, not only the
    # two case counts.
    complete = selected.dropna(subset=count_cols)

    return [
        f"{reference}, {int(vax_cases)}, {int(vax_n)}, {int(ctrl_cases)}, {int(ctrl_n)}"
        for reference, vax_cases, vax_n, ctrl_cases, ctrl_n in zip(
            complete['reference'], *(complete[col] for col in count_cols)
        )
    ]

In [23]:
# Run the meta-analysis
def run_meta_analysis(study_data, settings, ave, save_fig=False):
    """Run one PythonMeta analysis, print the report and show the plots.

    Parameters
    ----------
    study_data : list of str
        Study strings handed to ``PMA.Data.getdata``.
    settings : dict
        Must provide the keys "datatype", "models", "algorithm" and "effect".
    ave : str
        Endpoint name; only used in the file name of the saved funnel plot.
    save_fig : bool, default False
        When True, the funnel plot is also written to
        ``RW2025_funnel_<models>_<ave>.pdf``.

    Returns
    -------
    The result object returned by ``PMA.Meta.meta``, so that callers can keep it
    (e.g. ``results = run_meta_analysis(...)``). Previously nothing was
    returned and such an assignment stored ``None``.
    """
    d = PMA.Data()  # Load Data class
    m = PMA.Meta()  # Load Meta class
    f = PMA.Fig()   # Load Fig class

    # Set data type
    d.datatype = settings["datatype"]

    # Load data
    studies = d.getdata(study_data)
    print("Studies included:")
    print(showstudies(studies, d.datatype))

    # Set meta-analysis parameters
    m.datatype = d.datatype
    m.models = settings["models"]
    m.algorithm = settings["algorithm"]
    m.effect = settings["effect"]

    # Perform the analysis
    results = m.meta(studies)

    # Show results
    print(f"\n{m.models} {m.algorithm} {m.effect} Meta-Analysis Results:")
    print(showresults(results))

    # Generate plots
    f.forest(results).show()
    f.funnel(results).show()

    if save_fig:
        # The funnel plot is drawn again for saving rather than reusing the
        # object that was just shown.
        f.funnel(results).savefig(f'RW2025_funnel_{settings["models"]}_{ave}.pdf', bbox_inches='tight')

    # Perform Egger's test
    print("\nEgger's test results:")
    print(m.Eggers_test(results))

    return results

In [None]:
# Symptomatic endpoint, random-effects model

# Study strings for the selected endpoint
ave = 'Symptomatic'
study_data = prepare_data_for_pythonmeta('rw_vaccine_details.xlsx', ave=ave)

# Analysis configuration
settings = dict(
    datatype="CATE",   # CATEgorical / binary data
    models="Random",   # random-effects model
    algorithm="MH",    # Mantel-Haenszel method
    effect="RR",       # risk ratio as effect measure
)

# Run the analysis
results = run_meta_analysis(study_data, settings, ave=ave, save_fig=False)

In [None]:
# Symptomatic endpoint, fixed-effect model

# Study strings for the selected endpoint
ave = 'Symptomatic'
study_data = prepare_data_for_pythonmeta('rw_vaccine_details.xlsx', ave=ave)

# Analysis configuration
settings = dict(
    datatype="CATE",   # CATEgorical / binary data
    models="Fixed",    # fixed-effect model
    algorithm="MH",    # Mantel-Haenszel method
    effect="RR",       # risk ratio as effect measure
)

# Run the analysis
run_meta_analysis(study_data, settings, ave=ave, save_fig=False)

In [None]:
# Severe endpoint, random-effects model

# Study strings for the selected endpoint
ave = 'Severe'
study_data = prepare_data_for_pythonmeta('rw_vaccine_details.xlsx', ave=ave)

# Analysis configuration
settings = dict(
    datatype="CATE",   # CATEgorical / binary data
    models="Random",   # random-effects model
    algorithm="MH",    # Mantel-Haenszel method
    effect="RR",       # risk ratio as effect measure
)

# Run the analysis
run_meta_analysis(study_data, settings, ave=ave, save_fig=False)

In [None]:
# Severe endpoint, fixed-effect model

# Study strings for the selected endpoint
ave = 'Severe'
study_data = prepare_data_for_pythonmeta('rw_vaccine_details.xlsx', ave=ave)

# Analysis configuration
settings = dict(
    datatype="CATE",   # CATEgorical / binary data
    models="Fixed",    # fixed-effect model
    algorithm="MH",    # Mantel-Haenszel method
    effect="RR",       # risk ratio as effect measure
)

# Run the analysis
run_meta_analysis(study_data, settings, ave=ave, save_fig=False)

In [28]:
def create_combined_meta_analysis_table(meta_results_fixed, meta_results_random, ave_type):
    """
    Creates a LaTeX table combining fixed and random effects meta-analysis results.

    Both result lists are read positionally: entry 0 is the overall row, every
    later entry is one study. Positions used per row: 0 study ID, 1 effect size,
    2 weight, 3 lower CI bound, 4 upper CI bound; from the overall row also
    7 (Q), 8 (p of Q), 9 (I-square), 10 (z) and 12 (Tau-square).
    Study IDs, effect sizes and CI bounds are taken from the fixed-effects list;
    the random-effects list is read at the same index for its weight, so both
    lists must hold the studies in the same order.

    Parameters:
    meta_results_fixed (list): Results from fixed effects meta-analysis
    meta_results_random (list): Results from random effects meta-analysis
    ave_type (str): Type of endpoint (e.g., 'Symptomatic', 'Severe')

    Returns:
    str: LaTeX formatted table. Tau-square is printed as '--' when the
    random-effects overall row carries None for it. Spaces in ``ave_type``
    become underscores in the ``\\label``.
    """
    fixed_total = meta_results_fixed[0]
    random_total = meta_results_random[0]

    def _estimate(row):
        # "RR(lower--upper)" with two decimals.
        return f"{row[1]:.2f}({row[3]:.2f}--{row[4]:.2f})"

    # Start LaTeX table
    latex_lines = [
        "\\begin{table}[htbp]",
        "\\centering",
        f"\\caption{{Meta-analysis results for {ave_type.lower()} endpoint}}",
        "\\begin{tabular}{lccc}",
        "\\hline",
        "Author year & RR (95\\%CI) & FE & RE \\\\",
        "\\hline"
    ]

    # One row per study; index 0 is skipped because it holds the summary statistics.
    for i in range(1, len(meta_results_fixed)):
        fixed_row = meta_results_fixed[i]

        # Weights as a percentage of the respective model's overall weight.
        fe_weight = 100 * (fixed_row[2] / fixed_total[2])
        re_weight = 100 * (meta_results_random[i][2] / random_total[2])

        latex_lines.append(
            f"{fixed_row[0]} & {_estimate(fixed_row)} & {fe_weight:.2f} & {re_weight:.2f} \\\\"
        )

    # Tau-square can be None (showresults guards against the same case);
    # formatting None with ':.3f' would raise a TypeError.
    tau_squared = random_total[12]
    tau_text = "--" if tau_squared is None else f"{tau_squared:.3f}"

    # Spaces are replaced so the label follows the convention of
    # create_latex_tables.
    label = "tab:meta_" + ave_type.lower().replace(" ", "_")

    # Add summary statistics
    latex_lines.extend([
        "\\hline",
        "Total & & Fix & Rand \\\\",
        f"Random & {_estimate(random_total)} & & \\\\",
        f"Fixed & {_estimate(fixed_total)} & & \\\\",
        f"Tau$^2$ & {tau_text} & & \\\\",
        f"I$^2$ & {random_total[9]:.2f} & & \\\\",
        f"Q & {random_total[7]:.2f} & & \\\\",
        f"P & {random_total[8]} & & \\\\",
        # z of the random-effects model, then z of the fixed-effects model
        f"Z & {random_total[10]:.2f}--{fixed_total[10]:.2f} & & \\\\",
        "\\hline",
        "\\end{tabular}",
        "\\label{" + label + "}",
        "\\end{table}"
    ])

    return "\n".join(latex_lines)


In [29]:
def run_combined_meta_analysis(excel_file, ave, save_file=False):
    """
    Run the fixed- and the random-effects analysis on the same studies and
    render both into one LaTeX table.

    Both runs use categorical data ("CATE"), the Mantel-Haenszel algorithm
    ("MH") and the risk ratio ("RR") as effect measure.

    Parameters:
    excel_file (str): Path to the Excel file containing the data
    ave (str): Type of endpoint to analyze
    save_file (bool): When True, the table is also written to
        ``meta_analysis_<ave lower-cased>.txt`` in the working directory

    Returns:
    str: LaTeX formatted table
    """
    # Study strings for the requested endpoint
    study_data = prepare_data_for_pythonmeta(excel_file, ave=ave)

    # PythonMeta helpers: one data loader, one analysis object reused for both runs
    loader = PMA.Data()
    analysis = PMA.Meta()

    # Parse the study strings as categorical data
    loader.datatype = "CATE"
    studies = loader.getdata(study_data)

    # Fixed-effects run
    analysis.datatype = loader.datatype
    analysis.models = "Fixed"
    analysis.algorithm = "MH"
    analysis.effect = "RR"
    results_fixed = analysis.meta(studies)

    # Random-effects run: only the model changes
    analysis.models = "Random"
    results_random = analysis.meta(studies)

    # Render both result sets into a single table
    latex_table = create_combined_meta_analysis_table(results_fixed, results_random, ave)

    if save_file:
        with open(f"meta_analysis_{ave.lower()}.txt", "w") as out_file:
            out_file.write(latex_table)

    return latex_table

In [None]:
# Combined fixed/random-effects LaTeX table for the symptomatic endpoint;
# save_file=False, so nothing is written to disk.
run_combined_meta_analysis('rw_vaccine_details.xlsx', ave='Symptomatic', save_file=False)

In [None]:
# Combined fixed/random-effects LaTeX table for the severe endpoint;
# save_file=False, so nothing is written to disk.
run_combined_meta_analysis('rw_vaccine_details.xlsx', ave='Severe', save_file=False)