In [8]:
import os

import numpy as np
import pandas as pd
from scipy import stats

# Load the Excel file
# The workbook location can be overridden without editing this notebook by
# setting the VL_TEE_DATA_FILE environment variable; when it is unset, the
# original local path below is used unchanged.
DEFAULT_FILE_PATH = '/mnt/d/pydatascience/VL_TEE/data/17.04.24 data extraction_VL use for TEE probe insertion.xlsx'
file_path = os.environ.get('VL_TEE_DATA_FILE', DEFAULT_FILE_PATH)
data_extraction = pd.read_excel(file_path, sheet_name='Data extraction')

# Display the first few rows to understand the data structure
# NOTE(review): the preview shows the sheet's VL / Control sub-header being read
# as data row 0, so the numeric columns are not clean numbers in this raw frame.
data_extraction.head()


Unnamed: 0,Study ID,Total sample size,Sample size,Unnamed: 3,Overall oropharyngeal injury (no.),Unnamed: 5,Pharyngeal injury (no.),Unnamed: 7,Blood presence on tip of TEE probe,Unnamed: 9,...,Visibility of oesophageal inlet (no.),Unnamed: 13,Duration of TOE probe insertion (s),Unnamed: 15,Success at first attempt,Unnamed: 17,Success at second attempt,Unnamed: 19,Success at third attempt,Unnamed: 21
0,,,VL,Control,VL,Control,VL,Control,VL,Control,...,VL,Control,VL,Control,VL,Control,VL,Control,VL,Control
1,Ishida 2016,99.0,50,49,2,8,2,8,Not reported,Not reported,...,44,20,21+/- 7,36 +/- 13,46,38,4,9,0,2
2,Ozturk 2017,86.0,43,43,3,17,1,7,2,9,...,Not reported,Not reported,24 +/- 5,18 +/- 8,38,18,4,16,0,7
3,Borde 2022,363.0,186,177,14,26,Not separately reported,Not separately reported,Not separately reported,Not separately reported,...,Not reported,Not reported,Not reported,Not reported,176,154,10,20,0,3
4,Taboada 2024,100.0,50,50,9,39,7,26,2,13,...,49,48,23.52 +/- 31.28,28.08+/- 35.63,45,29,4,5,1,2


### Calculate the odds ratio of the primary outcome: Overall oropharyngeal injury

In [9]:
# Per-study counts for the primary outcome (overall oropharyngeal injury),
# entered by hand: group sizes and number of injured patients in the
# videolaryngoscope (VL) and control arms of each trial.
data_dict = {
    "Study ID": ["Ishida 2016", "Ozturk 2017", "Borde 2022", "Taboada 2024"],
    "VL_sample_size": [50, 43, 186, 50],
    "Control_sample_size": [49, 43, 177, 50],
    "VL_injuries": [2, 3, 14, 9],
    "Control_injuries": [8, 17, 26, 39]
}

# One row per study
injury_data = pd.DataFrame(data_dict)

# Odds of injury in each arm = events / non-events:
#   VL arm:      a / b, a = VL_injuries,      b = VL_sample_size - VL_injuries
#   Control arm: c / d, c = Control_injuries, d = Control_sample_size - Control_injuries
vl_odds = injury_data['VL_injuries'] / (injury_data['VL_sample_size'] - injury_data['VL_injuries'])
control_odds = injury_data['Control_injuries'] / (injury_data['Control_sample_size'] - injury_data['Control_injuries'])

# Odds ratio = (a / b) / (c / d); values below 1 mean lower odds of injury with VL
injury_data['OddsRatio'] = vl_odds / control_odds

# Show the per-study odds ratios
injury_data[['Study ID', 'OddsRatio']]


Unnamed: 0,Study ID,OddsRatio
0,Ishida 2016,0.213542
1,Ozturk 2017,0.114706
2,Borde 2022,0.472719
3,Taboada 2024,0.061914


In [11]:
# Define the function for performing meta-analysis on odds ratios using the DerSimonian and Laird random effects model
def perform_meta_analysis_or(valid_data):
    # Convert OR to log(OR) for analysis
    valid_data['log_OR'] = np.log(valid_data['OddsRatio'])
    # Calculate the sampling variance of the log(OR) (Var[log(OR)])
    valid_data['var_log_OR'] = 1 / valid_data['VL_injuries'] + 1 / valid_data['Control_injuries'] + \
                               1 / (valid_data['VL_sample_size'] - valid_data['VL_injuries']) + \
                               1 / (valid_data['Control_sample_size'] - valid_data['Control_injuries'])

    # Calculate the weights for the fixed effect model
    w_FE = 1 / valid_data['var_log_OR']
    
    # Calculate the fixed effect estimate
    log_OR_FE = np.sum(w_FE * valid_data['log_OR']) / np.sum(w_FE)
    
    # Calculate the total heterogeneity (Q statistic)
    Q = np.sum(w_FE * (valid_data['log_OR'] - log_OR_FE)**2)
    
    # Degrees of freedom for the Q statistic is the number of studies minus 1
    df_Q = len(valid_data) - 1
    
    # p-value for the Q statistic to test for heterogeneity
    p_value_Q = 1 - stats.chi2.cdf(Q, df_Q)

    # Estimate the between-study variance (tau^2) using the DerSimonian-Laird estimator
    tau2 = max(0, (Q - df_Q) / (np.sum(w_FE) - np.sum(w_FE**2) / np.sum(w_FE)))
    
    # Calculate the weights for the random effects model
    w_RE = 1 / (valid_data['var_log_OR'] + tau2)
    
    # Calculate the random effects estimate
    log_OR_RE = np.sum(w_RE * valid_data['log_OR']) / np.sum(w_RE)
    
    # Standard error of the random effects estimate
    se_log_OR_RE = np.sqrt(1 / np.sum(w_RE))
    
    # 95% confidence interval for the random effects estimate
    ci_log_OR_RE = (log_OR_RE - 1.96 * se_log_OR_RE, log_OR_RE + 1.96 * se_log_OR_RE)
    
    # Transform back to OR scale
    OR_RE = np.exp(log_OR_RE)
    ci_OR_RE = (np.exp(ci_log_OR_RE[0]), np.exp(ci_log_OR_RE[1]))
    
    # Calculate I^2 (proportion of total variation in study estimates that is due to heterogeneity)
    I2 = max(0, (Q - df_Q) / Q) * 100 if Q > df_Q else 0
    
    return {
        'combined_OR': OR_RE,
        'ci_OR': ci_OR_RE,
        'I2': I2,
        'Q': Q,
        'p_value_Q': p_value_Q,
        'tau2': tau2
    }

# Perform the meta-analysis for odds ratios
# Pools the per-study odds ratios held in injury_data. As a side effect the
# call also writes the log_OR and var_log_OR columns onto injury_data.
meta_analysis_results = perform_meta_analysis_or(injury_data)

# Bare last expression so the notebook renders the result dict as the cell output
meta_analysis_results


{'combined_OR': 0.16891239723131934,
 'ci_OR': (0.05682706006275246, 0.5020741510633249),
 'I2': 75.18318709163478,
 'Q': 12.088578864164958,
 'p_value_Q': 0.00708580505630807,
 'tau2': 0.8938469881326778}