# Set up dependencies  

In [None]:
!pip install rpy2==3.5.1

In [None]:
!pip install plotly

In [None]:
!pip install pandas

In [None]:
!pip install numpy

In [None]:
!pip install xlrd

In [None]:
!pip install jupyter-dash nbformat

In [None]:
!echo "install.packages(\"metafor\", repos=\"https://cran.rstudio.com\")" | R --no-save

In [None]:
!echo "install.packages(\"multcomp\", repos=\"https://cran.rstudio.com\")" | R --no-save

In [None]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
import plotly.colors
from plotly.subplots import make_subplots

from rpy2.robjects.packages import importr
import rpy2.robjects

from dash import Dash, dcc, html, Input, Output, dash_table

# Load and prepare data

In [None]:
from google.colab import files
uploaded = files.upload()

Saving Modified Data.xlsx to Modified Data.xlsx


In [None]:
# read excel sheet
info = pd.read_excel('Modified Data.xlsx', sheet_name='Studies', engine='openpyxl')
info['Sample points'] = info['N']
info = info.drop(info.columns[0], axis=1) # drop row with ID

# create 'Study', which concatenates the first author's name with the year of publication of the study.
# If more than one study share the same name, it adds a lower-case letter to the end of the name to distinguish between them.
year_str = info['Year_of_publication'].astype(str)
info['Study'] = info['First_author'] + ' et al., ' + year_str
info['Study'] = info.groupby('Study')['Study'].apply(lambda n: n+list(map(chr,np.arange(len(n))+97))
                                                     if len(n)>1 else n)
info['Number of studies'] = np.ones((len(info),1))
info = info.sort_values('Study')

# adds hyperlink to DOI column
info['Link'] = info['DOI']
info['Link'].replace('http',"""<a style='color:white' href='http""",
                    inplace=True, regex=True)
info['Link'] = info['Link'] + """'>->Go to the paper</a>"""

fields = ['Income_group', 'Region', 'Country',
          'Study_design', 'Patient_sample', 'N', 'N_female', 'N_male', 'Age_mean', 'Age_median', 'Age_sd', 'Age_range', 'Years_of_data_collection', 'Publication_language']

# creat column that contains a summary of each study's metadata in HTML format
info['Summary'] = info['Link'] + '<br><br>'
for i in fields:
    info['Summary'] = info['Summary'] + i + ': ' + info[i].astype(str) + '<br><br>'

# Pie chart

In [None]:
# group data by country and language
grouped_df  = info.copy()
grouped_df = grouped_df.groupby('Country')

# Calculate the percentage of each language for each country
language_pct = grouped_df.apply(lambda x: x['Publication_language'].value_counts(normalize=True))
language_pct = language_pct.unstack()
language_pct = language_pct * 100

In [None]:
# Lets determine the contribution of each country to total number of studies
study_counts = grouped_df.size() # Count the number of rows (studies) in each group (country)
total_studies = study_counts.shape[0] # Calculate the total number of studies
# Calculate the percentage of studies for each country by dividing
# the number of studies for each country by the total number of studies and multiplying by 100
study_pct = study_counts.apply(lambda x: x / total_studies * 100)

# Display the result
print(study_pct)

Country
Austria                3.703704
Bolivia                3.703704
Brazil                37.037037
Canada                 3.703704
Congo, Dem. Rep.       3.703704
Denmark                3.703704
Egypt, Arab Rep.       3.703704
France                 7.407407
Germany               11.111111
Greece                 3.703704
India                 14.814815
Iran, Islamic Rep.    11.111111
Italy                 11.111111
Japan                  3.703704
Kenya                  3.703704
Korea, Rep.            3.703704
Mexico                 3.703704
Netherlands            7.407407
Nigeria                3.703704
Pakistan               7.407407
Poland                 7.407407
Spain                  3.703704
Switzerland            3.703704
Turkey                51.851852
Uganda                11.111111
United Kingdom        11.111111
United States         70.370370
dtype: float64


In [None]:
cleaned_df  = info.copy()
# Drop all columns except for 'Country of the Study' and 'Original Language of article'
cleaned_df = cleaned_df.drop(columns=[col for col in cleaned_df.columns if col not in ['Country', 'Publication_language', 'DOI']])

# View the resulting data frame
print(cleaned_df)

                                                  DOI        Country  \
37          https://doi.org/10.1007/s00381-018-3781-2        Germany   
62          https://doi.org/10.1007/s00381-017-3663-z         Turkey   
81  https://www.scielo.org.mx/scielo.php?script=sc...         Mexico   
8       https://doi.org/10.1016/s0028-3843(14)60055-4         Greece   
10                 https://doi.org/10.1007/bf00271135  United States   
..                                                ...            ...   
33           https://doi.org/10.3171/2015.5.peds14692  United States   
45          https://doi.org/10.3171/2018.7.focus18280         Uganda   
6           https://doi.org/10.1007/s00381-006-0261-x         Brazil   
79          https://doi.org/10.1007/s00381-016-3237-5         Brazil   
63         https://doi.org/10.1007/s00381-020-04786-1         Turkey   

   Publication_language  
37              English  
62              English  
81              Spanish  
8               English  
10   

In [None]:
# Create new dataframe with summary info (country of the study / study_count / language_pct / study_pct)

# Create a new column in cleaned_df called percent of studies written in each language
cleaned_df['percent of studies written in english'] = cleaned_df.apply(lambda row: 1 if row['Publication_language'] == 'English' else 0, axis=1)
cleaned_df['percent of studies written in spanish'] = cleaned_df.apply(lambda row: 1 if row['Publication_language'] == 'Spanish' else 0, axis=1)
cleaned_df['percent of studies written in korean'] = cleaned_df.apply(lambda row: 1 if row['Publication_language'] == 'Korean' else 0, axis=1)

# Convert the values of 1 to 100%.
cleaned_df['percent of studies written in english'] = cleaned_df['percent of studies written in english'].map({1:100, 0:0})
cleaned_df['percent of studies written in spanish'] = cleaned_df['percent of studies written in spanish'].map({1:100, 0:0})
cleaned_df['percent of studies written in korean'] = cleaned_df['percent of studies written in korean'].map({1:100, 0:0})

# Create a new column in cleaned_df
cleaned_df['number of studies published by country'] = cleaned_df.groupby('Country')['Country'].transform('size')

# Create a new column in cleaned_df
cleaned_df['percent studies'] = cleaned_df['number of studies published by country'] / len(cleaned_df)

# Group cleaned_df by the Country of the Study column.
grouped_df = cleaned_df.groupby('Country')

# Calculate the mean of the percent of studies written
# in each language, number of studies published by country, and percent studies columns for each group.
summary_df = grouped_df.apply(lambda x: x[['percent of studies written in english', 'percent of studies written in spanish', 'percent of studies written in korean', 'number of studies published by country', 'percent studies']].mean())

# Assign the resulting DataFrame to summary_df.
summary_df = summary_df.reset_index()

print(summary_df)

               Country  percent of studies written in english  \
0              Austria                                  100.0   
1              Bolivia                                    0.0   
2               Brazil                                  100.0   
3               Canada                                  100.0   
4     Congo, Dem. Rep.                                  100.0   
5              Denmark                                  100.0   
6     Egypt, Arab Rep.                                  100.0   
7               France                                  100.0   
8              Germany                                  100.0   
9               Greece                                  100.0   
10               India                                  100.0   
11  Iran, Islamic Rep.                                  100.0   
12               Italy                                  100.0   
13               Japan                                  100.0   
14               Kenya   

In [None]:
# Create a pivot table with 'country' as the index and 'Publication_language' as the columns
pivot_table = cleaned_df.pivot_table(index='Country', columns='Publication_language', aggfunc='size', fill_value=0)

# Rename the columns of the pivot table
pivot_table.rename(columns={'English': 'percent of articles in english',
                            'Spanish': 'percent of articles in spanish',
                            'Korean': 'percent of articles in korean'}, inplace=True)

# Calculate the total number of articles for each country
pivot_table['count of studies'] = pivot_table.sum(axis=1)

# Calculate the percentage of articles for each language for each country
pivot_table = pivot_table.divide(pivot_table['count of studies'], axis=0)

# The resulting pivot table will have 'country of the study' as the index and the specified columns

In [None]:
# Convert percent studies and language percentage fields to numeric data types
summary_df['percent of studies written in english'] = pd.to_numeric(summary_df['percent of studies written in english'], errors='coerce')
summary_df['percent of studies written in spanish'] = pd.to_numeric(summary_df['percent of studies written in spanish'], errors='coerce')
summary_df['percent of studies written in korean'] = pd.to_numeric(summary_df['percent of studies written in korean'], errors='coerce')

# Remove rows with missing values
summary_df = summary_df.dropna()

In [None]:
# Create pie chart
fig = px.pie(
    summary_df,
    values='number of studies published by country',
    names='Country'
)

# #Copy data to new columns that do not contain spaces in the titles
summary_df['English'] = summary_df['percent of studies written in english']
summary_df['Spanish'] = summary_df['percent of studies written in spanish']
summary_df['Korean'] = summary_df['percent of studies written in korean']

# Update hovertemplate to include percent studies and language percentage fields
import numpy as np
fig.update_traces(hovertemplate='<b>%{label}</b>'+\
        '<br>No. of studies: %{value} (%{percent})'+\
        '<br>English: %{customdata[0][0]}%'+\
        '<br>Spanish: %{customdata[0][1]}%'+\
        '<br>Korean: %{customdata[0][2]}%',
    customdata = np.stack((summary_df['English'], summary_df['Spanish'], summary_df['Korean']), axis=-1)
)

# Show chart
fig.show()

# Pre-processing for meta-analysis  

In [None]:
df = pd.DataFrame()
data = pd.read_excel('Modified Data.xlsx', sheet_name='Outcomes', engine='openpyxl')
data = data.drop(data.columns[0], axis=1) # remove study ID coloumn since we will use DOI as a key

In [None]:
# measure_type is a dictionary with keys corresponding to four different types of measures that correspond to the columns in the DataFrame that contain data for each type of measure.
measure_type = {'Failure_rate','Mortality_periop_1st_line','Complication_rate_1st_line','Mortality_intraop_1st_line'}


# make sure Outcome column is consistent with the keys in measure_type, otherwise df_m will have no rows and studies will not be plotted
# all Income_group and rate combinating, some excluded bc we know from running metastudy{} later on that there is no data
measure_type = {
    'High income': {
        'Failure_rate': ['High income_Failure_rate'],
        'Mortality_periop_1st_line': ['High income_Mortality_periop_1st_line'],
        'Complication_rate_1st_line': ['High income_Complication_rate_1st_line'],
        'Mortality_intraop_1st_line':['High income_Mortality_intraop_1st_line']
    },
    'Upper middle income': {
        'Failure_rate': ['Upper middle income_Failure_rate'],
        'Mortality_periop_1st_line': ['Upper middle income_Mortality_periop_1st_line'],
        'Complication_rate_1st_line': ['Upper middle income_Complication_rate_1st_line'],
        'Mortality_intraop_1st_line':['Upper middle income_Mortality_intraop_1st_line']
    },
    'Lower middle income': {
        'Failure_rate': ['Lower middle income_Failure_rate'],
        'Mortality_periop_1st_line': ['Lower middle income_Mortality_periop_1st_line'],
        'Complication_rate_1st_line': ['Lower middle income_Complication_rate_1st_line'],
        'Mortality_intraop_1st_line':['Lower middle income_Mortality_intraop_1st_line']

    },
    'Low income': {
        'Failure_rate': ['Low income_Failure_rate'],
        'Mortality_periop_1st_line': ['Low income_Mortality_periop_1st_line'],
        'Complication_rate_1st_line': ['Low income_Complication_rate_1st_line'],
        'Mortality_intraop_1st_line':['Low income_Mortality_intraop_1st_line']
    }
}

#color_dict = {m:plotly.colors.qualitative.Bold[n]
              # for n,m in enumerate(measure_type.keys())}

In [None]:
color_dict = {
    'High income_Failure_rate': 'rgb(127, 60, 141)',
    'High income_Mortality_periop_1st_line': 'rgb(127, 60, 141)',
    'High income_Complication_rate_1st_line': 'rgb(127, 60, 141)',
    'High income_Mortality_intraop_1st_line': 'rgb(127, 60, 141)',

    'Upper middle income_Failure_rate': 'rgb(17, 165, 121)',
    'Upper middle income_Mortality_periop_1st_line': 'rgb(17, 165, 121)',
    'Upper middle income_Complication_rate_1st_line': 'rgb(17, 165, 121)',
    'Upper middle income_Mortality_intraop_1st_line' : 'rgb(17, 165, 121)',

    'Lower middle income_Failure_rate': 'rgb(57, 105, 172)',
    'Lower middle income_Mortality_periop_1st_line': 'rgb(57, 105, 172)',
    'Lower middle income_Complication_rate_1st_line': 'rgb(57, 105, 172)',
    'Lower middle income_Mortality_intraop_1st_line': 'rgb(242, 183, 1)',

    'Low income_Complication_rate_1st_line': 'rgb(242, 183, 1)',
}

In [None]:
# lets create a variable to store the column names of the outcome variables in 'data'.
# Later we will use 'outcomes' to iterate through each outcome variable in 'data'.
# we will use it to create a dictionary (outcomes_avail) that maps each outcome variable to its corresponding value for a given row.
# This is done using a dictionary comprehension and the zip() function to match the column name with its value in the row.

outcomes = data.columns[1:]
for _, row in data.iterrows():
        outcomes_avail = {m:value for m, value in zip(outcomes, row.tolist()[1:])
              if pd.notnull(value)}
        for m in outcomes_avail.keys():
            df = df.append([[row.DOI, m, outcomes_avail[m],
                             *info[info.DOI==row.DOI].values.tolist()[0][1:]]])
df.columns = ['DOI', 'Outcome', 'Rate', *info.columns[1:]]


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

In [None]:
# to allow mixed effects model to work later on:
# Remove NaN values from the 'Rate' column
df = df.dropna(subset=['Rate'])

In [None]:
df['Rate'] = pd.to_numeric(df['Rate'], errors='coerce')
df['N'] = pd.to_numeric(df['N'], errors='coerce')
df['N_female'] = pd.to_numeric(df['N_female'], errors='coerce')
df['N_male'] = pd.to_numeric(df['N_male'], errors='coerce')
df['Age_mean'] = pd.to_numeric(df['Age_mean'], errors='coerce')
df['Age_median'] = pd.to_numeric(df['Age_median'], errors='coerce')
df['Age_sd'] = pd.to_numeric(df['Age_sd'], errors='coerce')
df['Sample points'] = pd.to_numeric(df['Sample points'], errors='coerce')

In [None]:
df = df[df['Outcome'].isin(['Mortality_periop_1st_line', 'Failure_rate', 'Complication_rate_1st_line','Mortality_intraop_1st_line' ])]

In [None]:
# add column N from df to the last column in data
data = data.merge(df[['DOI', 'N']], on='DOI', how='left')

In [None]:
# convert these coloumn into rates.
# Divide the "Mortality_periop_1st_line" column by "N" column
data['Mortality_periop_1st_line'] = data['Mortality_periop_1st_line'] / data['N']
data['Mortality_intraop_1st_line'] = data['Mortality_intraop_1st_line'] / data['N']

In [None]:
#copy the dataframe in another one to miss the overlap
filtered_df=df.copy()
filtered_df

Unnamed: 0,DOI,Outcome,Rate,First_author,Year_of_publication,Income_group,Region,Country,Study_design,Patient_sample,...,Age_median,Age_sd,Age_range,Years_of_data_collection,Publication_language,Sample points,Study,Number of studies,Link,Summary
0,https://doi.org/10.1186/1471-2334-6-43,Mortality_intraop_1st_line,0.0,Sacar,2006,Upper middle income,Europe & Central Asia,Turkey,Retrospective study (unspecified),Selected,...,,,,2000-2004,English,7,"Sacar et al., 2006",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://doi.org/10.1007/s00381-010-1113-2,Mortality_intraop_1st_line,0.0,Clemmensen,2010,High income,Europe & Central Asia,Denmark,Retrospective study (unspecified),Selected,...,,,,1983-2007,English,59,"Clemmensen et al., 2010",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://doi.org/10.1007/s00381-010-1113-2,Mortality_periop_1st_line,0.0,Clemmensen,2010,High income,Europe & Central Asia,Denmark,Retrospective study (unspecified),Selected,...,,,,1983-2007,English,59,"Clemmensen et al., 2010",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://doi.org/10.1159/000072870,Mortality_intraop_1st_line,0.0,Sandquist,2003,High income,North America,United States,Prospective study (unspecified),Selected,...,,,,,English,5,"Sandquist et al., 2003",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://doi.org/10.1159/000072870,Mortality_periop_1st_line,0.0,Sandquist,2003,High income,North America,United States,Prospective study (unspecified),Selected,...,,,,,English,5,"Sandquist et al., 2003",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,https://doi.org/10.5137/1019-5149.jtn.26588-19.1,Mortality_periop_1st_line,1.0,Ozgural,2020,Upper middle income,Europe & Central Asia,Turkey,Retrospective study (unspecified),Selected,...,,,,2012-2018,English,57,"Ozgural et al., 2020",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://doi.org/10.1007/s00381-016-3237-5,Complication_rate_1st_line,0.2,de Oliveira,2016,Upper middle income,Latin America & Caribbean,Brazil,Multiple case report,Selected,...,168.0,,,2010-2012,English,5,"de Oliveira et al., 2016",1.0,<a style='color:white' href='https://doi.org/1...,<a style='color:white' href='https://doi.org/1...
0,https://advances.umw.edu.pl/en/article/2011/20...,Mortality_intraop_1st_line,0.0,Rysiakiewicz,2011,High income,Europe & Central Asia,Poland,Retrospective study (unspecified),Selected,...,,,,1997-2006,English,86,"Rysiakiewicz et al., 2011",1.0,<a style='color:white' href='https://advances....,<a style='color:white' href='https://advances....
0,https://advances.umw.edu.pl/en/article/2011/20...,Mortality_periop_1st_line,0.0,Rysiakiewicz,2011,High income,Europe & Central Asia,Poland,Retrospective study (unspecified),Selected,...,,,,1997-2006,English,86,"Rysiakiewicz et al., 2011",1.0,<a style='color:white' href='https://advances....,<a style='color:white' href='https://advances....


In [None]:
set(filtered_df['Outcome'].values)

{'Complication_rate_1st_line',
 'Failure_rate',
 'Mortality_intraop_1st_line',
 'Mortality_periop_1st_line'}

In [None]:
measure_type_set = set()
for key in measure_type.keys():
    for value in measure_type[key]:
        measure_type_set.add(value)
measure_type_set

{'Complication_rate_1st_line',
 'Failure_rate',
 'Mortality_intraop_1st_line',
 'Mortality_periop_1st_line'}

In [None]:
# create a set of all outcomes in filtered_df
outcomes_set = set(filtered_df['Outcome'].values)

# create a set of all outcomes in measure_type
measure_type_set = set()
for key in measure_type.keys():
    for value in measure_type[key]:
        measure_type_set.add(value)

# check if there are any outcomes in measure_type_set that are not in outcomes_set
missing_outcomes = measure_type_set - outcomes_set

# print missing outcomes
if missing_outcomes:
    print(f"The following outcomes are missing from filtered_df: {missing_outcomes}")
else:
    print("All outcomes in measure_type are present in filtered_df")

All outcomes in measure_type are present in filtered_df


# Using meta-analysis tools

The input is a filtered data frame containing information about studies on various outcomes for different income groups. The code iterates over the unique outcomes in the data frame, and for each outcome with more than two studies, it iterates over the unique income groups and subsets the data for the current outcome and income group.

The code performs a meta-analysis using 'metafor' to estimate the effect size and confidence intervals for the given income group and outcome (if the subsetted data is not empty and has non-positive variances, otherwise a message will be printed).

The results of the meta-analysis are added to a dictionary called 'metastudy' with the keys defined under 'measure_type'.

The output is the dictionary 'metastudy', which contains the estimated effect sizes and confidence intervals for each income group and outcome for which there were enough studies to perform a meta-analysis.

In [None]:
# calculate the variance of a statistical model based on the coefficient of determination (R-squared) and the number of sample points.
# Mancini looks at the total number of sample points in a dataset, based on the number of subjects and the ROI (return on investment) per subject.
filtered_df.loc[:, 'Variance'] = (4*filtered_df['Rate'])*((1-filtered_df['Rate'])**2)/filtered_df['Sample points']

In [None]:
metafor = importr('metafor')
stats = importr('stats')

metastudy = {} # this is where we will store the results of the meta-study
for outcome in filtered_df['Outcome'].unique():
    nstudies = len(filtered_df['Outcome'][filtered_df['Outcome']==outcome])
    if nstudies > 2:
        for group in filtered_df['Income_group'].unique():
            # Subset the data for the given income group and outcome
            subset = filtered_df[(filtered_df['Income_group'] == group) & (filtered_df['Outcome'] == outcome)] # creates a subset of the filtered data frame with only the rows that correspond to the current outcome and income group.
            if not subset.empty:
                subset = subset[subset['Variance'] > 0]
                if not subset.empty:
                    subset = subset.sort_values(by=['Year_of_publication'])
                    r2 = rpy2.robjects.FloatVector(subset['Rate']) # creates an R vector called r2 that contain the values of the outcome rate from the subset of data.
                    var = rpy2.robjects.FloatVector(subset['Variance']) # creates an R vector called var that contains the variance from the subset of data.
                    fit = metafor.rma(r2, var, method="REML", test="knha") # fits a random effects meta-analysis model to the subset of data. Specifies  the restricted maximum likelihood as the method, and specifies the use of the Knapp-Hartung modification to the t-test to calculate p-value.
                    res = stats.predict(fit)
                    results = dict(zip(res.names,list(res)))
                    # Add the results to the dictionary with appropriate keys
                    key = f"{group}_{outcome}"
                    metastudy[key] = dict(pred=results['pred'][0], # stores meta-analysis results for the current outcome and income group in the metastudy dictionary.
                                          cilb=results['pred'][0]-results['ci.lb'][0],
                                          ciub=results['ci.ub'][0]-results['pred'][0],
                                          crub=results['cr.ub'][0],
                                          crlb=results['cr.lb'][0])
                else:
                    print(f"All outcomes for '{outcome}' in the '{group}' income group have non-positive sampling variances.")
            else:
                print(f"No data for '{outcome}' in the '{group}' income group.")
    else:
        print(f"Insufficient studies for '{outcome}' to perform meta-analysis.")

All outcomes for 'Mortality_intraop_1st_line' in the 'Upper middle income' income group have non-positive sampling variances.
All outcomes for 'Mortality_intraop_1st_line' in the 'High income' income group have non-positive sampling variances.
No data for 'Mortality_intraop_1st_line' in the 'Low income' income group.
All outcomes for 'Mortality_periop_1st_line' in the 'Low income' income group have non-positive sampling variances.
No data for 'Failure_rate' in the 'Lower middle income' income group.
No data for 'Failure_rate' in the 'Low income' income group.


In [None]:
# Here we are checking whether the unique values in the 'Outcome' column of the DataFrame 'filtered_df' match with the
# keys in the dictionary 'measure_type_reverse'. If they match, then we will be ablt to map the outcome values to the measure type.
# This is important because later we want the results of the meta-analysis to be organized into the appropriate categories.
outcome_values = filtered_df['Outcome'].unique()
# measure_type_reverse_keys = measure_type_reverse.keys()
measure_type_reverse_values = measure_type.values()

if any(outcome in measure_type_reverse_values for outcome in outcome_values):
    print("There are matching values.")
else:
    print("There are no matching values.")

There are no matching values.


## Fix for adding a 4th row for mortality intraop

In [None]:
# Get unique values of 'Income_group + Outcome'
keys = set(filtered_df.Income_group + '_' + filtered_df.Outcome)

# then use 'keys' to create dictionary with required keys and default values
subplot_titles_dict = {key: '' for key in keys}

In [None]:
df6 = filtered_df.copy()
df6['result'] = filtered_df.Income_group +'_'+ filtered_df.Outcome # add new column by concatenating the values in Income_group and Outcome
df6['result'].unique()

array(['Upper middle income_Mortality_intraop_1st_line',
       'High income_Mortality_intraop_1st_line',
       'High income_Mortality_periop_1st_line',
       'High income_Failure_rate',
       'High income_Complication_rate_1st_line',
       'Lower middle income_Mortality_periop_1st_line',
       'Lower middle income_Complication_rate_1st_line',
       'Low income_Mortality_periop_1st_line',
       'Upper middle income_Failure_rate',
       'Upper middle income_Complication_rate_1st_line',
       'Lower middle income_Mortality_intraop_1st_line',
       'Upper middle income_Mortality_periop_1st_line',
       'Low income_Complication_rate_1st_line'], dtype=object)

In [None]:
df6 = df6.sort_values(by=['Outcome', 'Income_group'])
l = df6['result'].unique()

# Get the index of the 6th element
# create empty strings to create blank spaces in the subplot titles that will be created later in the code.
l = list(l)  # convert the numpy array to a list
l.insert(5+1, '')  # insert an empty value after the 6th value
l.insert(5+2, '')  # insert another empty value after the previous one
l.insert(9+1, '')  # insert an empty value after the 6th value
l

['High income_Complication_rate_1st_line',
 'Low income_Complication_rate_1st_line',
 'Lower middle income_Complication_rate_1st_line',
 'Upper middle income_Complication_rate_1st_line',
 'High income_Failure_rate',
 'Upper middle income_Failure_rate',
 '',
 '',
 'High income_Mortality_intraop_1st_line',
 'Lower middle income_Mortality_intraop_1st_line',
 '',
 'Upper middle income_Mortality_intraop_1st_line',
 'High income_Mortality_periop_1st_line',
 'Low income_Mortality_periop_1st_line',
 'Lower middle income_Mortality_periop_1st_line',
 'Upper middle income_Mortality_periop_1st_line']

# Let's generate the figure

In [None]:
# create a reverse dictionary where the keys are the outcome names, and the values are the outcome types.
# this is used to sort the subplot titles based on measure type using a lambda function.
measure_type_reverse={m:t for t,mlist in measure_type.items() for m in mlist}

# if you want subplot titles to use r=the dictionary keys to autogenerate
# subplot_titles = sorted(metastudy.keys(), key=lambda x: measure_type_reverse.get(x.split('_')[1]))

# this is the label of each subplot. they correspond to the order of what is contained in subplot_titles
# The titles list contains the labels for each subplot, which correspond to the order of the metastudy keys in subplot_titles.
titles = ['High income', 'Low income', 'Lower middle income','Upper middle income', 'High income', 'Low income', 'Lower middle income','Upper middle income', 'High income', 'Low income', 'Lower middle income','Upper middle income', 'High income', 'Low income', 'Lower middle income','Upper middle income']

# The row and col variables start at 1, and the loop increments the column index until it reaches 4, then it resets the column index to 1 and increments the row index.
# This continues until all the subplots have been created.
row = 1
col = 1

# Create subplot titles sorted based on measure type
# measure_type_reverse dictionary is used to generate the subplot_titles list by sorting the metastudy keys using.
subplot_titles = sorted(metastudy.keys(), key=lambda x: measure_type_reverse.get(x.split('_')[1], ''))
# Create subplots
fig5 = make_subplots(rows=4, cols=4, subplot_titles=titles, horizontal_spacing=0.2, vertical_spacing=0.05)
# fig5 = make_subplots(rows=4, cols=4, subplot_titles=subplot_titles, horizontal_spacing=0.2, vertical_spacing=0.05)

# Add traces for each measure
for m in l:
    try:
      # Add line for prediction boundary
      fig5.add_trace(
          go.Scatter(
              x=[
                round(metastudy[m]['crlb'], 2) if round(metastudy[m]['crlb'], 2) > 0 else 0,
                round(metastudy[m]['crub'], 2) if round(metastudy[m]['crub'], 2) < 1 else 1
            ],
              y=['Mixed model', 'Mixed model'],
              line=dict(color='black', width=2, dash='dot'),
              hovertemplate='Prediction boundary: %{x}<extra></extra>',
              marker_symbol='hourglass-open',
              marker_size=8
          ),
          row=row,
          col=col
    )

    # Add marker for estimated rate
      fig5.add_trace(
          go.Scatter(
              x=[round(metastudy[m]['pred'], 2)],
              y=['Mixed model'],
              mode='markers',
              marker=dict(color='black'),
              marker_symbol='diamond-wide',
              marker_size=10,
              hovertemplate='Rate estimate: %{x}<extra></extra>',
              error_x=dict(
                type='data',
                arrayminus=[round(metastudy[m]['cilb'], 2) if round(metastudy[m]['cilb'], 2) > 0 else 0],
                array=[round(metastudy[m]['ciub'], 2) if round(metastudy[m]['ciub'], 2) < 1 else 1]
            )
        ),
          row=row,
          col=col
    )

    # Add scatter plot for individual studies
      df_m = filtered_df[filtered_df.Income_group +'_'+ filtered_df.Outcome == m] # looks for rows where Outcome value is 'm'
      df_m = df_m.sort_values(by=['Year_of_publication'], ascending=False)

      color_key = measure_type_reverse.get(m)
      marker_color = color_dict.get(color_key, 'gray') if color_key is not None else 'gray'

      fig5.add_trace(
          go.Scatter(
            x=df_m['Rate'],
            y=df_m['Study'],
            text=df_m['Sample points'],
            customdata=df_m['Region'],
            mode='markers',
            marker = {
                'color': marker_color
            },
            marker_symbol='square',
            marker_size=np.log(50 / df_m['Variance']) + 5,
            hovertemplate='%{y}<br>Rate: %{x}<br>Number of samples: %{text}<br>' +
                          'Region: %{customdata}<extra></extra>',
            error_x=dict(
                type='data',
                array=2 * np.sqrt(df_m['Variance'])
            )
        ),
          row=row,
          col=col
    )

    # Update row and column indices
      if col == 4:
          col = 1
          row += 1
      else:
          col += 1
    except:
        print(f"Error processing {m}. Skipping to next measure...")
        fig5.add_trace(go.Scatter(), row=row, col=col)
        # Update row and column indices
        if col == 4:
            col = 1
            row += 1
        else:
            col += 1
        continue


# Define titles for each row
row_titles = [['Failure_rate', '', '', ''],
          ['Mortality_periop_1st_line', '', '', ''],
          ['Complication_rate_1st_line', '', '', ''],
          ['Mortality_intraop_1st_line', '', '', '']]

# Update layout
fig5.update_layout(
    title=dict(text='Outcome measures by income level'),
    width=1400,
    height=1900,
    showlegend=False
)

# Add titles on y-axis
for row in range(1, 4):
    for col in range(1, 5):
        title = row_titles[row-1][col-1]
        fig5.update_yaxes(title_text=title, row=row, col=col)



# Show plot
fig5.show()

Error processing . Skipping to next measure...
Error processing . Skipping to next measure...
Error processing High income_Mortality_intraop_1st_line. Skipping to next measure...
Error processing . Skipping to next measure...
Error processing Upper middle income_Mortality_intraop_1st_line. Skipping to next measure...
Error processing Low income_Mortality_periop_1st_line. Skipping to next measure...
