In [None]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import scipy.stats as st
from matplotlib_venn import venn3, venn3_circles
from plotly.subplots import make_subplots
from tableone import TableOne
from tqdm import tqdm

In [None]:
# Report the interpreter that is actually running this kernel. A shell call to
# `python --version` resolves `python` through PATH, which may be a different
# installation from the one executing the notebook.
sys.version

#### Load processed literature review data

In [None]:
# Read the outcomes sheet. The context manager closes the workbook handle once
# the sheet has been parsed instead of leaving the file open for the session.
with pd.ExcelFile('../data/cochrane_cv19_data_summary.xlsx') as cochrane_data:
    cochrane_df = pd.read_excel(cochrane_data, 'COV-19 Rehab Outcomes Summary')

# The row at position 2 holds the real column headers.
cochrane_df.columns = cochrane_df.iloc[2]
# Keep rows from position 4 and columns from position 2 onwards. reset_index()
# re-inserts the old index as the first column, so the final slice drops that
# column together with the last four columns.
cochrane_df = cochrane_df.iloc[4:,2:].reset_index(col_level=0).iloc[:, 1:-4]

In [None]:
# Show the column labels that were taken from the spreadsheet's header row.
cochrane_df.columns

In [None]:
# Preview the first rows of the trimmed sheet.
cochrane_df.head()

In [None]:
# (rows, columns) of the trimmed sheet, before any eligibility filtering.
cochrane_df.shape

In [None]:
# NOTE(review): repeats the column listing shown a few cells above - this cell looks redundant.
cochrane_df.columns

In [None]:
# Column dtypes as parsed from the spreadsheet.
cochrane_df.dtypes

##### Filter included and handle NAs

In [None]:
# Keep only the studies marked for inclusion. The explicit copy makes
# `cochrane_sel` an independent frame, so the column assignments performed on
# it in the following cells do not raise pandas' SettingWithCopyWarning.
cochrane_sel = cochrane_df[cochrane_df.Eligibility == 'Include'].copy()
cochrane_sel.shape

In [None]:
# rename() returns a new frame, so rebinding the name (instead of inplace=True on
# a filtered slice) gives the assignments below an independent object to write to.
cochrane_sel = cochrane_sel.rename(
    columns={'Length of rehabilitation (days)': 'Days of rehabilitation'}
)

# Missing values become explicit placeholders: a text label for the categorical
# columns and -1 for the two integer columns.
cochrane_sel['Consecutive interventions'] = cochrane_sel['Consecutive interventions'].fillna('Unknown')
cochrane_sel['Days of rehabilitation'] = cochrane_sel['Days of rehabilitation'].fillna(-1).astype(int)
cochrane_sel['Population'] = cochrane_sel['Population'].fillna(-1).astype(int)
cochrane_sel['Outcome'] = cochrane_sel['Outcome'].fillna('Undefined')
cochrane_sel['Service'] = cochrane_sel['Service'].fillna('Unidentified rehabilitation service')

# 'Sample timeframe' is split on ':' into a start part and an end part.
# A value without ':' (or a missing value) still raises here, as before.
timeframe_parts = cochrane_sel['Sample timeframe'].apply(lambda x: x.split(':'))
cochrane_sel['Collection start'] = timeframe_parts.apply(lambda parts: parts[0])
cochrane_sel['Collection end'] = timeframe_parts.apply(lambda parts: parts[1])

In [None]:
# Studies that report a rehabilitation length (-1 is the missing-value
# placeholder), then split on the 'Multidisciplinary' flag.
has_rehab_days = cochrane_sel['Days of rehabilitation'].ne(-1)
cochrane_days = cochrane_sel.loc[has_rehab_days]
multidisciplinary_flag = cochrane_days['Multidisciplinary']
cochrane_days_nmul = cochrane_days.loc[multidisciplinary_flag.eq('N')]
cochrane_days_mul = cochrane_days.loc[multidisciplinary_flag.eq('Y')]

In [None]:
# Descriptive statistics of rehabilitation length for the multidisciplinary ('Y') studies.
cochrane_days_mul['Days of rehabilitation'].describe()

In [None]:
# Two-sided Mann-Whitney U test on rehabilitation length:
# non-multidisciplinary vs. multidisciplinary studies.
days_non_multi = cochrane_days_nmul['Days of rehabilitation']
days_multi = cochrane_days_mul['Days of rehabilitation']
st.mannwhitneyu(days_non_multi, days_multi, alternative='two-sided')

##### Summary stats

In [None]:
# Number of included studies per edition. Series.value_counts() replaces the
# top-level pd.value_counts(), which is deprecated in recent pandas releases.
cochrane_sel['Edition'].value_counts()

In [None]:
# Histogram (20 bins) of rehabilitation length for studies that report one.
cochrane_days['Days of rehabilitation'].hist(bins=20)

In [None]:
# Histogram (10 bins) of sample sizes. `cochrane_days` is filtered on
# rehabilitation length only, so rows whose population was missing still carry
# the -1 placeholder; they are excluded to keep a bogus bar out of the plot.
cochrane_days.loc[cochrane_days['Population'] > 0, 'Population'].hist(bins=10)

In [None]:
# Included studies per 'Multidisciplinary' flag (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Multidisciplinary'].value_counts()

In [None]:
# Rehabilitation length: share of included studies reporting a positive value,
# followed by descriptive statistics of those values.
reported_days = cochrane_days.loc[cochrane_days['Days of rehabilitation'] > 0,
                                  'Days of rehabilitation']
print('Rehab days')
print(len(reported_days)/len(cochrane_sel))
print(reported_days.describe())

# Sample size: missing populations were filled with the -1 placeholder, so they
# are left out here; otherwise the median and describe() output are pulled down
# by values that are not real measurements.
reported_population = cochrane_sel.loc[cochrane_sel['Population'] > 0, 'Population']
print('Population')
print(reported_population.median())
print(reported_population.describe())

In [None]:
# Number of studies in `cochrane_days`.
len(cochrane_days)

In [None]:
# Included studies per 'Routine EHR data' value (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Routine EHR data'].value_counts()

In [None]:
# Share of included studies per rehabilitation service (Series method instead of
# the deprecated top-level pd.value_counts()).
cochrane_sel['Service'].value_counts(normalize=True)

In [None]:
# Included studies per 'Consecutive interventions' value (Series method instead
# of the deprecated top-level pd.value_counts()).
cochrane_sel['Consecutive interventions'].value_counts()

In [None]:
# Share of included studies per 'ICU-only' value (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['ICU-only'].value_counts(normalize=True)

In [None]:
# Group sizes: (multidisciplinary, non-multidisciplinary) studies with a reported rehabilitation length.
len(cochrane_days_mul), len(cochrane_days_nmul)

In [None]:
# Mean, standard deviation and 95% t-interval for the mean rehabilitation length,
# first for the non-multidisciplinary group and then for the multidisciplinary one.
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, so the positional form works on old and new releases.
for group_days in (cochrane_days_nmul['Days of rehabilitation'],
                   cochrane_days_mul['Days of rehabilitation']):
    print(group_days.mean(), group_days.std())
    print(st.t.interval(0.95,
                        df=len(group_days)-1,
                        loc=np.mean(group_days),
                        scale=st.sem(group_days)))

In [None]:
# Histogram of sample sizes with default binning. Rows whose population was
# missing carry the -1 placeholder and are excluded so they do not show up as data.
cochrane_days.loc[cochrane_days['Population'] > 0, 'Population'].hist()

In [None]:
# Histogram of rehabilitation length with default binning.
cochrane_days['Days of rehabilitation'].hist()

In [None]:
# Column labels of the included-studies frame.
cochrane_sel.columns

In [None]:
# Number of missing values remaining in each column.
cochrane_sel.isnull().sum()

#### Visualise COCHRANE editions

In [None]:
# Monthly record counts over every row that has an edition date,
# returned as a two-column frame ('month', 'counts').
cochrane_df_all_editions = cochrane_df.loc[cochrane_df['Edition'].notna()]
all_edition_months = pd.to_datetime(cochrane_df_all_editions['Edition']).dt.to_period('M')
ch_month_all = all_edition_months.value_counts().sort_index()
ch_month_all.index = pd.PeriodIndex(ch_month_all.index)
ch_month_all = ch_month_all.rename_axis('month').reset_index(name='counts')
ch_month_all

In [None]:
# Same monthly tally, restricted to the included studies.
included_edition_months = pd.to_datetime(cochrane_sel['Edition']).dt.to_period('M')
ch_month = included_edition_months.value_counts().sort_index()
ch_month.index = pd.PeriodIndex(ch_month.index)
ch_month = ch_month.rename_axis('month').reset_index(name='counts')
ch_month

In [None]:
## Export
# Figures go to a 'figures' folder under the current working directory;
# the folder is created on first use.
f = Path.cwd() / "figures"
f.mkdir(exist_ok=True)
f1 = f / "reh_cover_series_timeline.png"

In [None]:
# Line chart of monthly counts: included studies vs. every record with an edition date.
fig = go.Figure()
timeline_series = (
    (ch_month, 'darkgreen', 'Included in the study'),
    (ch_month_all, 'indianred', 'All present in edition'),
)
for monthly_counts, trace_colour, legend_label in timeline_series:
    fig.add_trace(go.Scatter(x=monthly_counts['month'].astype(dtype=str),
                             y=monthly_counts['counts'],
                             marker_color=trace_colour, text="counts", name=legend_label))

fig.update_layout(
    title='<b>Number of publications selected by REH-COVER edition</b>',
    template="simple_white",
    xaxis={"title": "<b>Timeline</b>"},
    yaxis={"title": "<b>Total papers</b>"},
    showlegend=True,
)
# Fixed x-range, a tick every two months, month/year tick labels.
fig.update_xaxes(range=['2020-02', '2022-03'])
fig.update_xaxes(minor=dict(ticks="inside", showgrid=False), dtick="M2", tickformat="%b\n%Y")
fig.write_image(f1, format='png', engine='kaleido', height=300, width=700, scale=2)
fig.show()

In [None]:
# Frequency of each raw 'Collection end' label (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Collection end'].value_counts()

In [None]:
def map_collection_months(x):
    """Convert a ``'Mon-YY'`` label (e.g. ``'Mar-20'``) to a first-of-month date string.

    Parameters
    ----------
    x : str
        Three-letter English month abbreviation, a hyphen, then the year suffix
        that is appended to ``'20'``.

    Returns
    -------
    str or int
        ``'20YY-MM-01'`` when the month abbreviation is recognised; otherwise the
        placeholder ``-1``. Non-string values (such as NaN) and labels without a
        year part also yield ``-1`` instead of raising.
    """
    month_numbers = {
        'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04',
        'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08',
        'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
    }
    # Missing cells arrive as float NaN, which has no .split().
    if not isinstance(x, str):
        return -1
    dtfield = x.split('-')
    month = month_numbers.get(dtfield[0])
    # Unknown month, or a bare month with nothing after the hyphen.
    if month is None or len(dtfield) < 2:
        return -1
    return '20' + str(dtfield[1]) + '-' + month + '-01'

In [None]:
# Convert both timeframe boundaries from month labels to date strings.
# NOTE: not idempotent - an already converted value such as '2020-01-01' does
# not start with a month abbreviation, so running this cell twice maps it to -1.
for boundary_col in ('Collection start', 'Collection end'):
    cochrane_sel[boundary_col] = cochrane_sel[boundary_col].apply(map_collection_months)

In [None]:
## Remove/consolidate certain rehab services
# Final label for each raw service name. The mapping is flattened: 'Hospital'
# goes straight to 'Rehabilitation in acute care', which is where the former
# two-step rename through 'Hospital-based rehabilitation' ended up.
service_labels = {
    'Hospital': 'Rehabilitation in acute care',
    'Hospital-based rehabilitation': 'Rehabilitation in acute care',
    'Community-based rehabilitation (CBR)': 'Community-based rehabilitation',
    'General outpatient rehabilitation': 'Outpatient rehabilitation',
    'Specialised outpatient rehabilitation': 'Outpatient rehabilitation',
    'General postacute rehabilitation': 'Post-acute rehabilitation',
    'Specialised postacute rehabilitation': 'Post-acute rehabilitation',
}
# .copy() detaches the filtered rows from `cochrane_sel`, so the column write
# below does not raise SettingWithCopyWarning.
cochrane_gantt = cochrane_sel[cochrane_sel['Service'] != 'Unidentified rehabilitation service'].copy()
cochrane_gantt['Service'] = cochrane_gantt['Service'].replace(service_labels)

In [None]:
# Preview the rows that feed the service timeline.
cochrane_gantt.head()

In [None]:
# Remove single time-point collections (start equals end)
covers_a_period = cochrane_gantt['Collection start'].ne(cochrane_gantt['Collection end'])
cochrane_gantt = cochrane_gantt.loc[covers_a_period]

In [None]:
# Number of studies left for the timeline.
len(cochrane_gantt)

In [None]:
# Distinct service labels present in the timeline data.
cochrane_gantt.Service.unique()

In [None]:
# Gantt-style chart: one bar per publication spanning its data-collection
# period, coloured by rehabilitation service.
service_colours = {
    "Rehabilitation in acute care": 'red',
    "Post-acute rehabilitation": 'darkorange',
    "Rehabilitation services at home": 'green',
    "Outpatient rehabilitation": 'darkblue',
}
service_order = [
    "Outpatient rehabilitation",
    "Rehabilitation services at home",
    "Post-acute rehabilitation",
    "Rehabilitation in acute care",
]
fig = px.timeline(cochrane_gantt,
                  x_start="Collection start",
                  x_end="Collection end",
                  y="Author",
                  color='Service',
                  color_discrete_map=service_colours,
                  category_orders=dict(Service=service_order),
                  labels={'Service': '<b>Service</b>'})

# The positional dict sets the title text; the `title` keyword then adds its font.
layout_settings = {
    "title": '<b>Reported COVID-19 rehabilitation service delivery timeline</b>',
    "template": "simple_white",
    "xaxis": {"title": "<b>Intervention timeframe</b>"},
    "yaxis": {"title": "<b>Publication</b>"},
    "showlegend": True,
    "height": 800,
    "legend": {'traceorder': 'reversed'},
}
fig.update_layout(layout_settings, title=dict(font=dict(size=22, color="black")))
fig.update_xaxes(range=['2020-01-01', '2021-06-01'], dtick="M1", tickformat="%b\n%Y")
fig.update_xaxes(title=dict(font=dict(size=18, color="black")))
fig.update_yaxes(title=dict(font=dict(size=18, color="black")))
fig.update_layout(legend=dict(font=dict(size=14)))

timeline_png = f.joinpath("reh_cover_service_timeline.png")
fig.write_image(timeline_png, format='png', engine='kaleido', height=900, width=1000, scale=2)
fig.show()

#### Look at intervention categories

In [None]:
# Included studies per intervention category (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Intervention category'].value_counts()

In [None]:
# Included studies per intervention sub-category (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Intervention sub-category'].value_counts()

#### Population count plot

In [None]:
# Studies with a reported (positive) population; the -1 placeholder rows drop out.
has_population = cochrane_sel['Population'].gt(0)
cochrane_count = cochrane_sel.loc[has_population]

In [None]:
# Number of studies with a reported population.
len(cochrane_count)

In [None]:
# Mean, standard deviation and 95% t-interval for the mean sample size.
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, so the positional form works on old and new releases.
reported_sizes = cochrane_count['Population']
print(reported_sizes.mean(), reported_sizes.std())
print(st.t.interval(0.95,
                    df=len(reported_sizes)-1,
                    loc=np.mean(reported_sizes),
                    scale=st.sem(reported_sizes)))

In [None]:
# Split by EHR usage: 'Yes' and 'Partially' count as EHR-based, 'No' as not.
ehr_usage = cochrane_count['Routine EHR data']
coch_ehr = cochrane_count.loc[ehr_usage.isin(['Yes', 'Partially'])]
coch_nehr = cochrane_count.loc[ehr_usage.eq('No')]

In [None]:
# Group sizes: (EHR-based, non-EHR) studies.
len(coch_ehr), len(coch_nehr)

In [None]:
# Overall median and mean sample size, then count/min/max/median per EHR group.
overall_population = cochrane_count['Population']
print(overall_population.median(), overall_population.mean())
group_stats = ['count', 'min', 'max', 'median']
for ehr_group in (coch_ehr, coch_nehr):
    print(ehr_group['Population'].agg(group_stats))

In [None]:
# 95% t-intervals for sample size in the EHR and non-EHR groups.
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, so the positional form works on old and new releases.
# NOTE(review): the interval is centred on the median while the scale is the
# standard error of the mean - confirm this is the intended estimate.
for ehr_group in (coch_ehr, coch_nehr):
    group_population = ehr_group['Population']
    print(st.t.interval(0.95,
                        df=len(ehr_group)-1,
                        loc=np.median(group_population),
                        scale=st.sem(group_population)))

In [None]:
# Shapiro-Wilk normality test on sample size, EHR group first, then non-EHR.
for ehr_group in (coch_ehr, coch_nehr):
    w, pvalue = st.shapiro(ehr_group['Population'])
    print(w, pvalue)

In [None]:
# Two-sided Mann-Whitney U test on sample size: EHR-based vs. non-EHR studies.
ehr_population = coch_ehr['Population']
non_ehr_population = coch_nehr['Population']
st.mannwhitneyu(ehr_population, non_ehr_population, alternative='two-sided')

In [None]:
# Largest reported sample size.
max(cochrane_count['Population'])

In [None]:
# Plot data: studies with a reported population, ordered by size.
cochrane_count = cochrane_sel[cochrane_sel['Population'] > 0]
cochrane_count = cochrane_count.sort_values('Population')
#cochrane_count = cochrane_count[~((cochrane_count['Population']>=150)&(cochrane_count['Routine EHR data'] == 'No'))]
# NOTE(review): the 400 cutoff presumably trims large studies for the box plots -
# confirm. Later cells that reuse `cochrane_count` see this truncated frame too.
# .copy() detaches the filtered rows so the relabelling below does not raise
# SettingWithCopyWarning.
cochrane_count = cochrane_count[cochrane_count['Population']<400].copy()
# Readable axis labels; values other than 'N'/'Y' become NaN.
cochrane_count['ICU-only'] = cochrane_count['ICU-only'].map({'N': 'Not ICU-only', 'Y': 'ICU-only'})

In [None]:
# Box plot of sample size by ICU-only status, one box per EHR-usage value.
fig = px.box(cochrane_count,
             x="ICU-only",
             y="Population",
             color='Routine EHR data',
             height=400)

fig.update_layout(
    title='<b>Sample size distribution of studies with a recorded population</b>',
    template="simple_white",
    xaxis={"title": "<b>Cases</b>"},
    yaxis={"title": "<b>Population</b>"},
    showlegend=True,
    height=400,
    width=600,
)
fig.update_xaxes(title=dict(font=dict(size=15, color="black")))
fig.update_yaxes(title=dict(font=dict(size=15, color="black")))
fig.update_layout(legend=dict(title='Sample utilises EHR data'))

sample_size_png = f.joinpath("reh_cover_sample_size.png")
fig.write_image(sample_size_png, format='png', engine='kaleido', height=400, width=600, scale=2)
fig.show()

In [None]:
# Box plot of sample size by EHR usage ('Yes' drawn before 'No').
ehr_order = {'Routine EHR data': ['Yes', 'No']}
fig = px.box(cochrane_count,
             x="Routine EHR data",
             y="Population",
             height=400,
             category_orders=ehr_order)

fig.update_layout(
    title='<b>Sample size distribution of studies with a recorded population</b>',
    template="simple_white",
    xaxis={"title": "<b>Use of EHR data</b>"},
    yaxis={"title": "<b>Population</b>"},
    showlegend=True,
    height=400,
    width=600,
)
fig.update_xaxes(title=dict(font=dict(size=15, color="black")))
fig.update_yaxes(title=dict(font=dict(size=15, color="black")))
fig.update_layout(legend=dict(title='Sample utilises EHR data'))

sample_size_2_png = f.joinpath("reh_cover_sample_size_2.png")
fig.write_image(sample_size_2_png, format='png', engine='kaleido', height=400, width=600, scale=2)
fig.show()

In [None]:
# Plot data: studies with a positive rehabilitation length below 72 days.
# NOTE(review): the 72-day cutoff presumably trims long outliers for the plot -
# confirm. This rebinds `cochrane_days`, so later cells see the trimmed frame.
cochrane_days = cochrane_sel[cochrane_sel['Days of rehabilitation'] > 0]
# .copy() detaches the filtered rows so the relabelling below does not raise
# SettingWithCopyWarning.
cochrane_days = cochrane_days[cochrane_days['Days of rehabilitation'] < 72].copy()
# Readable labels; values other than 'N'/'Y' become NaN.
cochrane_days['ICU-only'] = cochrane_days['ICU-only'].map({'N': 'Not ICU-only', 'Y': 'ICU-only'})

# Box plot of rehabilitation length by EHR usage.
fig = px.box(cochrane_days, x="Routine EHR data", y="Days of rehabilitation",
             height=400)

fig.update_layout({"title": '<b>Days of rehabilitation among studies with recorded duration</b>',
                   "template": "simple_white",
                   "xaxis": {"title":"<b>Use of EHR data</b>"},
                   "yaxis": {"title":"<b>Number of days</b>"},
                   "showlegend": True,
                   "height": 400,
                   "width": 600
                  })
fig.update_xaxes(title = dict(font=dict(size = 15, color = "black")))
fig.update_yaxes(title = dict(font=dict(size = 15, color = "black")))
fig.update_layout(legend=dict(title='Sample utilises EHR data'))
fig.write_image(f.joinpath("reh_cover_rehab_time.png"), format='png', engine='kaleido', height=400, width=600, scale=2)
fig.show()

In [None]:
# Two-sided Mann-Whitney U test on sample size: EHR 'Yes' vs. 'No'.
# NOTE(review): `cochrane_count` was re-filtered to populations below 400 for
# the plots above - confirm the test is meant to run on that subset.
count_ehr_flag = cochrane_count['Routine EHR data']
st.mannwhitneyu(x=cochrane_count.loc[count_ehr_flag == 'Yes', 'Population'],
                y=cochrane_count.loc[count_ehr_flag == 'No', 'Population'],
                alternative='two-sided')

In [None]:
# Two-sided Mann-Whitney U test on rehabilitation length: EHR 'Yes' vs. 'No'.
# NOTE(review): `cochrane_days` was re-filtered to 1-71 days for the plot above -
# confirm the test is meant to run on that subset.
days_ehr_flag = cochrane_days['Routine EHR data']
st.mannwhitneyu(x=cochrane_days.loc[days_ehr_flag == 'Yes', 'Days of rehabilitation'],
                y=cochrane_days.loc[days_ehr_flag == 'No', 'Days of rehabilitation'],
                alternative='two-sided')

In [None]:
# Within EHR-based ('Yes') studies: sample size of non-ICU-only vs. ICU-only
# studies (uses the relabelled 'ICU-only' values).
uses_ehr = cochrane_count['Routine EHR data'] == 'Yes'
icu_label = cochrane_count['ICU-only']
st.mannwhitneyu(x=cochrane_count.loc[uses_ehr & (icu_label == 'Not ICU-only'), 'Population'],
                y=cochrane_count.loc[uses_ehr & (icu_label == 'ICU-only'), 'Population'],
                alternative='two-sided')

In [None]:
# Within ICU-only studies: sample size of EHR 'Yes' vs. 'No' studies.
icu_only = cochrane_count['ICU-only'] == 'ICU-only'
icu_ehr_flag = cochrane_count['Routine EHR data']
st.mannwhitneyu(x=cochrane_count.loc[(icu_ehr_flag == 'Yes') & icu_only, 'Population'],
                y=cochrane_count.loc[(icu_ehr_flag == 'No') & icu_only, 'Population'],
                alternative='two-sided')

#### Category types

In [None]:
# Number of included studies whose intervention category mentions 'Cardio'.
# na=False treats a missing category as "no match"; without it a NaN in the
# mask makes the boolean indexing fail.
len(cochrane_sel[cochrane_sel['Intervention category'].str.contains('Cardio', na=False)])

In [None]:
# The matching 'Cardio' studies themselves. na=False treats a missing category
# as "no match" so a NaN in the mask cannot break the row selection.
cochrane_sel[cochrane_sel['Intervention category'].str.contains('Cardio', na=False)]

In [None]:
# Included studies per intervention category (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Intervention category'].value_counts()

In [None]:
# Case-insensitive keyword matches in the intervention category: the number of
# 'alternative' studies and the share of 'respiratory' studies.
# na=False counts a missing category as "no match" instead of failing on NaN.
category_lower = cochrane_sel['Intervention category'].str.lower()
print(len(cochrane_sel[category_lower.str.contains('alternative', na=False)]))
print(len(cochrane_sel[category_lower.str.contains('respiratory', na=False)])/len(cochrane_sel))

In [None]:
# Included studies per intervention sub-category (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Intervention sub-category'].value_counts()

#### Get study summary table

In [None]:
# Columns available for the study summary table.
cochrane_sel.columns

In [None]:
# Included studies per raw study design label (Series method instead of the
# deprecated top-level pd.value_counts()).
cochrane_sel['Study design'].value_counts()

In [None]:
# Collapse the detailed study designs into three groups:
# 'Cohort study', 'Controlled trial' and 'Other'.
study_design_groups = {
    "Historical cohort": "Cohort study",
    "Inception Cohort study": "Cohort study",
    "Retrospective uncontrolled observational study": "Cohort study",
    "Randomised controlled trial": "Controlled trial",
    "Pilot controlled trial": "Controlled trial",
    "Non randomised controlled trial": "Controlled trial",
    "Quasi-experimental study": "Other",
    "Case-control study": "Other",
    "Cross-sectional study": "Other",
    "Before-after/time series": "Other",
}
cochrane_sel['Study design'] = cochrane_sel['Study design'].replace(study_design_groups)

In [None]:
# Arguments for the TableOne summary: the stratifying column, the variables to
# report, and which of them are categorical or summarised as non-normal.
groupby = 'Study design'
t1_cols = [
    'Population',
    'ICU-only',
    'Routine EHR data',
    'Service',
    'Days of rehabilitation',
    'Consecutive interventions',
    'Multidisciplinary',
]
categorical = [
    'Routine EHR data',
    'ICU-only',
    'Service',
    'Consecutive interventions',
    'Multidisciplinary',
]
nonnormal = ['Population', 'Days of rehabilitation']

In [None]:
# dtypes of the summary-table variables (the same seven columns listed in `t1_cols`).
cochrane_sel[t1_cols].dtypes

In [None]:
# Frame for the summary table: the seven reported variables plus the stratifier,
# with a fresh 0..n-1 index.
table_columns = ['Population', 'ICU-only', 'Routine EHR data', 'Service',
                 'Days of rehabilitation', 'Consecutive interventions',
                 'Multidisciplinary', 'Study design']
coc_df = cochrane_sel[table_columns].reset_index(drop=True)

# Turn the -1 placeholder back into NaN for BOTH numeric columns so the table
# treats those studies as missing. Previously only 'Days of rehabilitation' was
# converted, leaving -1 values inside the 'Population' statistics.
for numeric_col in ('Population', 'Days of rehabilitation'):
    coc_df[numeric_col] = coc_df[numeric_col].where(coc_df[numeric_col] != -1)

# Final label for each raw service name. The mapping is flattened: 'Hospital'
# goes straight to 'Rehabilitation in acute care', which is where the former
# two-step rename through 'Hospital-based rehabilitation' ended up.
coc_df['Service'] = coc_df['Service'].replace({
    'Unidentified rehabilitation service': 'Unknown',
    'Hospital': 'Rehabilitation in acute care',
    'Hospital-based rehabilitation': 'Rehabilitation in acute care',
    'Community-based rehabilitation (CBR)': 'Community-based rehabilitation',
    'General outpatient rehabilitation': 'Outpatient rehabilitation',
    'Specialised outpatient rehabilitation': 'Outpatient rehabilitation',
    'General postacute rehabilitation': 'Post-acute rehabilitation',
    'Specialised postacute rehabilitation': 'Post-acute rehabilitation',
})

In [None]:
# Studies per consolidated service label (Series method instead of the
# deprecated top-level pd.value_counts()).
coc_df['Service'].value_counts()

In [None]:
# Display the frame that feeds the summary table.
# NOTE(review): this renders the whole frame; a .head() preview may be enough.
coc_df

In [None]:
# Build the study summary table and export it as HTML.
# The result gets its own name: binding it to `st` would overwrite the
# `scipy.stats as st` alias and break every statistics cell on a re-run.
# Keyword arguments keep the call independent of TableOne's positional order.
rehab_summary_table = TableOne(coc_df,
                               columns=t1_cols,
                               categorical=categorical,
                               groupby=groupby,
                               nonnormal=nonnormal,
                               pval=True)
rehab_summary_table.to_html('rehab_summary.html')