In [87]:
%%capture
import os
from pathlib import Path
import pandas as pd
from dj_notebook import activate
import numpy as np
from django_pandas.io import read_frame

env_file = os.environ["META_ENV"]
reports_folder = Path(os.environ["META_REPORTS_FOLDER"])
analysis_folder = Path(os.environ["META_ANALYSIS_FOLDER"])
pharmacy_folder = Path(os.environ["META_PHARMACY_FOLDER"])
plus = activate(dotenv_file=env_file)
pd.set_option('future.no_silent_downcasting', True)

In [88]:

import pdfkit
from datetime import date
from edc_pdutils.dataframes import get_subject_visit
from meta_analytics.dataframes import get_glucose_fbg_ogtt_df, get_glucose_fbg_df
from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39
from meta_analytics.dataframes import GlucoseEndpointsByDate
from scipy.stats import chi2
from great_tables import loc, style, md
from meta_analytics.dataframes import get_eos_df
from meta_analytics.utils import df_as_great_table, df_as_great_table2
from meta_prn.models import LossToFollowup
from edc_visit_schedule.models import SubjectScheduleHistory
from edc_appointment.analytics import get_appointment_df
from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT, SCHEDULED_APPT, COMPLETE_APPT, INCOMPLETE_APPT, IN_PROGRESS_APPT, UNSCHEDULED_APPT
from edc_constants.constants import YES
from meta_consent.models import SubjectConsentV1Ext


In [89]:
html_data = []
cutoff_date = date(2025,3, 31)
end_of_trial_date= date(2026,3, 1)
document_title = f"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {cutoff_date.strftime('%d %B %Y')}</h5>"
study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'
pdf_filename = f"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf"


In [90]:
df_visit = get_subject_visit("meta_subject.subjectvisit")
df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]
df_appointments = get_appointment_df()
df_appointments["site_id"] = df_appointments.site_id.astype(str)
cls = GlucoseEndpointsByDate()
cls.run()
df_endpoint = cls.endpoint_only_df.copy()
df_glucose = get_glucose_fbg_ogtt_df()
df_glucose_fbg = get_glucose_fbg_df()
df_glucose = pd.concat([df_glucose, df_glucose_fbg])

enrolled = df_visit.copy()
enrolled["site_id"] = enrolled["site_id"].astype(str)
enrolled_pivot = (
    enrolled
    .query("visit_code==1000.0").groupby(["site_id"])
    .size()
    .reset_index()
    .pivot_table(columns="site_id", values=0, observed=True)
)
enrolled_pivot.columns.name=""
enrolled_pivot["total"] = enrolled_pivot[["10", "20","30","40","60"]].sum(axis=1)



In [91]:
column_headers = {"label": "Label", "visit_code": "Visit code", "10": "Hindu Mandal", "20": "Amana", "30": "Temeke", "40": "Mwananyamala", "60": "Mnazi Moja", "total": "Total"}
column_headers_with_str = {"label": "Label", "10_str": "Hindu Mandal", "20_str": "Amana", "30_str": "Temeke", "40_str": "Mwananyamala", "60_str": "Mnazi Moja", "total_str": "Total"}

In [92]:
# Table 1a Visits completed to date
df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=["visit_code", "site_id"]).size().to_frame().reset_index()
df_tbl1.columns = ["visit_code", "site_id", "visits"]
df1 = df_tbl1.pivot(index="visit_code", columns="site_id", values="visits").reset_index()
df1.columns.name = None
df1.columns = ['visit_code', "10", "20", "30", "40", "60"]
df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)
df1.fillna(0, inplace=True)
df_attended = df1.copy().reset_index(drop=True)
df_attended = df_attended.fillna(0.0)

In [93]:
gt = df_as_great_table(
    df_attended[["visit_code", "10", "20", "30", "40", "60", "total"]],
    title="Table 1: Visits completed to date"
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code"])
    .data_color(
        columns=["visit_code"],
        palette=["lavender", "thistle"],
        domain=[2000, 5000],
        na_color="white"
    )
    .tab_source_note(source_note=f"Includes visits reports submitted for participants eventually withdrawn on late exclusion criteria.")
)
html_data.append(gt.as_raw_html())
gt.show()

Table 1: Visits completed to date,Table 1: Visits completed to date,Table 1: Visits completed to date,Table 1: Visits completed to date,Table 1: Visits completed to date,Table 1: Visits completed to date,Table 1: Visits completed to date
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1000.0,185.0,391.0,340.0,545.0,230.0,1691.0
1005.0,160.0,355.0,309.0,481.0,214.0,1519.0
1010.0,172.0,358.0,305.0,493.0,212.0,1540.0
1030.0,171.0,358.0,298.0,473.0,212.0,1512.0
1060.0,169.0,353.0,285.0,460.0,211.0,1478.0
1090.0,169.0,337.0,269.0,436.0,204.0,1415.0
1120.0,160.0,341.0,259.0,432.0,199.0,1391.0
1150.0,160.0,306.0,242.0,409.0,180.0,1297.0
1180.0,143.0,297.0,222.0,392.0,163.0,1217.0
1210.0,130.0,268.0,199.0,365.0,145.0,1107.0


In [94]:
# Table 1b Total scheduled appointments
df_appt_pivot = (
    df_appointments.query("appt_reason==@SCHEDULED_APPT")
    .groupby(["visit_code", "site_id"])
    .size()
    .to_frame()
    .reset_index()
    .pivot(index="visit_code", columns="site_id", values=0)
    .reset_index()
    .fillna(0)
)

df_appt_pivot["total"] = df_appt_pivot.iloc[:,1:].sum(axis=1)
df_appt_pivot.columns.name = None
gt = df_as_great_table(
    df_appt_pivot,
    title="Table 1b: Total appointments",
    subtitle="Total possible appointments not including unscheduled appointments"

)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code"])
    .data_color(
        columns=["visit_code"],
        palette=["lavender", "thistle"],
        domain=[2000, 5000],
        na_color="white"
    )
)
html_data.append(gt.as_raw_html())
gt.show()

Table 1b: Total appointments,Table 1b: Total appointments,Table 1b: Total appointments,Table 1b: Total appointments,Table 1b: Total appointments,Table 1b: Total appointments,Table 1b: Total appointments
Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments,Total possible appointments not including unscheduled appointments
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1000.0,185,391,340,545,230,1691
1005.0,180,374,333,532,227,1646
1010.0,179,372,329,531,224,1635
1030.0,177,372,327,528,224,1628
1060.0,176,366,325,523,220,1610
1090.0,171,364,323,522,215,1595
1120.0,169,363,322,513,210,1577
1150.0,165,355,320,504,200,1544
1180.0,163,353,319,500,196,1531
1210.0,161,351,317,493,193,1515


In [95]:
# Table 1c Past scheduled appointments -- no information provided
df_appt_pivot = (
    df_appointments.query("appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])")
    .groupby(["visit_code", "site_id"])
    .size()
    .to_frame()
    .reset_index()
    .pivot(index="visit_code", columns="site_id", values=0)
    .reset_index()
    .fillna(0)
)
df_appt_pivot["total"] = df_appt_pivot.iloc[:,1:].sum(axis=1)
df_appt_pivot.columns.name = None
gt = df_as_great_table(
    df_appt_pivot,
    title="Table 1c: Past appointments not attended/not reported",
    subtitle="Expected by now but no information provided by site",
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code"])
    .data_color(
        columns=["visit_code"],
        palette=["lavender", "thistle"],
        domain=[2000, 5000],
        na_color="white"
    )
    .tab_source_note(source_note=f"Scheduled appointment date is before {cutoff_date.strftime('%d %B %Y')}.")
)
html_data.append(gt.as_raw_html())
gt.show()

Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported,Table 1c: Past appointments not attended/not reported
Expected by now but no information provided by site,Expected by now but no information provided by site,Expected by now but no information provided by site,Expected by now but no information provided by site,Expected by now but no information provided by site,Expected by now but no information provided by site,Expected by now but no information provided by site
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1005.0,0.0,1.0,1.0,0.0,2.0,4.0
1010.0,0.0,2.0,2.0,2.0,1.0,7.0
1030.0,1.0,6.0,2.0,3.0,4.0,16.0
1060.0,4.0,5.0,0.0,1.0,6.0,16.0
1090.0,1.0,6.0,0.0,5.0,6.0,18.0
1120.0,1.0,12.0,2.0,1.0,7.0,23.0
1150.0,3.0,18.0,11.0,5.0,10.0,47.0
1180.0,3.0,19.0,23.0,4.0,7.0,56.0
1210.0,2.0,32.0,43.0,18.0,12.0,107.0
1240.0,2.0,27.0,40.0,21.0,9.0,99.0


In [96]:
# Table 1d Unscheduled appointments
df_appt = df_appointments.query("appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT").copy().reset_index(drop=True)
df_appt['visit_code'] = df_appt['visit_code'].astype(int)
df_appt['visit_code'] = df_appt['visit_code'].astype(str)

subjects_with_unscheduled = df_appt.subject_identifier.nunique()

df_appt_pivot = (
    df_appt
    .groupby(["visit_code", "site_id"])
    .size()
    .to_frame()
    .reset_index()
    .pivot(index="visit_code", columns="site_id", values=0)
    .reset_index()
    .fillna(0)
)
df_appt_pivot["total"] = df_appt_pivot.iloc[:,1:].sum(axis=1)
df_appt_pivot.columns.name = None
df_appt_pivot[["10", "20", "30", "40", "60", "total"]] = df_appt_pivot[["10", "20", "30", "40", "60", "total"]].astype('float64')


# add totals row
sum_row = df_appt_pivot.select_dtypes(include='float64').sum()
sum_row['visit_code'] = 'Total'
sum_row_df = pd.DataFrame(sum_row).T
df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0).reset_index(drop=True)

gt = df_as_great_table(
    df_appt_pivot,
    title="Table 1d: Unscheduled appointments",
    subtitle="Appointments with sequence>0 grouped by visit code",
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code"])
    .data_color(
        columns=["visit_code"],
        palette=["lavender", "thistle"],
        domain=[2000, 5000],
        na_color="white"
    )
    .fmt_number(columns=["10", "20", "30", "40", "60", "total"], decimals=0)
    .tab_source_note(source_note=f"{subjects_with_unscheduled} participants had at least one unscheduled appointment.")
)
html_data.append(gt.as_raw_html())
gt.show()

Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments,Table 1d: Unscheduled appointments
Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code,Appointments with sequence>0 grouped by visit code
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1000,1,26,6,2,1,36
1005,0,1,3,3,1,8
1010,6,5,2,0,1,14
1030,9,13,9,12,1,44
1060,5,16,13,8,3,45
1090,6,7,9,8,2,32
1120,6,25,7,7,1,46
1150,1,9,4,6,0,20
1180,6,14,3,11,5,39
1210,3,4,1,3,0,11


In [97]:
# Table 1e Future scheduled appointments
df_appt_pivot = (
    df_appointments.query("@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])")
    .groupby(["visit_code", "site_id"])
    .size()
    .to_frame()
    .reset_index()
    .pivot(index="visit_code", columns="site_id", values=0)
    .reset_index()
    .fillna(0)
)
df_appt_pivot["total"] = df_appt_pivot.iloc[:,1:].sum(axis=1)
df_appt_pivot.columns.name = None
gt = df_as_great_table(
    df_appt_pivot,
    title="Table 1e: Future appointments",
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code"])
    .data_color(
        columns=["visit_code"],
        palette=["lavender", "thistle"],
        domain=[2000, 5000],
        na_color="white"
    )
    .fmt_number(columns=["10", "20", "30", "40", "60", "total"], decimals=0)
    .tab_source_note(source_note=f"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.")
)
html_data.append(gt.as_raw_html())
gt.show()

Table 1e: Future appointments,Table 1e: Future appointments,Table 1e: Future appointments,Table 1e: Future appointments,Table 1e: Future appointments,Table 1e: Future appointments,Table 1e: Future appointments
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1150.0,0,1,0,4,1,6
1180.0,11,18,21,23,17,90
1210.0,23,25,29,41,27,145
1240.0,39,53,71,98,66,327
1270.0,66,76,96,167,87,492
1300.0,76,144,143,228,126,717
1330.0,91,175,169,272,135,842
1360.0,89,228,183,312,106,918
1390.0,18,38,14,21,2,93
1420.0,19,47,24,30,1,121


In [98]:
# Table 2 Visits Missed to Date as % of Visits Attended + Visits Missed
subject_count = df_visit.query("visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])").subject_identifier.nunique()
df_tbl = (
    df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]
    .groupby(by=["visit_code", "site_id"])
    .size()
    .to_frame()
    .reset_index()
)
df_tbl.columns = ["visit_code", "site_id", "visits"]
df_tbl_pivot = df_tbl.pivot(index="visit_code", columns="site_id", values="visits").reset_index()
df_tbl_pivot.columns.name = None
df_tbl_pivot.columns = ['visit_code', "10", "20", "30", "40", "60"]
df_tbl_pivot['total'] = df_tbl_pivot[['10', '20', '30', '40', '60']].sum(axis=1)
df_missed = (
    df_tbl_pivot
    .fillna(0)
    .copy()
    .set_index(["visit_code"])
)

df_attended_display = df_attended.copy()
df_attended_display = (
    df_attended_display
    .set_index(["visit_code"])
)

attended_and_missed = df_attended_display + df_missed
attended_and_missed = (
    attended_and_missed
    .fillna(0)
    .reset_index()
    .set_index(["visit_code"])
)

attended_and_missed_perc = df_missed/attended_and_missed
attended_and_missed_perc = (
    attended_and_missed_perc
    .fillna(0)
    .reset_index()
    .set_index(["visit_code"])
)

df_result = df_missed.merge(attended_and_missed_perc, on=["visit_code"], suffixes=("", "_perc"))
for col in  ["10", "20", "30", "40", "60", "total"]:
    col_perc = f"{col}_perc"
    df_result[col] = df_result.apply(lambda x: f"{x[col]} ({x[col_perc]*100:.2f})", axis=1)
df_result = df_result.reset_index().sort_values(by=["visit_code"], ascending=True)
df_result = df_result.fillna(0.0)

In [99]:
df_table = df_result[["visit_code", "10", "20", "30", "40", "60", "total"]].copy()
gt = df_as_great_table(
    df_table,
    title="Table 2a: Visits Missed to Date",
    subtitle="as % of Visits Attended + Visits Missed"
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k!="label"})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["visit_code", "label"])
    .tab_style(
        style=[style.fill(color="snow"), style.text(color="black")],
        locations=loc.body(
            columns=[0],
            rows=list(range(0, len(df_table))),
        ),
    )
    .tab_source_note(source_note=f"{subject_count} participants had at least one missed visit.")

)
html_data.append(gt.as_raw_html())
gt.show()


Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date,Table 2a: Visits Missed to Date
as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed,as % of Visits Attended + Visits Missed
Visit code,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1005.0,20.0 (11.11),18.0 (4.83),23.0 (6.93),51.0 (9.59),11.0 (4.89),123.0 (7.49)
1010.0,7.0 (3.91),12.0 (3.24),22.0 (6.73),36.0 (6.81),11.0 (4.93),88.0 (5.41)
1030.0,5.0 (2.84),8.0 (2.19),27.0 (8.31),52.0 (9.90),8.0 (3.64),100.0 (6.20)
1060.0,3.0 (1.74),8.0 (2.22),40.0 (12.31),62.0 (11.88),3.0 (1.40),116.0 (7.28)
1090.0,1.0 (0.59),21.0 (5.87),54.0 (16.72),81.0 (15.67),5.0 (2.39),162.0 (10.27)
1120.0,8.0 (4.76),10.0 (2.85),61.0 (19.06),80.0 (15.62),4.0 (1.97),163.0 (10.49)
1150.0,1.0 (0.62),29.0 (8.66),67.0 (21.68),85.0 (17.21),9.0 (4.76),191.0 (12.84)
1180.0,2.0 (1.38),17.0 (5.41),51.0 (18.68),78.0 (16.60),7.0 (4.12),155.0 (11.30)
1210.0,5.0 (3.70),21.0 (7.27),43.0 (17.77),67.0 (15.51),6.0 (3.97),142.0 (11.37)
1240.0,11.0 (9.40),10.0 (3.75),29.0 (14.08),49.0 (13.57),4.0 (3.51),103.0 (9.67)


In [100]:
# Table 2b: Number of missed visits by participant
subject_count = df_visit.query("visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])").subject_identifier.nunique()
df_tbl = (
    df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]
    .groupby(by=["subject_identifier", "site_id"])
    .size()
    .to_frame()
    .reset_index()
)
df_tbl.columns = ["subject_identifier", "site_id", "missed_count"]
df_tbl["category"] = pd.cut(df_tbl["missed_count"], bins=[0, 1, 3, 5, 7, 100], labels=["Missed at least 1", "2 to 3", "4 to 5", "6 to 7", "missed more than 7"])
df_tbl_pivot = df_tbl.pivot_table(index="category", columns="site_id", values="missed_count", observed=False, aggfunc="count").reset_index()

df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)

sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()
sum_row['category'] = 'Total'


df_tbl_pivot = (
    pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)
    .rename(columns={10: "10", 20: "20", 30: "30", 40: "40", 60: "60"})
)

gt = df_as_great_table(
    df_tbl_pivot,
    title="Table 2b: Number of participants who missed one or more visits",
)
gt = (
    gt
    .cols_label({"category": "Category", **{k:v for k, v in column_headers.items() if k not in ["visit_code", "label"]}})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["category"])
    .tab_style(
        style=[style.fill(color="snow"), style.text(color="black")],
        locations=loc.body(
            columns=[0],
            rows=list(range(0, len(df_table))),
        ),
    )
)
html_data.append(gt.as_raw_html())
gt.show()



Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits,Table 2b: Number of participants who missed one or more visits
Category,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Missed at least 1,29,52,50,76,26,233
2 to 3,19,39,54,62,11,185
4 to 5,0,6,27,30,4,67
6 to 7,0,1,20,28,0,49
missed more than 7,0,0,3,17,0,20
Total,48,98,154,213,41,554


In [101]:
# func for tables 3,4,5
def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:
    row_df = row_df.groupby(by=["site_id"]).site_id.count().to_frame(name="n")
    row_df["label"] = label
    row_df = row_df.reset_index()
    row_df = row_df.pivot(index="label", values="n", columns="site_id").reset_index()
    row_df.columns.name = ""
    all_sites = [10, 20, 30, 40, 60]
    for site in all_sites:
        if site not in row_df.columns:
            row_df[site] = None
    row_df = row_df.reset_index(drop=True)
    return row_df


def get_table_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:
    df_month = df_source[df_source.visit_code==visit_code].copy()
    
    row_df = df_month.copy()
    table_df = get_row_df(row_df, "Total (n)")
    
    row_df = df_month.query("ogtt_value<7.8 and fbg_value<6.1").copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT <7.8; FBG <6.1")])
    
    row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT <7.8; FBG >=6.1 <7.0")])
    
    row_df = df_month[(df_month.ogtt_value<7.8) & (df_month.fbg_value>=7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT <7.8; FBG >=7.0")])
    
    row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value<6.1)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥7.8 to <11.1; FBG <6.1")])
    
    row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0")])
    
    row_df = df_month[(df_month.ogtt_value>=7.8) & (df_month.ogtt_value<11.1) & (df_month.fbg_value>=7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥7.8 to <11.1; FBG >=7.0")])
    
    row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value<6.1)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥11.1; FBG <6.1")])
    
    row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥11.1; FBG >=6.1 <7.0")])
    
    row_df = df_month[(df_month.ogtt_value>=11.1) & (df_month.fbg_value>=7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "OGTT ≥11.1; FBG >=7.0")])

    row_df = df_month[(df_month.ogtt_value.isna())].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "Missing OGTT")])
    return table_df


def format_table_df(tbl_df, add_totals:bool|None=None):
    add_totals = True if add_totals is None else add_totals
    tbl_df = tbl_df.fillna(0.0)
    tbl_df["total"] = tbl_df.iloc[:,1:].sum(axis=1)
    tbl_df = tbl_df.reset_index(drop=True)

    if add_totals:
        df_last = tbl_df[1:].sum().to_frame()
        df_last.loc["label"] = np.nan
        df_last = df_last.reset_index()
        df_last.columns = ["label", "value"]
        df_last = df_last.pivot_table(columns="label",  values="value").reset_index(drop=True)
        df_last.columns.name = ""
        df_last["label"] = "Totals"

        tbl_df = pd.concat([tbl_df, df_last])
        tbl_df = tbl_df.reset_index(drop=True)

    tbl_df.columns = ["label", "10", "20", "30", "40", "60", "total"]

    for site in ["10", "20", "30", "40", "60", "total"]:
        tbl_df[f"{site}_perc"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0
        tbl_df[f"{site}_perc_str"] = tbl_df[f"{site}_perc"].map('{:.1f}'.format)


    for site in ["10", "20", "30", "40", "60", "total"]:
        tbl_df[f"{site}_str"] = tbl_df[[f"{site}", f"{site}_perc_str"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)
        tbl_df[f"{site}_str"] = tbl_df[f"{site}_str"] + ")"

    cols = ["label", *[f"{site}_str" for site in ["10", "20", "30", "40", "60", "total"]]]
    tbl_df1 = tbl_df[cols]
    tbl_df1.loc[tbl_df.label=="Total (n)"] = tbl_df.iloc[0][["label", "10", "20", "30", "40", "60", "total"]].to_list()
    return tbl_df1

In [102]:
# Table 3: OGTT and FBG at 12-month visit
df_table3 = get_table_df(df_glucose, 1120.0)
df_table3 = format_table_df(df_table3)
df_table3 = df_table3.fillna(0.0)
gt = df_as_great_table(df_table3, title="Table 3: OGTT and FBG at 12-month visit")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})
)
html_data.append(gt.as_raw_html())
gt.show()


Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit,Table 3: OGTT and FBG at 12-month visit
Label,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Total (n),157,340,259,433,198,1387
OGTT <7.8; FBG <6.1,41 (26.1),155 (45.6),126 (48.6),119 (27.5),112 (56.6),553 (39.9)
OGTT <7.8; FBG >=6.1 <7.0,42 (26.8),85 (25.0),56 (21.6),125 (28.9),12 (6.1),320 (23.1)
OGTT <7.8; FBG >=7.0,4 (2.5),8 (2.4),3 (1.2),27 (6.2),1 (0.5),43 (3.1)
OGTT ≥7.8 to <11.1; FBG <6.1,15 (9.6),31 (9.1),41 (15.8),39 (9.0),39 (19.7),165 (11.9)
OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0,36 (22.9),45 (13.2),26 (10.0),82 (18.9),22 (11.1),211 (15.2)
OGTT ≥7.8 to <11.1; FBG >=7.0,9 (5.7),7 (2.1),4 (1.5),36 (8.3),1 (0.5),57 (4.1)
OGTT ≥11.1; FBG <6.1,0.0 (0.0),1 (0.3),1 (0.4),0.0 (0.0),1 (0.5),3.0 (0.2)
OGTT ≥11.1; FBG >=6.1 <7.0,2 (1.3),1 (0.3),0.0 (0.0),0.0 (0.0),7 (3.5),10.0 (0.7)
OGTT ≥11.1; FBG >=7.0,3 (1.9),3 (0.9),1 (0.4),3 (0.7),3 (1.5),13 (0.9)


In [103]:
# Table 4: OGTT and FBG at 24-month visit
df_table4 = get_table_df(df_glucose, 1240.0)
df_table4 = format_table_df(df_table4)
df_table4 = df_table4.fillna(0.0)
gt = df_as_great_table(df_table4, title="Table 4: OGTT and FBG at 24-month visit")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})
)
html_data.append(gt.as_raw_html())
gt.show()

Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit,Table 4: OGTT and FBG at 24-month visit
Label,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Total (n),107,257,173,316,113,966
OGTT <7.8; FBG <6.1,37 (34.6),120 (46.7),63 (36.4),104 (32.9),54 (47.8),378 (39.1)
OGTT <7.8; FBG >=6.1 <7.0,23 (21.5),60 (23.3),46 (26.6),90 (28.5),18 (15.9),237 (24.5)
OGTT <7.8; FBG >=7.0,2 (1.9),0.0 (0.0),4 (2.3),9 (2.8),0.0 (0.0),15.0 (1.6)
OGTT ≥7.8 to <11.1; FBG <6.1,14 (13.1),34 (13.2),23 (13.3),28 (8.9),24 (21.2),123 (12.7)
OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0,15 (14.0),37 (14.4),34 (19.7),65 (20.6),11 (9.7),162 (16.8)
OGTT ≥7.8 to <11.1; FBG >=7.0,1 (0.9),3 (1.2),3 (1.7),16 (5.1),2 (1.8),25 (2.6)
OGTT ≥11.1; FBG <6.1,0.0 (0.0),1 (0.4),0.0 (0.0),0.0 (0.0),1 (0.9),2.0 (0.2)
OGTT ≥11.1; FBG >=6.1 <7.0,0.0 (0.0),0.0 (0.0),0.0 (0.0),0.0 (0.0),1 (0.9),1.0 (0.1)
OGTT ≥11.1; FBG >=7.0,1 (0.9),1 (0.4),0.0 (0.0),2 (0.6),0.0 (0.0),4.0 (0.4)


In [104]:
# Table 5: OGTT and FBG at 36-month visit
df_table5 = get_table_df(df_glucose, 1360.0)
df_table5 = format_table_df(df_table5)
df_table5 = df_table5.fillna(0.0)
gt = df_as_great_table(df_table5, title="Table 5: OGTT and FBG at 36-month visit")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})
)
html_data.append(gt.as_raw_html())
gt.show()

Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit,Table 5: OGTT and FBG at 36-month visit
Label,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Total (n),19,44,27,34,0.0,124.0
OGTT <7.8; FBG <6.1,9 (47.4),15 (34.1),3 (11.1),5 (14.7),0.0 (0.0),32.0 (25.8)
OGTT <7.8; FBG >=6.1 <7.0,2 (10.5),10 (22.7),13 (48.1),12 (35.3),0.0 (0.0),37.0 (29.8)
OGTT <7.8; FBG >=7.0,1 (5.3),1 (2.3),1 (3.7),3 (8.8),0.0 (0.0),6.0 (4.8)
OGTT ≥7.8 to <11.1; FBG <6.1,2 (10.5),8 (18.2),3 (11.1),2 (5.9),0.0 (0.0),15.0 (12.1)
OGTT ≥7.8 to <11.1; FBG >=6.1 <7.0,2 (10.5),9 (20.5),7 (25.9),12 (35.3),0.0 (0.0),30.0 (24.2)
OGTT ≥11.1; FBG >=6.1 <7.0,0.0 (0.0),1 (2.3),0.0 (0.0),0.0 (0.0),0.0 (0.0),1.0 (0.8)
Missing OGTT,3 (15.8),0.0 (0.0),0.0 (0.0),0.0 (0.0),0.0 (0.0),3.0 (2.4)
Totals,19.0 (100.0),44.0 (100.0),27.0 (100.0),34.0 (100.0),0.0 (0.0),124.0 (100.0)


In [105]:
# Table 6: Any OGTT>11.1 ever
row_df = df_glucose[df_glucose.ogtt_value>=11.1].copy()
table_df = get_row_df(row_df, "Total (n)")
df_table6 = format_table_df(table_df)
df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)
gt = df_as_great_table(df_table, title="Table 6: Any OGTT>11.1 ever")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "35%"})

)
html_data.append(gt.as_raw_html())
gt.show()

Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever,Table 6: Any OGTT>11.1 ever
Label,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Total (n),6,8,3,11,19,47


In [106]:
# func for table 7
def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:
    df_month = df_source[(df_source.visit_code>=visit_code) & (df_source.visit_code<=visit_code + 0.9)].copy()

    row_df = df_month.copy()
    table_df = get_row_df(row_df, "Total (n)")

    row_df = df_month[(df_month.fbg_value<6.1)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "FBG <6.1")])

    row_df = df_month[(df_month.fbg_value>=6.1) & (df_month.fbg_value<7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "FBG >=6.1 <7.0")])

    row_df = df_month[(df_month.fbg_value>=7.0)].copy()
    table_df = pd.concat([table_df, get_row_df(row_df, "FBG >=7.0")])
    return table_df

In [107]:
# Table 7: Interim FBG results
df_table7 = get_table7_df(df_glucose, 1150.0)
df_table7 = format_table_df(df_table7, add_totals=False)
df_table7["visit_code"] = MONTH15

df_table71 = get_table7_df(df_glucose, 1180.0)
df_table71 = format_table_df(df_table71, add_totals=False)
df_table71["visit_code"] = MONTH18

df_table72 = get_table7_df(df_glucose, 1210.0)
df_table72 = format_table_df(df_table72, add_totals=False)
df_table72["visit_code"] = MONTH21

df_table73 = get_table7_df(df_glucose, 1270.0)
df_table73 = format_table_df(df_table73, add_totals=False)
df_table73["visit_code"] = MONTH27

df_table74 = get_table7_df(df_glucose, 1300.0)
df_table74 = format_table_df(df_table74, add_totals=False)
df_table74["visit_code"] = MONTH30

df_table75 = get_table7_df(df_glucose, 1330.0)
df_table75 = format_table_df(df_table75, add_totals=False)
df_table75["visit_code"] = MONTH33

df_table76 = get_table7_df(df_glucose, 1390.0)
df_table76 = format_table_df(df_table76, add_totals=False)
df_table76["visit_code"] = MONTH39

df_table = pd.concat([df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76])
df_table = df_table.reset_index(drop=True)
df_table = df_table.fillna(0.0)

In [108]:
column_headers_with_str = {"visit_code": "Visit Code", **column_headers_with_str}
gt = df_as_great_table2(df_table, title="Table 7: Interim FBG results")
gt = (
    gt
    .cols_label(column_headers_with_str)
    .cols_move_to_start(columns="visit_code")
    .cols_align(align="center", columns=["10_str", "20_str", "30_str", "40_str", "60_str", "total_str"])
    .cols_align(align="left", columns=["visit_code", "label"])
    .cols_width(cases={"label": "15%"})
    .tab_style(
        style=[
            style.text(color="black", weight="bold"),
            style.fill(color="lightgray")
        ],
        locations=loc.row_groups()
    )
)
html_data.append(gt.as_raw_html())
gt.show()

Table 7: Interim FBG results,Table 7: Interim FBG results,Table 7: Interim FBG results,Table 7: Interim FBG results,Table 7: Interim FBG results,Table 7: Interim FBG results,Table 7: Interim FBG results
Unnamed: 0_level_1,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
1150,1150,1150,1150,1150,1150,1150
Total (n),114,269,193,319,100,995
FBG <6.1,48 (42.1),204 (75.8),127 (65.8),168 (52.7),64 (64.0),611 (61.4)
FBG >=6.1 <7.0,49 (43.0),53 (19.7),57 (29.5),105 (32.9),26 (26.0),290 (29.1)
FBG >=7.0,17 (14.9),9 (3.3),8 (4.1),46 (14.4),10 (10.0),90 (9.0)
1180,1180,1180,1180,1180,1180,1180
Total (n),148,348,223,408,169,1296
FBG <6.1,79 (53.4),237 (68.1),131 (58.7),177 (43.4),106 (62.7),730 (56.3)
FBG >=6.1 <7.0,61 (41.2),103 (29.6),86 (38.6),157 (38.5),55 (32.5),462 (35.6)
FBG >=7.0,6 (4.1),6 (1.7),3 (1.3),69 (16.9),8 (4.7),92 (7.1)


In [109]:
# Table 8: Primary Endpoint met
df_endpoint_grp = df_endpoint.groupby(by=["site_id", "endpoint_label"]).size().to_frame().reset_index()
df_endpoint_grp.columns = ["site_id", "label", "endpoints"]
df_endpoint_pivot = df_endpoint_grp.pivot_table(index="label", columns="site_id", values="endpoints").reset_index()
df_endpoint_pivot.columns.name = ""
df_endpoint_pivot.columns = ['label', "10", "20", "30", "40", "60"]
df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum().to_dict()
df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'
df_endpoint_pivot['total'] = df_endpoint_pivot[['10', '20', '30', '40', '60']].sum(axis=1)
df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)

In [110]:
gt = df_as_great_table(
    df_endpoint_pivot,
    title="Table 8: Primary Endpoint met"
)
gt = (
    gt
    .cols_label({k:v for k, v in column_headers.items() if k not in ["visit_code"]})
    .cols_align(align="center", columns=["10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["label"])
    .cols_width(cases={"label": "25%"})
)
html_data.append(gt.as_raw_html())
gt.show()

Table 8: Primary Endpoint met,Table 8: Primary Endpoint met,Table 8: Primary Endpoint met,Table 8: Primary Endpoint met,Table 8: Primary Endpoint met,Table 8: Primary Endpoint met,Table 8: Primary Endpoint met
Label,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
EOS - Patient developed diabetes,2.0,0.0,0.0,1.0,1.0,4.0
"FBG >= 7 x 2, first OGTT<=11.1",1.0,1.0,0.0,17.0,1.0,20.0
OGTT >= 11.1,6.0,8.0,3.0,10.0,19.0,46.0
Total,9.0,9.0,3.0,28.0,21.0,70.0


In [111]:
# Table 9: Incident Rate per 1000 person years

def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None):
    if not lower_days:
        lower_days = -1
    cutoff_datetime = df_visit.query("@lower_days<followup_days<=@upper_days").visit_datetime.max()
    # exclude subjects for this reason
    offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']

    df_eos = get_eos_df()
    df_eos_excluded = (
        df_eos
        .query("followup_days>@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)")
        .copy()
        .reset_index()
    )
    df_visit_final = (
        df_visit.query("@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0")
        .merge(df_eos_excluded[["subject_identifier"]], on="subject_identifier", how="left", suffixes=("", "_y"), indicator=True)
        .query("_merge=='left_only'")
        .drop(columns=["_merge"])
    )
    df_main = (
        df_visit_final
        .groupby(by=["subject_identifier"])[["baseline_datetime", "visit_datetime", "followup_days"]]
        .max()
        .reset_index()
    )

    df_main = (
        df_main
        .merge(
            df_endpoint.query("days_to_endpoint>@lower_days")[["subject_identifier", "endpoint_label", "endpoint_type", "days_to_endpoint"]],
            how="left",
            on=["subject_identifier"])
        .reset_index(drop=True)
    )
    if lower_days>=365.25:
        df_main["followup_days"] = df_main["followup_days"] - lower_days
    df_main["followup_years"] = df_main["followup_days"]/365.25
    return df_main, len(df_main), len(df_main.query("@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()"))

def get_rate_and_ci(events, person_years_total):
    lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000
    upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000
    return events/person_years_total*1000, lower_ci, upper_ci

def get_incidence_data(term:str, lower_days:float, upper_days:float):
    data = {}
    df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)
    person_years_total = df_main.followup_years.sum()
    data.update({term:[person_years_total, subjects, events, *get_rate_and_ci(events, person_years_total)]})
    return data

In [112]:
incidence_data = {}
incidence_data.update(get_incidence_data("total", lower_days=-1, upper_days=10000))
incidence_data.update(get_incidence_data("0-1 years", lower_days=-1, upper_days=365.25))
incidence_data.update(get_incidence_data("1-2 years", lower_days=365.25, upper_days=2 * 365.25))
incidence_data.update(get_incidence_data("2-3 years", lower_days=2 * 365.25, upper_days=3 * 365.25))
incidence_data.update(get_incidence_data("3+ years", lower_days=3 * 365.25, upper_days=10 * 365.25))
data = dict(label=[], person_years=[], subjects=[], failures=[], rate=[], lower_ci=[], upper_ci=[])
for k in incidence_data:
    data["label"].append(k)

for v in incidence_data.values():
    data["person_years"].append(v[0])
    data["subjects"].append(v[1])
    data["failures"].append(v[2])
    data["rate"].append(v[3])
    data["lower_ci"].append(v[4])
    data["upper_ci"].append(v[5])

df_table9 = pd.DataFrame(data={k:v for k,v in data.items() if k!="subjects"})

In [113]:
gt = df_as_great_table(
    df_table9,
    title="Table 9: Incident Rate per 1000 person years",
    subtitle=md("using randomisation to diabetes/last seen"),
)
gt = gt.fmt_number(columns=["person_years", "failures", "rate", "lower_ci", "upper_ci"], decimals=2)
gt = (gt
    .cols_label({"label": "Label", "person_years": "Person years", "failures": "Failures", "rate": "Rate", "lower_ci": "Lower", "upper_ci": "Upper"})
    .cols_align(align="left", columns=["label"])
    .cols_align(align="center", columns=["person_years", "failures", "rate", "lower_ci", "upper_ci"])
    .tab_spanner(
        label="95%CI",
        columns=["lower_ci", "upper_ci"],
    )
    .tab_source_note(source_note="Excluding patients withdrawn for `late exclusion` criteria")
)
gt.show()
html_data.append(gt.as_raw_html())

Table 9: Incident Rate per 1000 person years,Table 9: Incident Rate per 1000 person years,Table 9: Incident Rate per 1000 person years,Table 9: Incident Rate per 1000 person years,Table 9: Incident Rate per 1000 person years,Table 9: Incident Rate per 1000 person years
using randomisation to diabetes/last seen,using randomisation to diabetes/last seen,using randomisation to diabetes/last seen,using randomisation to diabetes/last seen,using randomisation to diabetes/last seen,using randomisation to diabetes/last seen
Label,Person years,Failures,Rate,95%CI,95%CI
Label,Person years,Failures,Rate,Lower,Upper
total,3193.27,70.00,21.92,17.09,27.70
0-1 years,1318.36,29.00,22.00,14.73,31.59
1-2 years,1025.03,24.00,23.41,15.00,34.84
2-3 years,420.38,16.00,38.06,21.75,61.81
3+ years,9.29,1.00,107.61,2.72,599.56
Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria


In [114]:
# Table 10: Proportion meeting primary endpoint
df_table10 = pd.DataFrame(data=data)
df_table10["proportion"] = df_table10["failures"]/df_table10["subjects"]*100
gt = df_as_great_table(
    df_table10[["label", "subjects", 'failures', "proportion"]],
    title="Table 10: Proportion meeting primary endpoint",
)
gt = (
    gt
    .fmt_number(columns=["failures", "proportion"], decimals=2)
    .cols_label({"label": "Label", "subjects": "Participants", "failures": "Failures", "proportion": "%"})
    .cols_align(align="left", columns=["label"])
    .cols_align(align="center", columns=["subjects", "failures", "proportion"])
    .tab_source_note(source_note="Excluding patients withdrawn for `late exclusion` criteria")
)
html_data.append(gt.as_raw_html())
gt.show()


Table 10: Proportion meeting primary endpoint,Table 10: Proportion meeting primary endpoint,Table 10: Proportion meeting primary endpoint,Table 10: Proportion meeting primary endpoint
Label,Participants,Failures,%
total,1631,70.00,4.29
0-1 years,1631,29.00,1.78
1-2 years,1414,24.00,1.70
2-3 years,965,16.00,1.66
3+ years,101,1.00,0.99
Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria,Excluding patients withdrawn for `late exclusion` criteria


In [115]:
# Table 11a: End of Study Table (for those who have completed an end of study form)
df_eos = get_eos_df()
offstudy_reasons = {
    "Delivered / Completed followup from pregnancy": "Pregnancy",
    "Patient completed 36 months of follow-up": "Completed 36m",
    "Patient developed diabetes": "Developed diabetes",
    "Other reason (specify below)": "Other",
    "Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment": "Late exclusion",
    "Patient has been transferred to another health centre": "Transferred out",
    "Patient is withdrawn on CLINICAL grounds ...": "Withdrawal: Clinical grounds",
    "Patient lost to follow-up": "LTFU",
    "Patient reported/known to have died": "Died",
    "Patient withdrew consent to participate further": "Withdrawal: Consent",
}
df_eos["offstudy_reason"] = df_eos["offstudy_reason"].map(offstudy_reasons)
df_eos["offstudy_reason"] = pd.Categorical(df_eos["offstudy_reason"], categories=sorted(list(offstudy_reasons.values())), ordered=True)
df_eos["site_id"] = df_eos["site_id"].astype(str)
df_eos_pivot = (
    df_eos
    .groupby(by=["offstudy_reason", "site_id"],observed=True)
    .size()
    .reset_index()
    .pivot_table(index="offstudy_reason", columns="site_id", values=0, observed=True)
    .fillna(0)
    .astype(int)
    .reset_index()
)
df_eos_pivot["total"] = df_eos_pivot[["10", "20","30","40","60"]].sum(axis=1)
df_eos_pivot.columns.name=""
sum_row = df_eos_pivot.select_dtypes(include='int64').sum()
sum_row['offstudy_reason'] = 'Total'
sum_row_df = pd.DataFrame(sum_row).T
enrolled_pivot["offstudy_reason"] = "Enrolled"
enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]
df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)

gt = df_as_great_table(
    df_eos_pivot,
    title="Table 11a: End of study report",
    subtitle=md("for those who have completed an End of study report"),
)
gt = (
    gt
    .cols_label({"offstudy_reason": "Reason", **{k:v for k,v in column_headers.items() if k not in ["visit_code", "label"]}})
    .cols_align(align="left", columns=["offstudy_reason"])
    .cols_align(align="center", columns=["10", "20","30","40","60", "total"])
    .tab_style(
        style=[style.fill(color="snow"), style.text(color="black")],
        locations=loc.body(
            columns=[0],
            rows=[len(df_eos_pivot)-1]),
        )
    .tab_style(
        style=[style.fill(color="lightblue"), style.text(color="black")],
        locations=loc.body(
            columns=["10", "20", "30", "40", "60"],
            rows=[len(df_eos_pivot)-1],
        ),
    )
    .tab_style(
        style=[style.fill(color="lightgreen"), style.text(color="black")],
        locations=loc.body(
            columns=["total"],
            rows=[len(df_eos_pivot)-1],
        ),
    )
    .tab_style(
        style=[style.fill(color="snow"), style.text(color="black")],
        locations=loc.body(
            columns=["offstudy_reason"],
            rows=[0],
        ),
    )
)
html_data.append(gt.as_raw_html())
gt.show()


Table 11a: End of study report,Table 11a: End of study report,Table 11a: End of study report,Table 11a: End of study report,Table 11a: End of study report,Table 11a: End of study report,Table 11a: End of study report
for those who have completed an End of study report,for those who have completed an End of study report,for those who have completed an End of study report,for those who have completed an End of study report,for those who have completed an End of study report,for those who have completed an End of study report,for those who have completed an End of study report
Reason,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
Enrolled,185.0,391.0,340.0,545.0,230.0,1691.0
Completed 36m,0.0,1.0,0.0,0.0,0.0,1.0
Developed diabetes,4.0,7.0,2.0,9.0,16.0,38.0
Died,0.0,2.0,0.0,2.0,2.0,6.0
LTFU,0.0,0.0,0.0,11.0,0.0,11.0
Late exclusion,8.0,17.0,11.0,18.0,6.0,60.0
Other,1.0,0.0,0.0,2.0,0.0,3.0
Pregnancy,0.0,2.0,0.0,0.0,2.0,4.0
Transferred out,0.0,0.0,1.0,0.0,0.0,1.0
Withdrawal: Clinical grounds,0.0,2.0,0.0,2.0,0.0,4.0


In [116]:
# Table 11b: Study status
def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:
    columns = {k:f"{k}_{mode}" for k in ["10", "20", "30", "40", "60"]}
    df_schedule = (
        df_subjecthistory
        .query(f"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{'isna' if mode=='on' else 'notna'}()")
        .groupby(by=["onschedule_model", "site_id"])
        .size()
        .reset_index()
        .pivot_table(index="onschedule_model", columns="site_id", values=0, observed=True)
        .reset_index()
        .rename(columns={"onschedule_model":"schedule", **columns})
        .fillna(0)
        .copy()
    )
    df_schedule.columns.name = ""
    return df_schedule

df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={"site": "site_id"})
df_subjecthistory["site_id"] = df_subjecthistory["site_id"].astype(str)

df_on = pd.concat([
    get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', "on"),
    get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', "on"),
    get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', "on"),
])

df_on = (
    df_on
    .fillna(0)
    .reset_index(drop=True)
)

df_off = pd.concat([
    get_schedule_df(df_subjecthistory, 'meta_prn.onschedule', 'meta_prn.offschedule', "off"),
    get_schedule_df(df_subjecthistory, 'meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral', "off"),
    get_schedule_df(df_subjecthistory, 'meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy', "off"),
])
df_off = (
    df_off
    .fillna(0)
    .reset_index(drop=True)
)

df_status = pd.merge(df_on, df_off, on=["schedule"], how="outer")
columns = []
for ele in [[f"{x}_on", f"{x}_off"] for x in ["10", "20", "30", "40", "60"]]:
    columns.extend(ele)
df_status = df_status[["schedule", *columns]]
df_status["total_on"] = df_status[[col for col in columns if "on" in col]].sum(axis=1)
df_status["total_off"] = df_status[[col for col in columns if "off" in col]].sum(axis=1)
df_status["total"] = df_status[columns].sum(axis=1)
df_status["schedule"] = df_status.schedule.map({"meta_prn.onschedule": "Main trial", "meta_prn.onscheduledmreferral": "Diabetes", "meta_prn.onschedulepregnancy": "Pregnancy"})

gt = df_as_great_table(
    df_status,
    title="Table 11b: Study status",
    subtitle=md("Calculated from Offschedule form; not End of study report"),
)
# gt = gt.fmt_number(columns=["person_years", "failures", "rate", "lower_ci", "upper_ci"], decimals=0)
gt = (gt
    .tab_source_note(
        source_note=(
            "Note: Offschedule form is always submitted before the End of study report. "
            "When the Offschedule form is submitted, future appointments for the schedule are removed and "
            "the site staff are actioned to submit the End of study report."
        )
    )
    .cols_label({
        "10_on": "On", "10_off": "Off",
        "20_on": "On", "20_off": "Off",
        "30_on": "On", "30_off": "Off",
        "40_on": "On", "40_off": "Off",
        "60_on": "On", "60_off": "Off",
        "total_on": "On", "total_off": "Off",
        "schedule": "Schedule", "total": "Total"})
    .cols_align(align="center")
    .cols_align(align="left", columns=["label"])
    .tab_spanner(
        label="Hindu mandal",
        columns=["10_on", "10_off"],
    )
    .tab_spanner(
        label="Amana",
        columns=["20_on", "20_off"],
    )
    .tab_spanner(
        label="Temeke",
        columns=["30_on", "30_off"],
    )
    .tab_spanner(
        label="Mwananyamala",
        columns=["40_on", "40_off"],
    )
    .tab_spanner(
        label="Mnazi Moja",
        columns=["60_on", "60_off"],
    )
    .tab_spanner(
        label="Total",
        columns=["total_on", "total_off"],
    )
    .tab_style(
        style=[style.fill(color="lightblue"), style.text(color="black")],
        locations=loc.body(
            columns=["10_off", "20_off", "30_off", "40_off", "60_off"],
            rows=list(range(0, 1)),
        ),
    )
    .tab_style(
        style=[style.fill(color="lightgreen"), style.text(color="black")],
        locations=loc.body(
            columns=["total_off"],
            rows=list(range(0, 1)),
        ),
    )
    .fmt_number(columns=[*[c for c in df_status.columns if c not in ["schedule"]]], decimals=0)
)
html_data.append(gt.as_raw_html())
gt.show()

Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status,Table 11b: Study status
Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report,Calculated from Offschedule form; not End of study report
Schedule,Hindu mandal,Hindu mandal,Amana,Amana,Temeke,Temeke,Mwananyamala,Mwananyamala,Mnazi Moja,Mnazi Moja,Total,Total,Total
Schedule,On,Off,On,Off,On,Off,On,Off,On,Off,On,Off,Total
Main trial,158,27,343,48,316,24,459,86,184,46,1460,231,1691
Diabetes,6,0,1,1,1,0,27,1,5,11,40,13,53
Pregnancy,1,1,2,3,5,0,5,3,2,6,15,13,28
"Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report.","Note: Offschedule form is always submitted before the End of study report. When the Offschedule form is submitted, future appointments for the schedule are removed and the site staff are actioned to submit the End of study report."


In [117]:
# Table 12: Loss to Follow Up
df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={"site": "site_id"})
df_ltfu_pivot = (
    df_ltfu
    .groupby(by=["loss_category", "site_id"],observed=True,dropna=False)
    .size()
    .reset_index()
    .pivot_table(index="loss_category", columns="site_id", values=0, observed=True,dropna=False)
    .fillna(0)
    .astype(int)
    .reset_index()
)
df_ltfu_pivot["total"] = df_eos_pivot[["10", "20","30","40","60"]].sum(axis=1)
df_ltfu_pivot.columns.name=""
sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()
sum_row['loss_category'] = 'Total'
sum_row_df = pd.DataFrame(sum_row).T
df_ltfu_pivot = pd.concat([df_ltfu_pivot, sum_row_df], ignore_index=True)
df_ltfu_pivot


Unnamed: 0,loss_category,60,total
0,OTHER,1,1691.0
1,Total,1,


In [None]:
# Table 11c: End of study report not submitted

df1 = (
    df_status
    .query("schedule=='Main trial'")[[col for col in columns if "off" in col]]
    .rename(columns=dict(zip([col for col in columns if "off" in col], ["10", "20","30","40","60"])))
    .reset_index(drop=True)
)
df2 = (
    df_eos_pivot
    .query("offstudy_reason=='Total'")[["10", "20","30","40","60"]]
    .reset_index(drop=True)
)

df_eos_not_reported = df1-df2
df_eos_not_reported["schedule"] = 'Main trial'
df_eos_not_reported["total"] = df_eos_not_reported[["10", "20","30","40","60"]].sum(axis=1)
df_eos_not_reported = df_eos_not_reported[["schedule", "10", "20","30","40","60", "total"]]

gt = df_as_great_table(
    df_eos_not_reported,
    title="Table 11c: End of study report not submitted",
    subtitle=md("End of study report expected based on Offschedule form"),
)
gt = (
    gt
    .cols_label({"schedule": "Schedule", **{k:v for k,v in column_headers.items() if k not in ["visit_code", "label"]}})
    .cols_align(align="left", columns=["schedule"])
    .cols_align(align="center", columns=["10", "20","30","40","60", "total"])
    .tab_style(
        style=[style.fill(color="snow"), style.text(color="black")],
        locations=loc.body(
            columns=[0],
            rows=[len(df_eos_pivot)-1]),
        )
    .tab_style(
        style=[style.fill(color="lightblue"), style.text(color="black")],
        locations=loc.body(
            columns=["10", "20", "30", "40", "60"],
            rows=[len(df_eos_pivot)-1],
        ),
    )
    .tab_style(
        style=[style.fill(color="lightgreen"), style.text(color="black")],
        locations=loc.body(
            columns=["total"],
            rows=[len(df_eos_pivot)-1],
        ),
    )
)
html_data.append(gt.as_raw_html())
gt.show()


In [83]:
# Table 13: Baseline Sample

In [84]:
# Table 15: Consented to extended followup
df_consented = (
    read_frame(SubjectConsentV1Ext.objects.all(), verbose=False)
    .query("agrees_to_extension==@YES")
    .rename(columns={"site": "site_id"})
)
df_consented["site_id"] = df_consented.site_id.astype(str)
df_consented["month"] = df_consented.report_datetime.dt.strftime("%m")
df_consented["year"] = df_consented.report_datetime.dt.strftime("%Y")
df_consented_grp = (
    df_consented.groupby(by=["site_id", "year", "month"]).
    size()
    .reset_index()
    .sort_values(by=["site_id", "year", "month"], ascending=True)
    .reset_index(drop=True)
)
df_consented_pivot = (
    df_consented_grp
    .pivot_table(index=["year", "month"], columns="site_id", values=0, aggfunc="sum")
    .reset_index()
    .fillna(0)
)
if "60" not in df_consented_pivot.columns:
    df_consented_pivot["60"] = 0.0 * len(df_consented_pivot)
df_consented_pivot.columns.name=""
df_consented_pivot["year"] = df_consented_pivot["year"].astype(str)
df_consented_pivot["month"] = df_consented_pivot["month"].astype(str)

sum_row = df_consented_pivot[["10", "20","30","40","60"]].sum()
sum_row['year'] = "Total"
sum_row['month'] = ""
df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)
df_consented_pivot["total"] = df_consented_pivot[["10", "20","30","40","60"]].sum(axis=1).astype(int)
df_consented_pivot[["10", "20","30","40","60"]] = df_consented_pivot[["10", "20","30","40","60"]].astype(int)
gt = df_as_great_table2(
    df_consented_pivot,
    title="Table 15: Consented to extended followup",
    rowname_col="month",
    groupname_col="year",
)
gt = (
    gt
    .cols_label({"year": "Year", "month": "Month", **{k:v for k, v in column_headers.items() if k not in ["visit_code", "label"]}})
    .cols_align(align="center")
    .fmt_number(columns=["10", "20", "30", "40", "60", "total"], decimals=0)
    .tab_stubhead(label="Consented")
    .tab_style(
        style=[
            style.text(color="black", weight="bold"),
            style.fill(color="lightgray")
        ],
        locations=loc.row_groups()
    )
)
html_data.append(gt.as_raw_html())
gt.show()

Table 15: Consented to extended followup,Table 15: Consented to extended followup,Table 15: Consented to extended followup,Table 15: Consented to extended followup,Table 15: Consented to extended followup,Table 15: Consented to extended followup,Table 15: Consented to extended followup
Consented,Hindu Mandal,Amana,Temeke,Mwananyamala,Mnazi Moja,Total
2024,2024,2024,2024,2024,2024,2024
12,1,9,4,7,0,21
2025,2025,2025,2025,2025,2025,2025
01,4,11,7,8,0,30
02,4,6,5,9,0,24
03,12,12,7,2,0,33
04,0,5,3,3,0,11
12,0,0,1,0,0,1
Total,Total,Total,Total,Total,Total,Total
,21,43,27,29,0,120


In [85]:
# gather raw html
raw_html = [f'<div class="page-break">{s}</div>' for s in html_data]
style_css = """
<style>
  .page-break {
    page-break-inside: avoid; /* Always add page break before this element */
  }
  .table-header {
    font-weight: bold;
    font-size: 18px;
    text-align: center;
    border-bottom: None;
  }
</style>
"""
raw_html = ''.join(raw_html)
raw_html = f'<!DOCTYPE html>\n<html lang="en">\n{style_css}\n<head>\n<meta charset="utf-8"/>\n</head>\n<body>\n' + document_title + raw_html + '\n</body>\n</html>\n'

In [86]:
# render html to PDF
pdfkit.from_string(raw_html, str(analysis_folder / pdf_filename),
options={
    'footer-center': 'Page [page] of [topage]',
    'footer-font-size': '8',
    'footer-spacing': '5',
    'encoding': "UTF-8",
    'margin-top':'10mm',
    'margin-right':'15mm',
    'margin-bottom':'15mm',
    'margin-left':'15mm',
    'header-center': study_title,
    'header-font-size': '6',
    'header-spacing': '0',
    'disable-javascript': None,
    'no-outline': None,
},
verbose=True)

Loading pages (1/6)
Counting pages (2/6)                                               
Resolving links (4/6)                                                       
Loading headers and footers (5/6)                                           
Printing pages (6/6)
Done                                                                        


True