In [None]:
%%capture
import os
from pathlib import Path
import pandas as pd
from dj_notebook import activate
import numpy as np
from django_pandas.io import read_frame

# All runtime configuration comes from environment variables; a missing
# variable raises KeyError at this point.
env_file = os.environ["META_ENV"]
reports_folder, analysis_folder, pharmacy_folder = (
    Path(os.environ[var_name])
    for var_name in (
        "META_REPORTS_FOLDER",
        "META_ANALYSIS_FOLDER",
        "META_PHARMACY_FOLDER",
    )
)

# dj_notebook.activate is given the dotenv file named by META_ENV; its return
# value is kept as `plus`.
plus = activate(dotenv_file=env_file)

# Opt in to pandas' future behaviour of not silently downcasting dtypes.
pd.set_option("future.no_silent_downcasting", True)

In [None]:
import pdfkit
from datetime import date
from edc_pdutils.dataframes import get_subject_visit
from meta_analytics.dataframes import get_glucose_fbg_ogtt_df, get_glucose_fbg_df
from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39
from meta_analytics.dataframes import GlucoseEndpointsByDate
from scipy.stats import chi2
from great_tables import loc, style, md
from meta_analytics.dataframes import get_eos_df
from meta_analytics.utils import df_as_great_table, df_as_great_table2
from meta_prn.models import LossToFollowup
from edc_visit_schedule.models import SubjectScheduleHistory
from edc_appointment.analytics import get_appointment_df
from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT, SCHEDULED_APPT, COMPLETE_APPT, INCOMPLETE_APPT, IN_PROGRESS_APPT, UNSCHEDULED_APPT
from edc_constants.constants import YES
from meta_consent.models import SubjectConsentV1Ext

In [None]:
# Rendered HTML fragments, appended by each table cell in report order.
html_data = []

# Reporting window: data up to `cutoff_date`, appointments up to `end_of_trial_date`.
cutoff_date = date(2025, 3, 31)
end_of_trial_date = date(2026, 3, 1)

# Heading placed at the top of the PDF body.
document_title = (
    f"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2>"
    f"<h5>Data Download: {cutoff_date.strftime('%d %B %Y')}</h5>"
)
study_title = "META3 - Metformin treatment for diabetes prevention in Africa"
pdf_filename = f"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf"

# Display labels for table columns; the numeric keys are site ids as strings.
column_headers = {
    "appt_datetime": "Appointment",
    "year": "Year",
    "month": "Month",
    "10": "Hindu Mandal",
    "20": "Amana",
    "30": "Temeke",
    "40": "Mwananyamala",
    "60": "Mnazi Moja",
    "total": "Total",
}



In [None]:
# Subject visits, keeping only rows whose appointment date is on or before the cutoff.
df_visit = get_subject_visit("meta_subject.subjectvisit")
df_visit = df_visit.loc[df_visit["appt_datetime"].dt.date <= cutoff_date]

# Appointments; site ids are stored as strings so they can be used as column labels.
df_appointments = get_appointment_df()
df_appointments["site_id"] = df_appointments["site_id"].astype(str)

# Glucose endpoints: run the analysis class and keep a copy of its endpoint-only frame.
cls = GlucoseEndpointsByDate()
cls.run()
df_endpoint = cls.endpoint_only_df.copy()

# Glucose readings: FBG+OGTT rows followed by FBG-only rows.
df_glucose_fbg_ogtt = get_glucose_fbg_ogtt_df()
df_glucose_fbg = get_glucose_fbg_df()
df_glucose = pd.concat([df_glucose_fbg_ogtt, df_glucose_fbg])

# Visits at visit code 1000.0 counted per site (presumably the enrolment visit),
# laid out as a single row with one column per site.
enrolled = df_visit.assign(site_id=df_visit["site_id"].astype(str))
enrolled_per_site = (
    enrolled.query("visit_code==1000.0")
    .groupby(["site_id"])
    .size()
    .reset_index()
)
enrolled_pivot = enrolled_per_site.pivot_table(
    columns="site_id", values=0, observed=True
)
enrolled_pivot.columns.name = ""
enrolled_pivot["total"] = enrolled_pivot.loc[:, ["10", "20", "30", "40", "60"]].sum(axis=1)



In [None]:
# Table 1f: future scheduled appointments, by month and by visit code.
#
# Both pivots start from the same selection: appointments dated between the
# cutoff and the end of the trial whose reason is SCHEDULED_APPT, whose timing
# is ONTIME_APPT and whose status is still NEW_APPT.
df_future_appts = df_appointments.query(
    "@cutoff_date<=appt_datetime<=@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])"
)

# --- Appointments per site per calendar month (month-end bins) ---
df_appt_pivot = (
    df_future_appts
    .set_index("appt_datetime")
    .groupby(by=["site_id", pd.Grouper(freq="ME")])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "patients"})
    .pivot(index="appt_datetime", columns="site_id", values="patients")
    .reset_index()
    .fillna(0)
)

df_appt_pivot.columns.name = None
df_appt_pivot["total"] = df_appt_pivot.iloc[:, 1:].sum(axis=1)
df_appt_pivot["appt_datetime"] = df_appt_pivot.appt_datetime.dt.strftime("%Y-%m")
# Select every numeric column, not only float64: when no site/month cell is
# missing the counts stay int64 and a float64-only selection would leave the
# totals row empty.
sum_row = df_appt_pivot.select_dtypes(include="number").sum()
# The trailing "-" makes the split below yield year="Total" and month="".
sum_row["appt_datetime"] = "Total-"
sum_row_df = pd.DataFrame(sum_row).T
df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0)
df_appt_pivot[["year", "month"]] = df_appt_pivot["appt_datetime"].str.split("-", expand=True)

# --- Appointments per site per visit code ---
# Count rows with GroupBy.size(). Chaining .size() after .agg() cannot work
# because DataFrame.size is an integer attribute, not a method.
df_appt_pivot2 = (
    df_future_appts
    .groupby(by=["site_id", "visit_code"])
    .size()
    .to_frame()
    .reset_index()
    .rename(columns={0: "patients"})
    .pivot(index="visit_code", columns="site_id", values="patients")
    .reset_index()
    .fillna(0)
)

df_appt_pivot2.columns.name = None
df_appt_pivot2["total"] = df_appt_pivot2.iloc[:, 1:].sum(axis=1)
# Cast the visit code to text first so it is excluded from the numeric totals.
df_appt_pivot2["visit_code"] = df_appt_pivot2.visit_code.astype(str)
sum_row = df_appt_pivot2.select_dtypes(include="number").sum()
sum_row["visit_code"] = "Total-"
sum_row_df = pd.DataFrame(sum_row).T
df_appt_pivot2 = pd.concat([df_appt_pivot2, sum_row_df], axis=0)

df_appt_pivot2

In [None]:
def get_df_appt(criteria: str) -> pd.DataFrame:
    """Return monthly counts of future scheduled appointments per site.

    Reads the notebook-level ``df_appointments``, ``cutoff_date`` and
    ``end_of_trial_date``. Appointments are kept when they fall between the
    two dates, have reason ``SCHEDULED_APPT``, timing ``ONTIME_APPT``, status
    ``NEW_APPT`` and a ``visit_code`` below 2000.0. The remaining rows are
    reduced to the last row per ``(site_id, appt_datetime)``, filtered by
    ``criteria``, counted per site per month-end bin and pivoted to one
    column per site.

    Args:
        criteria: pandas ``query`` expression applied after the reduction,
            e.g. ``"visit_code.isin([1360.0, 1480.0])"``.

    Returns:
        A DataFrame with one row per month followed by a totals row. Columns
        are ``appt_datetime`` (``"%Y-%m"`` text, missing on the totals row),
        one column per ``site_id``, ``total``, ``year`` and ``month``. On the
        totals row ``year`` is ``"Total"`` and ``month`` is missing.
    """
    df_appt = (
        df_appointments.query("@cutoff_date<=appt_datetime<=@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT]) and visit_code<2000.0")
        # NOTE(review): rows sharing a site_id and appt_datetime collapse to a
        # single row here. If two participants can share an appointment
        # datetime at one site this undercounts -- confirm the intended key.
        .groupby(["site_id", "appt_datetime"])
        .agg("last")
        .reset_index()
        .query(criteria)
        .set_index("appt_datetime")
        .groupby(by=["site_id", pd.Grouper(freq="ME")])
        .size()
        .to_frame()
        .reset_index()
        .rename(columns={0: "patients"})
        .pivot(index="appt_datetime", columns="site_id", values="patients")
        .reset_index()
        .fillna(0)
    )
    df_appt.columns.name = None
    df_appt["total"] = df_appt.iloc[:, 1:].sum(axis=1)
    # Sum every numeric column, not only float64: when no site/month cell is
    # missing the counts stay int64 and a float64-only selection would yield
    # an empty totals row. The datetime column is not numeric, so it is skipped.
    sum_row = df_appt.select_dtypes(include="number").sum()
    sum_row_df = pd.DataFrame(sum_row).T
    df_appt = pd.concat([df_appt, sum_row_df], axis=0)
    # The totals row has no appt_datetime, so its formatted value is missing
    # and its year/month come out missing too; year is then labelled "Total".
    df_appt["appt_datetime"] = df_appt.appt_datetime.dt.strftime("%Y-%m")
    df_appt[["year", "month"]] = df_appt["appt_datetime"].str.split("-", expand=True)
    df_appt["year"] = df_appt["year"].fillna("Total")
    return df_appt


# Table 1f: monthly counts limited to visit codes 1360 and 1480.
# The date in the title is taken from `end_of_trial_date` so it cannot drift
# from the date used by the appointment filter and the source note.
gt = df_as_great_table2(
    get_df_appt(criteria="visit_code.isin([1360.0, 1480.0])"),
    title=f"Table 1f: Participants who will complete followup on 1360 or 1480 before {end_of_trial_date.isoformat()}",
    rowname_col="month",
    groupname_col="year",
)
gt = (
    gt
    .cols_label({k: v for k, v in column_headers.items() if k != "label"})
    .cols_align(align="center", columns=["appt_datetime", "10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["month", "year"])
    .fmt_number(columns=["10", "20", "30", "40", "60", "total"], decimals=0)
    .tab_source_note(source_note=f"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.")
    # Make the year group rows stand out: bold black text on light gray.
    .tab_style(
        style=[
            style.text(color="black", weight="bold"),
            style.fill(color="lightgray"),
        ],
        locations=loc.row_groups(),
    )
)
# Keep the rendered HTML for the PDF, then show the table inline.
html_data.append(gt.as_raw_html())
gt.show()

In [None]:

# Table 1f (complement): monthly counts for every visit code other than 1360 and 1480.
# The date in the title is taken from `end_of_trial_date` so it cannot drift
# from the date used by the appointment filter and the source note.
gt = df_as_great_table2(
    get_df_appt(criteria="~visit_code.isin([1360.0, 1480.0])"),
    title=f"Table 1f: Participants who will NOT complete followup on 1360 or 1480 before {end_of_trial_date.isoformat()}",
    rowname_col="month",
    groupname_col="year",
)
gt = (
    gt
    .cols_label({k: v for k, v in column_headers.items() if k != "label"})
    .cols_align(align="center", columns=["appt_datetime", "10", "20", "30", "40", "60", "total"])
    .cols_align(align="left", columns=["month", "year"])
    .fmt_number(columns=["10", "20", "30", "40", "60", "total"], decimals=0)
    .tab_source_note(source_note=f"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.")
    # Make the year group rows stand out: bold black text on light gray.
    .tab_style(
        style=[
            style.text(color="black", weight="bold"),
            style.fill(color="lightgray"),
        ],
        locations=loc.row_groups(),
    )
)
# Keep the rendered HTML for the PDF, then show the table inline.
html_data.append(gt.as_raw_html())
gt.show()

In [None]:
# Assemble the full HTML document that is converted to PDF.
# Each rendered table is wrapped in a div that asks the renderer not to split
# it across pages.
html_sections = "".join(f'<div class="page-break">{s}</div>' for s in html_data)
style_css = """
<style>
  .page-break {
    page-break-inside: avoid; /* Avoid splitting this element across pages */
  }
  .table-header {
    font-weight: bold;
    font-size: 18px;
    text-align: center;
    border-bottom: None;
  }
</style>
"""
# The stylesheet belongs inside <head>; placing it between <html> and <head>
# is not valid HTML.
raw_html = (
    '<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="utf-8"/>\n'
    + f"{style_css}\n</head>\n<body>\n"
    + document_title
    + html_sections
    + "\n</body>\n</html>\n"
)

In [None]:
# Render the assembled HTML to a PDF in the analysis folder.
pdf_path = analysis_folder / pdf_filename

# Options passed through to pdfkit; a value of None marks an option that
# takes no argument.
pdf_options = {
    # Footer: page counter.
    'footer-center': 'Page [page] of [topage]',
    'footer-font-size': '8',
    'footer-spacing': '5',
    'encoding': "UTF-8",
    # Page margins.
    'margin-top': '10mm',
    'margin-right': '15mm',
    'margin-bottom': '15mm',
    'margin-left': '15mm',
    # Header: study title on every page.
    'header-center': study_title,
    'header-font-size': '6',
    'header-spacing': '0',
    'disable-javascript': None,
    'no-outline': None,
}

pdfkit.from_string(raw_html, str(pdf_path), options=pdf_options, verbose=True)