In [None]:
%%capture
import os
from pathlib import Path

import pandas as pd
from dj_notebook import activate

# Notebook configuration is read from environment variables; indexing
# os.environ directly means a missing variable raises KeyError immediately.
env_file = os.environ["META_ENV"]
reports_folder = Path(os.environ["META_REPORTS_FOLDER"])
analysis_folder = Path(os.environ["META_ANALYSIS_FOLDER"])
pharmacy_folder = Path(os.environ["META_PHARMACY_FOLDER"])
# dj_notebook.activate() is given the dotenv file path; presumably this boots
# the Django project the later queries rely on — confirm against dj_notebook docs.
plus = activate(dotenv_file=env_file)
# Opt in to pandas' future behaviour (no silent downcasting of object results).
pd.set_option('future.no_silent_downcasting', True)

In [None]:
import numpy as np
import io
# import msoffcrypto
import mempass

from datetime import datetime
from edc_appointment.constants import ONTIME_APPT, NEW_APPT, CANCELLED_APPT, MISSED_APPT
from edc_pdutils.dataframes import get_crf, get_subject_visit
# from tabulate import tabulate
from meta_analytics.dataframes import get_glucose_fbg_ogtt_df, get_glucose_fbg_df
from meta_analytics.dataframes import GlucoseEndpointsByDate
from meta_analytics.dataframes import get_glucose_df
from meta_analytics.dataframes import EndpointByDate

In [None]:
# Endpoint evaluator used by the comparison cells below (re-created again further down).
cls = GlucoseEndpointsByDate()

In [None]:
# Later cells read cls.endpoint_only_df / cls.endpoint_df / cls.df after this call;
# presumably run() is what populates them — confirm in meta_analytics.
cls.run()

In [None]:
# Number of rows per endpoint label (value_counts excludes NaN labels by default).
cls.endpoint_only_df.endpoint_label.value_counts()

In [None]:
# Re-evaluate the endpoint for one subject in isolation so the per-visit
# working columns can be inspected.
subject_id = "105-40-0379-1"

df_glu = get_glucose_df()
# Select the subject once; the evaluator receives its own copy, ordered by
# visit code with a clean 0..n-1 index.
subject_df = (
    df_glu[df_glu.subject_identifier == subject_id]
    .copy()
    .sort_values(by=["visit_code"])
    .reset_index(drop=True)
)

# Keyword names (including the "threshhold" spelling) belong to EndpointByDate.
ep = EndpointByDate(subject_df=subject_df, fbg_threshhold=7.0, ogtt_threshhold=11.1)
ep.evaluate()
ep.subject_df

In [None]:
# Externally supplied list of endpoint subjects; the cells below compare its
# subject_identifier column against cls.endpoint_only_df.
df_katie = pd.read_csv(analysis_folder / "katie_endpoint_subjects.csv")

In [None]:
# Endpoint rows computed here whose subject is absent from the external list.
in_katie_list = cls.endpoint_only_df.subject_identifier.isin(df_katie.subject_identifier)
cls.endpoint_only_df[~in_katie_list]

In [None]:
# Rows of the external list whose subject has no endpoint row computed here.
in_endpoint_only = df_katie.subject_identifier.isin(cls.endpoint_only_df.subject_identifier)
df_katie[~in_endpoint_only]


In [None]:
# Show the external list as loaded from katie_endpoint_subjects.csv.
df_katie

In [None]:
# Reload the glucose frame and show every row recorded for one subject.
df_glu = get_glucose_df()
df_glu[df_glu.subject_identifier == '105-40-0370-0']


In [None]:
# Subject visit frame; Tables 1 and 2 below read its visit_code, visit_code_sequence,
# site_id, appt_timing and appt_status columns.
df_visit = get_subject_visit(model="meta_subject.subjectvisit")

In [None]:
# Table 1 Visits completed to date
site_cols = ["10", "20", "30", "40", "60"]

# Scheduled visits only (sequence 0) that were attended on time, ignoring
# appointments that are still new or were cancelled.
attended_mask = (
    (df_visit.visit_code_sequence == 0)
    & (df_visit.appt_timing == ONTIME_APPT)
    & ~df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT])
)
df_tbl1 = (
    df_visit[attended_mask]
    .groupby(by=["visit_code", "site_id"])
    .size()
    .to_frame(name="visits")
    .reset_index()
)

df1 = df_tbl1.pivot(index="visit_code", columns="site_id", values="visits")
# Label the site columns by *name* rather than by position, so a site with no
# rows cannot shift the other sites' counts into the wrong column.
# NOTE(review): assumes site_id values are numeric (10, 20, ...) — confirm.
df1.columns = [str(int(site_id)) for site_id in df1.columns]
unexpected_sites = sorted(set(df1.columns) - set(site_cols))
if unexpected_sites:
    raise ValueError(f"Unexpected site_id(s) in visit data: {unexpected_sites}")
# reindex adds any absent site as an all-NaN column, which fillna turns into 0.
df1 = df1.reindex(columns=site_cols).fillna(0).reset_index()
df1["total"] = df1[site_cols].sum(axis=1)

# Columns: visit_code, one count per site, total. Table 2 reads this frame.
df_attended = df1.copy()
df_attended

In [None]:
# Table 2 Visits Missed to Date as % of Visits Attended + Visits Missed
site_cols = ["10", "20", "30", "40", "60"]
count_cols = [*site_cols, "total"]

# Scheduled visits only (sequence 0) flagged as missed, ignoring appointments
# that are still new or were cancelled.
missed_mask = (
    (df_visit.visit_code_sequence == 0)
    & (df_visit.appt_timing == MISSED_APPT)
    & ~df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT])
)
df_tbl12 = (
    df_visit[missed_mask]
    .groupby(by=["visit_code", "site_id"])
    .size()
    .to_frame(name="visits")
    .reset_index()
)

df1 = df_tbl12.pivot(index="visit_code", columns="site_id", values="visits")
# Label site columns by name, not position, so a site without missed visits
# cannot shift the remaining counts into the wrong column.
# NOTE(review): assumes site_id values are numeric (10, 20, ...) — confirm.
df1.columns = [str(int(site_id)) for site_id in df1.columns]
unexpected_sites = sorted(set(df1.columns) - set(site_cols))
if unexpected_sites:
    raise ValueError(f"Unexpected site_id(s) in visit data: {unexpected_sites}")
df1 = df1.reindex(columns=site_cols).fillna(0).reset_index()
df1["total"] = df1[site_cols].sum(axis=1)
df_missed = df1.set_index("visit_code")

# Work on an indexed *view* of the attended table instead of mutating
# df_attended in place, so this cell can be re-run safely. The else-branch
# covers a kernel where df_attended has already been indexed by visit_code.
attended_by_visit = (
    df_attended.set_index("visit_code")
    if "visit_code" in df_attended.columns
    else df_attended
)

# fill_value=0 keeps visit codes that appear in only one of the two tables;
# a plain `+` would turn those rows into NaN and lose the denominator.
attended_and_missed = attended_by_visit[count_cols].add(df_missed[count_cols], fill_value=0)
# 0/0 (no attended and no missed visits for a site) is reported as 0.
attended_and_missed_perc = df_missed[count_cols].div(attended_and_missed).fillna(0)

# Both frames are indexed by visit_code; percentages get the "_perc" suffix.
df_result = df_missed.join(attended_and_missed_perc, rsuffix="_perc")
for col in count_cols:
    col_perc = f"{col}_perc"
    # Render each cell as "<missed> (<percent missed>)".
    df_result[col] = df_result.apply(lambda x: f"{x[col]} ({x[col_perc]*100:.2f})", axis=1)
df_result = df_result.reset_index().sort_values(by=["visit_code"], ascending=True)
df_result[["visit_code", "10", "20", "30", "40", "60", "total"]]

In [None]:

# Table 3: OGTT and FBG at 12-month visit

In [None]:
def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:
    """Return a one-row frame of per-site row counts for ``row_df``.

    The result always has the columns ``label, 10, 20, 30, 40, 60`` in that
    order, which matters because ``format_table_df`` relabels the columns by
    position. Sites with no rows in ``row_df`` hold ``None``. An empty
    ``row_df`` still yields one row (all sites ``None``) so the category does
    not silently disappear from a concatenated table.

    Args:
        row_df: Frame with a ``site_id`` column; one row per counted record.
        label: Text stored in the ``label`` column of the returned row.

    Returns:
        A single-row DataFrame with a fresh ``RangeIndex``.
    """
    all_sites = [10, 20, 30, 40, 60]
    # Non-null site_id values per site.
    counts = row_df.groupby(by=["site_id"])["site_id"].count()

    # Seed every expected site with None first: this fixes the column order
    # and covers sites that have no rows. Counts then overwrite the defaults;
    # a site_id outside all_sites ends up as an extra trailing column.
    row = {"label": label, **{site: None for site in all_sites}}
    row.update(counts.to_dict())

    result = pd.DataFrame([row])
    result.columns.name = ""
    return result


def get_table_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:
    """Build the per-site OGTT x FBG cross-tabulation for one visit.

    Rows, in order: "Total (n)", the nine combinations of three 2-hour OGTT
    bands (<7.8, 7.8 to <11.1, >=11.1) with three FBG bands (<6.1, 6.1 to
    <7.0, >=7.0), then "Missing OGTT". Each row is produced by ``get_row_df``.

    Args:
        df_source: Frame with ``visit_code``, ``ogtt_value``, ``fbg_value``
            and ``site_id`` columns.
        visit_code: Exact visit code to select.

    Returns:
        The row frames concatenated in the order above.
    """
    visit_df = df_source[df_source.visit_code == visit_code].copy()
    ogtt = visit_df.ogtt_value
    fbg = visit_df.fbg_value

    # (label fragment, boolean mask) per band. Comparisons against NaN are
    # False, so rows with a missing value fall into none of the bands.
    ogtt_bands = [
        ("<7.8", ogtt < 7.8),
        ("≥7.8 to <11.1", (ogtt >= 7.8) & (ogtt < 11.1)),
        ("≥11.1", ogtt >= 11.1),
    ]
    fbg_bands = [
        ("<6.1", fbg < 6.1),
        (">=6.1 <7.0", (fbg >= 6.1) & (fbg < 7.0)),
        (">=7.0", fbg >= 7.0),
    ]

    table_df = get_row_df(visit_df, "Total (n)")
    for ogtt_label, ogtt_mask in ogtt_bands:
        for fbg_label, fbg_mask in fbg_bands:
            band_row = get_row_df(
                visit_df[ogtt_mask & fbg_mask],
                f"2-hour OGTT {ogtt_label}; FBG {fbg_label}",
            )
            table_df = pd.concat([table_df, band_row])

    missing_row = get_row_df(visit_df[ogtt.isna()], "Missing OGTT")
    table_df = pd.concat([table_df, missing_row])
    return table_df



In [None]:
def format_table_df(tbl_df, rename_columns:bool|None=None, add_totals:bool|None=None):
    add_totals = True if add_totals is None else add_totals
    tbl_df = tbl_df.fillna(0.0)
    tbl_df["total"] = tbl_df.iloc[:,1:].sum(axis=1)
    tbl_df = tbl_df.reset_index(drop=True)

    if add_totals:
        df_last = tbl_df[1:].sum().to_frame()
        df_last.loc["label"] = np.nan
        df_last = df_last.reset_index()
        df_last.columns = ["label", "value"]
        df_last = df_last.pivot_table(columns="label",  values="value").reset_index(drop=True)
        df_last.columns.name = ""
        df_last["label"] = "totals"

        tbl_df = pd.concat([tbl_df, df_last])
        tbl_df = tbl_df.reset_index(drop=True)

    tbl_df.columns = ["label", "10", "20", "30", "40", "60", "Total"]

    for site in ["10", "20", "30", "40", "60", "Total"]:
        tbl_df[f"{site}_perc"] = (tbl_df[site]/tbl_df.iloc[0][site]) * 100 if tbl_df.iloc[0][site]>0 else 0
        tbl_df[f"{site}_perc_str"] = tbl_df[f"{site}_perc"].map('{:.1f}'.format)


    for site in ["10", "20", "30", "40", "60", "Total"]:
        tbl_df[f"{site}_str"] = tbl_df[[f"{site}", f"{site}_perc_str"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)
        tbl_df[f"{site}_str"] = tbl_df[f"{site}_str"] + ")"

    cols = ["label", *[f"{site}_str" for site in ["10", "20", "30", "40", "60", "Total"]]]
    tbl_df1 = tbl_df[cols]
    tbl_df1.loc[tbl_df.label=="Total (n)"] = tbl_df.iloc[0][["label", "10", "20", "30", "40", "60", "Total"]].to_list()
    if rename_columns:
        tbl_df1 = tbl_df1.rename(columns={"10_str": "Hindu Mandal", "20_str": "Amana", "30_str": "Temeke", "40_str": "Mwananyamala", "60_str": "Mnazi Moja", "Total_str": "Total"})
    return tbl_df1


In [None]:
# Stack the FBG+OGTT frame and the FBG-only frame into the single source
# frame used by every table below (row index labels are kept as-is).
df_glucose_fbg_ogtt = get_glucose_fbg_ogtt_df()
df_glucose_fbg = get_glucose_fbg_df()
df_glucose = pd.concat([df_glucose_fbg_ogtt, df_glucose_fbg])

In [None]:
# Table 3: OGTT x FBG cross-tabulation for visit code 1120.0, with site names.
df_table3 = format_table_df(get_table_df(df_glucose, 1120.0), rename_columns=True)
df_table3

In [None]:
# Table 4: OGTT x FBG cross-tabulation for visit code 1240.0, with site names.
df_table4 = format_table_df(get_table_df(df_glucose, 1240.0), rename_columns=True)
df_table4

In [None]:
# Table 5: OGTT x FBG cross-tabulation for visit code 1360.0, with site names.
df_table5 = format_table_df(get_table_df(df_glucose, 1360.0), rename_columns=True)
df_table5


In [None]:
# df_table6 = get_table_df(df_glucose, 1480.0, rename_columns=True)
# df_table6

In [None]:
# Table 6: per-site count of glucose rows with a 2-hour OGTT >= 11.1, across all visits.
row_df = df_glucose[df_glucose.ogtt_value >= 11.1].copy()
table_df = get_row_df(row_df, "Total (n)")
df_table6 = format_table_df(table_df, rename_columns=True)
# Only the "Total (n)" row is of interest; drop the appended totals row from view.
df_table6.iloc[:1]

In [None]:
def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:
    """Build the per-site FBG band table for one visit window.

    Selects rows whose ``visit_code`` lies in ``[visit_code, visit_code + 0.9]``
    (both ends inclusive) and returns four rows from ``get_row_df``:
    "Total (n)", "FBG <6.1", "FBG >=6.1 <7.0" and "FBG >=7.0".

    Args:
        df_source: Frame with ``visit_code``, ``fbg_value`` and ``site_id`` columns.
        visit_code: Lower bound of the visit-code window.

    Returns:
        The four row frames concatenated in the order above.
    """
    # NOTE(review): the +0.9 window presumably also captures the .1-.9
    # follow-on codes of the same visit — confirm against the visit schedule.
    in_window = (df_source.visit_code >= visit_code) & (df_source.visit_code <= visit_code + 0.9)
    window_df = df_source[in_window].copy()
    fbg = window_df.fbg_value

    fbg_bands = [
        ("FBG <6.1", fbg < 6.1),
        ("FBG >=6.1 <7.0", (fbg >= 6.1) & (fbg < 7.0)),
        ("FBG >=7.0", fbg >= 7.0),
    ]

    table_df = get_row_df(window_df, "Total (n)")
    for band_label, band_mask in fbg_bands:
        table_df = pd.concat([table_df, get_row_df(window_df[band_mask], band_label)])
    return table_df


In [None]:
from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39

# Table 7: FBG bands per site, one four-row section per visit.
# Each entry is (lower bound of the visit-code window, visit label for the section).
table7_visits = [
    (1150.0, MONTH15),
    (1180.0, MONTH18),
    (1210.0, MONTH21),
    (1270.0, MONTH27),
    (1300.0, MONTH30),
    (1330.0, MONTH33),
    (1390.0, MONTH39),
]

# Build every section with the same three steps (count, format, tag with the
# visit label) and stack them once, instead of repeating the stanza per visit.
df_table = pd.concat(
    [
        format_table_df(
            get_table7_df(df_glucose, window_start),
            rename_columns=True,
            add_totals=False,
        ).assign(visit_code=visit_label)
        for window_start, visit_label in table7_visits
    ]
)
df_table[["visit_code", "label", "Hindu Mandal", "Amana", "Temeke", "Mwananyamala", "Mnazi Moja", "Total"]]


In [None]:
# Re-create and re-run the endpoint evaluator; every cell below reads its
# endpoint_only_df / endpoint_df / df attributes from this instance.
cls = GlucoseEndpointsByDate()
cls.run()
# cls.endpoint_only_df.endpoint_type.value_counts()
# cls.endpoint_only_df.endpoint_label.value_counts(dropna=False)

In [None]:
# Endpoints per endpoint label (rows) and site (columns), plus a "Total" row and column.
site_columns = ['10', '20', '30', '40', '60']

endpoint_counts = (
    cls.endpoint_only_df
    .groupby(by=["site_id", "endpoint_label"])
    .size()
    .to_frame()
    .reset_index()
)
endpoint_counts.columns = ["site_id", "label", "endpoints"]

df = endpoint_counts.pivot_table(index="label", columns="site_id", values="endpoints").reset_index()
df.columns.name = ""
# Positional relabel: relies on the pivot producing exactly these five sites, in this order.
df.columns = ['label', *site_columns]
# Append the per-site column sums as a new last row, then name that row.
df.loc[len(df)] = df[site_columns].sum().to_dict()
df.at[len(df)-1, 'label'] = 'Total'
df['Total'] = df[site_columns].sum(axis=1)
df = df.fillna(0)
df
# print(tabulate(df[['label', '10', '20', '30', '40', '60', 'Total']], showindex=False, headers="keys", tablefmt="simple_grid"))


In [None]:
# Re-display the endpoints-by-site table built in the previous cell.
df

In [None]:
# Distinct subject identifiers in endpoint_df. unique() counts a NaN identifier
# as a value, unlike the nunique() call used on endpoint_only_df further down.
len(cls.endpoint_df["subject_identifier"].unique())

In [None]:
# Summary statistics of interval_in_days over the rows flagged endpoint == 1.
cls.endpoint_df[(cls.endpoint_df["endpoint"]==1)]["interval_in_days"].describe()

In [None]:
# Summary statistics of days_to_endpoint over the rows flagged endpoint == 1.
cls.endpoint_only_df[(cls.endpoint_only_df["endpoint"]==1)]["days_to_endpoint"].describe()

In [None]:
# Row count of endpoint_df.
len(cls.endpoint_df)

In [None]:
# Row count of endpoint_only_df; compare with the distinct-subject count in the next cell.
len(cls.endpoint_only_df)

In [None]:
# Distinct subject identifiers in endpoint_only_df (nunique() ignores NaN).
cls.endpoint_only_df["subject_identifier"].nunique()

In [None]:

# Export a wide cross-check file: one row per subject, with fbg_value and
# ogtt_value spread across visit codes. Written pipe-delimited to analysis_folder.
fname = "cross_check_end_fbgdate_pivot.csv"
# pivot_table uses its default aggregation (mean), so repeated
# (subject_identifier, visit_code) rows are collapsed into a single value.
df_pivot = cls.endpoint_df.sort_values(by=["subject_identifier"]).set_index("subject_identifier").pivot_table(columns=["visit_code"], values=["fbg_value","ogtt_value"], index=["subject_identifier"])
# ('visit_code') is a plain string, not a tuple; with axis=1 it names the
# column level to order the columns by. Rows are then ordered by subject.
df_pivot.sort_values(('visit_code'), axis=1).sort_values("subject_identifier").to_csv(analysis_folder / fname, sep="|", encoding="utf8", index=True)

In [None]:

# Timestamped, pipe-delimited export of the glucose CRF.
# Single quotes inside the f-string keep this valid on Python < 3.12, where
# reusing the enclosing quote character is a SyntaxError.
fname = f"glucose-{datetime.now().strftime('%Y-%m-%d-%H%M')}.csv"
get_crf("meta_subject.glucose", subject_visit_model="meta_subject.subjectvisit", drop_columns=["consent_model"]).to_csv(analysis_folder / fname, sep="|", encoding="utf8", index=False)


In [None]:
# Timestamped, pipe-delimited export of the glucosefbg CRF.
# Single quotes inside the f-string keep this valid on Python < 3.12, where
# reusing the enclosing quote character is a SyntaxError.
fname = f"glucosefbg-{datetime.now().strftime('%Y-%m-%d-%H%M')}.csv"
get_crf("meta_subject.glucosefbg", subject_visit_model="meta_subject.subjectvisit", drop_columns=["consent_model"]).to_csv(analysis_folder / fname, sep="|", encoding="utf8", index=False)


In [None]:

# Timestamped, pipe-delimited export of the evaluator's cls.df frame.
# Single quotes inside the f-string keep this valid on Python < 3.12, where
# reusing the enclosing quote character is a SyntaxError.
fname = f"glucose-merged-{datetime.now().strftime('%Y-%m-%d-%H%M')}.csv"
cls.df.to_csv(analysis_folder / fname, sep="|", encoding="utf8", index=False)


In [None]:
# The notebook-level `import msoffcrypto` is commented out, so import it here:
# without it this cell raises NameError on a fresh kernel.
import msoffcrypto

# NOTE(review): mempass.mkpassword(2) looks like it *generates* a passphrase
# rather than retrieving the workbook's real one — confirm how the actual
# password is meant to be supplied (and avoid committing it to the notebook).
passwd = mempass.mkpassword(2)
fname = "KBs_latest_enders_26072024.xlsx"

# Decrypt the password-protected workbook into memory; nothing is written to disk.
decrypted_workbook = io.BytesIO()
with open(analysis_folder / fname, 'rb') as file:
    office_file = msoffcrypto.OfficeFile(file)
    office_file.load_key(password=passwd)
    office_file.decrypt(decrypted_workbook)

# Rewind so the reader starts at the beginning of the decrypted bytes.
decrypted_workbook.seek(0)
# The first sheet column is read as the index, then restored as a regular column.
df_katie2 = pd.read_excel(decrypted_workbook, index_col=0).reset_index()
# NOTE(review): this echoes the passphrase into the saved notebook output.
print(passwd)