In [None]:
%%capture
# output is suppressed but normally would spew out all the edc loading messages

import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
import math
# import matplotlib.pyplot as plt
# import seaborn as sns
import scipy.stats as stats

from dj_notebook import activate

# Both settings are read from the environment; a missing variable raises
# KeyError right here instead of failing later in the notebook.
env_file = os.environ["META_ENV"]
documents_folder = os.environ["META_DOCUMENTS_FOLDER"]
# Folder that later cells write report files into.
report_folder = Path(documents_folder)

# NOTE(review): presumably this bootstraps Django from the dotenv file so the
# model imports in the following cells work — confirm against dj_notebook.
plus = activate(dotenv_file=env_file)


In [None]:
from meta_screening.models import SubjectScreening
from django_pandas.io import read_frame


In [None]:
# Load every SubjectScreening field except the contact/identifier fields and
# the audit/bookkeeping columns listed below.
cols = [
    name
    for name in (field.name for field in SubjectScreening._meta.get_fields())
    if name
    not in {
        "contact_number",
        "initials",
        "hospital_identifier",
        "modified",
        "user_created",
        "user_modified",
        "hostname_created",
        "hostname_modified",
        "device_created",
        "device_modified",
        "locale_created",
        "locale_modified",
        "slug",
    }
]
qs_screening = SubjectScreening.objects.values(*cols).all()
df = read_frame(qs_screening)

In [None]:
# Non-null count for every loaded column.
df.notna().sum()


In [None]:
from edc_constants.constants import NO, YES
# Distribution of the hiv_pos answers (only the last expression of a cell is
# displayed, so this is the cell's single output).
df["hiv_pos"].value_counts()

In [None]:
# Drop the rows whose hiv_pos answer is "No", then show per-column non-null counts.
df = df.drop(index=df.index[df["hiv_pos"] == "No"])
# df = df.drop(df[df.art_six_months==NO].index)
# df = df.drop(df[df.on_rx_stable==NO].index)
df.count()

In [None]:
# check for duplicate subjects / there are none
# df[df.duplicated(["hospital_identifier"], keep=False)]
# len(df)

In [None]:
# Number of rows left in the working frame.
df.shape[0]

In [None]:
# Gender counts with the overall total and each gender's proportion of it.
df_tmp = (
    df["gender"]
    .value_counts()
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# has_dm answer counts (missing values kept as their own row), with total and proportion.
df_tmp = (
    df["has_dm"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Gender split of the rows where has_dm is missing.
df_tmp = (
    df.loc[df["has_dm"].isna(), "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 1: has_dm and on_dm_medication are both answered NO.
cond1 = df[["has_dm", "on_dm_medication"]].eq(NO).all(axis=1)
df_tmp = (
    df.loc[cond1, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 2: step 1 plus on_rx_stable, art_six_months and vl_undetectable all YES.
cond2 = cond1 & df[["on_rx_stable", "art_six_months", "vl_undetectable"]].eq(YES).all(axis=1)
df_tmp = (
    df.loc[cond2, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 3: step 2 plus staying_nearby_12 and lives_nearby both YES.
cond3 = cond2 & df[["staying_nearby_12", "lives_nearby"]].eq(YES).all(axis=1)
df_tmp = (
    df.loc[cond3, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 4: step 3 minus rows where pregnant is YES (missing answers stay in).
cond4 = cond3 & ~df["pregnant"].eq(YES)
df_tmp = (
    df.loc[cond4, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 5: step 4 plus every condition column below answered NO.
cond5 = cond4 & df[
    [
        "congestive_heart_failure",
        "liver_disease",
        "alcoholism",
        "acute_metabolic_acidosis",
        "renal_function_condition",
        "tissue_hypoxia_condition",
        "acute_condition",
        "metformin_sensitivity",
    ]
].eq(NO).all(axis=1)

df_tmp = (
    df.loc[cond5, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Recorded eligible_part_one values among the rows passing step 5.
df.loc[cond5, "eligible_part_one"].value_counts()

In [None]:
# Recorded eligible_part_two values among the rows passing step 5.
df.loc[cond5, "eligible_part_two"].value_counts()


In [None]:
# Funnel step 6: step 5 plus meta_phase_two answered NO.
cond6 = cond5 & df["meta_phase_two"].eq(NO)
df_tmp = (
    df.loc[cond6, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Funnel step 7: step 6 plus eligible_part_one / eligible_part_two recorded
# as "Yes" and agree_to_p3 answered YES.
cond7 = (
    cond6
    & df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
    & df["agree_to_p3"].eq(YES)
)
df_tmp = (
    df.loc[cond7, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Step 7 rows where already_fasted is YES.
cond8 = cond7 & df["already_fasted"].eq(YES)
df_tmp = (
    df.loc[cond8, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Step 7 rows where already_fasted is NO.
cond9 = cond7 & df["already_fasted"].eq(NO)
df_tmp = (
    df.loc[cond9, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
from edc_model.utils import duration_hm_to_timedelta

# gen fasted variable
def get_duration_dh_to_timedelta(s):
    """Convert a row's `fasting_duration_str` with `duration_hm_to_timedelta`.

    A missing value is returned unchanged, so the helper is never called on it.
    """
    raw = s["fasting_duration_str"]
    if pd.isna(raw):
        return raw
    return duration_hm_to_timedelta(raw)

def get_fasted(s):
    """Classify one row's fasting status.

    Without a fasting duration the result is whatever `has_glucose_value`
    reports when that is truthy, otherwise None. With a duration the result is
    NO when it is 8 hours or less and YES when it is longer.
    """
    delta = s["fasted_duration_delta"]
    if pd.isna(delta):
        glucose = has_glucose_value(s)
        return glucose if glucose else None
    # NOTE(review): exactly 8 hours counts as NO here — confirm against the protocol.
    return NO if delta <= pd.Timedelta(hours=8) else YES

def has_glucose_value(s):
    """Describe which glucose measurements a row carries.

    Returns:
        "FBG-OGTT" when both `fbg_value` and `ogtt_value` are present,
        "FBG only" when `fbg_value` is present without `ogtt_value`,
        False when `ogtt_value`, `ogtt2_value`, `fbg_value` and `fbg2_value`
        are all missing, and True in any other case.
    """
    has_fbg = not pd.isna(s["fbg_value"])
    has_ogtt = not pd.isna(s["ogtt_value"])
    # The combined case must be tested first: checking `fbg_value` alone first
    # returns "FBG only" for every row with an FBG and never reaches "FBG-OGTT".
    if has_fbg and has_ogtt:
        return "FBG-OGTT"
    if has_fbg:
        return "FBG only"
    glucose_columns = ("ogtt_value", "ogtt2_value", "fbg_value", "fbg2_value")
    if all(pd.isna(s[name]) for name in glucose_columns):
        return False
    return True

# Row-wise derivation; the order matters because get_fasted reads the
# fasted_duration_delta column created by the first statement.
df["fasted_duration_delta"] = df.apply(get_duration_dh_to_timedelta, axis=1)
df["fasted"] = df.apply(get_fasted, axis=1)


In [None]:
# eligible values for rows that have a subject_identifier shorter than 20 characters.
# NOTE(review): presumably the length check separates real identifiers from placeholder values — confirm.
df.loc[
    df["subject_identifier"].notna() & df["subject_identifier"].str.len().lt(20),
    "eligible",
].value_counts()

In [None]:
# Gender split of the cond9 rows whose derived fasted value is YES.
df_tmp = (
    df.loc[cond9 & df["fasted"].eq(YES), "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Gender split of the cond9 rows whose derived fasted value is anything but YES.
df_tmp = (
    df.loc[cond9 & ~df["fasted"].eq(YES), "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Identifiers of cond9 rows that are not fasted==YES and have a
# subject_identifier shorter than 20 characters.
df.loc[
    cond9 & ~df["fasted"].eq(YES) & df["subject_identifier"].str.len().lt(20),
    ["screening_identifier", "subject_identifier"],
]

In [None]:
# never returned or not evaluated
# note some have part three started and 1 even has a fasting duration
# cond9 rows whose eligible_part_three is still "To be determined".
cond10 = cond9 & df["eligible_part_three"].eq("To be determined")
df_tmp = (
    df.loc[cond10, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# Step 7 rows whose derived fasted value is YES.
cond11 = cond7 & df["fasted"].eq(YES)
df_tmp = (
    df.loc[cond11, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# cond9 rows with part three decided (not "To be determined") and fasted == NO.
# Note: this rebinds cond11 to a different mask than the previous cell.
cond11 = (
    cond9
    & ~df["eligible_part_three"].eq("To be determined")
    & df["fasted"].eq(NO)
)
df_tmp = (
    df.loc[cond11, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# cond9 rows with part three decided (not "To be determined") and an fbg_value present.
# Note: this rebinds cond11 once more.
cond11 = (
    cond9
    & ~df["eligible_part_three"].eq("To be determined")
    & df["fbg_value"].notna()
)
df_tmp = (
    df.loc[cond11, "gender"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# eligible_part_three value counts over the whole frame, with total and proportion.
df_tmp = (
    df["eligible_part_three"]
    .value_counts(dropna=False)
    .to_frame()
    .reset_index()
    .assign(total=lambda t: t["count"].sum())
    .assign(prop=lambda t: t["count"] / t["total"])
)
df_tmp

In [None]:
# This cell used `all_conds` and `cond_eligible` before either was assigned,
# so it raised NameError on a fresh kernel. Both are defined here first.
cond_eligible = (df["eligible_part_one"]=="Yes") & (df["eligible_part_two"]=="Yes")
# NOTE(review): `all_conds` is assumed to be the funnel mask up to the
# meta_phase_two check (cond6), since this expression then mirrors
# cond9 & ~(fasted==YES) — confirm this is the intended base mask.
all_conds = cond6

# Gender split of agreeing, not-already-fasted, eligible rows that are not fasted==YES.
not_fasted = (
    all_conds
    & (df.agree_to_p3==YES)
    & (df.already_fasted==NO)
    & ~(df.fasted==YES)
    & cond_eligible
)
df_tmp = df[not_fasted].gender.value_counts(dropna=False).to_frame().reset_index()
df_tmp["total"] = df_tmp["count"].sum()
df_tmp["prop"] = df_tmp["count"] / df_tmp["total"]
df_tmp


In [None]:
# The original `all_conds = all_conds & ...` relied on a value from an earlier
# kernel state and narrowed it again on every re-run. It is now rebuilt from
# the funnel mask so the cell is self-contained and idempotent.
# NOTE(review): cond6 (funnel up to the meta_phase_two check) is assumed to be
# the intended base mask — confirm.
all_conds = cond6 & (df.agree_to_p3==YES) & (df.already_fasted==NO)
cond_eligible = (df["eligible_part_one"]=="Yes") & (df["eligible_part_two"]=="Yes")
# Gender split of those rows whose eligible_part_three is decided (YES or NO).
df[all_conds & cond_eligible & (df.eligible_part_three.isin([YES, NO]))].gender.value_counts(dropna=False)


In [None]:
# Row counts with a first or repeat OGTT base datetime, OGTT datetime, OGTT value.
print(int(df[["ogtt_base_datetime", "ogtt2_base_datetime"]].notna().any(axis=1).sum()))
print(int(df[["ogtt_datetime", "ogtt2_datetime"]].notna().any(axis=1).sum()))
print(int(df[["ogtt_value", "ogtt2_value"]].notna().any(axis=1).sum()))
# Rows with any glucose value (OGTT or FBG, first or repeat) and has_dm recorded as "No".
df[
    df[["ogtt_value", "ogtt2_value", "fbg_value", "fbg2_value"]].notna().any(axis=1)
    & df["has_dm"].eq("No")
]
                                                                                                                        

In [None]:
# counts by site - row, column
# Cross-tabulates site against gender, with margins adding a totals row and column.
gender_by_site = pd.crosstab(df['site'], df['gender'], margins=True)
# The labels below are assigned by position, so they rely on the crosstab
# producing exactly two gender columns plus the margin, and exactly five sites
# plus the margin, in this order.
# NOTE(review): the values are still counts at this point although the labels say "(%)";
# the following cells derive the percentages — confirm the label order matches the data.
gender_by_site.columns = ["F (%)", "M (%)", "(%)"]
gender_by_site.index = ["amana", "hindu-mandal", "mnazi-moja", "mwananyamala", "temeke", "total (%)"]
gender_by_site

In [None]:
# Each cell as a percentage of all rows in df, rounded to 3 decimals before scaling.
(gender_by_site / len(df)).round(3) * 100


In [None]:

# Each cell as a percentage of its row total (the "(%)" margin column).
gender_by_site.div(gender_by_site["(%)"], axis=0).round(3) * 100


In [None]:
# has_dm fillna with unk
# `not x` alone only catches None and empty strings; NaN is truthy and pd.NA
# raises on truth-testing, so missing values are checked explicitly first.
df["has_dm"] = df["has_dm"].apply(lambda x: "unk" if pd.isna(x) or not x else x)

# in_catchment: True only when lives_nearby and staying_nearby_12 are both "Yes"
df["in_catchment"] = (df["lives_nearby"] == "Yes") & (df["staying_nearby_12"] == "Yes")



In [None]:
# run crosstabs

In [None]:
# has_dm by gender over the whole frame, including row/column totals.
df_crosstab = pd.crosstab(index=df["has_dm"], columns=df["gender"], margins=True, dropna=False)
df_crosstab

In [None]:
# on_dm_medication by gender, restricted to rows with has_dm == "Yes".
cond = df["has_dm"].eq("Yes")
df_crosstab = pd.crosstab(
    index=df.loc[cond, "on_dm_medication"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab

In [None]:
# on_dm_medication by gender, restricted to rows with has_dm == "No".
cond = df["has_dm"].eq("No")
df_crosstab = pd.crosstab(
    index=df.loc[cond, "on_dm_medication"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab

In [None]:
# Rows with has_dm and on_dm_medication both "No":
#   cond     - on_rx_stable, vl_undetectable and art_six_months all "Yes"
#   neg_cond - at least one of those three is not "Yes"
# The table shows art_six_months by gender for the neg_cond rows.
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
)
neg_cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].ne("Yes").any(axis=1)
)
df_crosstab = pd.crosstab(
    index=df.loc[neg_cond, "art_six_months"],
    columns=df.loc[neg_cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab

In [None]:
# "lives_nearby",
# "staying_nearby_12",
# crosstab by has_dm == No & on_dm_medication==No by stable on ART for 6m, gender
# in_catchment by gender for rows with has_dm / on_dm_medication "No" and
# on_rx_stable / vl_undetectable / art_six_months "Yes".
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
)

df_crosstab = pd.crosstab(
    index=df.loc[cond, "in_catchment"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab

In [None]:
# pregnant by gender for rows passing the has_dm / medication / ART checks and in catchment.
# (The in_catchment test appeared twice in the original expression; once is equivalent.)
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
    & df["in_catchment"].eq(True)
)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "pregnant"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# crosstab on conditions (part two)
# "congestive_heart_failure",
# "liver_disease",
# "alcoholism",
# "acute_metabolic_acidosis",
# "renal_function_condition",
# "tissue_hypoxia_condition",
# "acute_condition",
# "metformin_sensitivity",

In [None]:
# Single-condition crosstab (swap the index column to inspect another condition):
# metformin_sensitivity by gender for in-catchment rows passing the
# has_dm / medication / ART checks whose pregnant answer is not "Yes".
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
    & df["in_catchment"].eq(True)
    & df["pregnant"].ne("Yes")
)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "metformin_sensitivity"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# meta_phase_two by gender for rows that pass the has_dm / medication / ART /
# catchment checks and have none of the listed answers equal to "Yes".
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
    & df["in_catchment"].eq(True)
    & df[
        [
            "pregnant",
            "congestive_heart_failure",
            "liver_disease",
            "alcoholism",
            "acute_metabolic_acidosis",
            "renal_function_condition",
            "tissue_hypoxia_condition",
            "acute_condition",
            "metformin_sensitivity",
        ]
    ].ne("Yes").all(axis=1)
)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "meta_phase_two"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# Number of rows passing every check, now also requiring meta_phase_two != "Yes".
cond = (
    df[["has_dm", "on_dm_medication"]].eq("No").all(axis=1)
    & df[["on_rx_stable", "vl_undetectable", "art_six_months"]].eq("Yes").all(axis=1)
    & df["in_catchment"].eq(True)
    & df[
        [
            "pregnant",
            "congestive_heart_failure",
            "liver_disease",
            "alcoholism",
            "acute_metabolic_acidosis",
            "renal_function_condition",
            "tissue_hypoxia_condition",
            "acute_condition",
            "metformin_sensitivity",
            "meta_phase_two",
        ]
    ].ne("Yes").all(axis=1)
)
df[cond].shape[0]

In [None]:
# Compare the stored eligibility flags with the hand-built mask `cond`:
# prints [rows with both flags "Yes", rows matching cond].

cond_eligible = df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
print([df[mask].shape[0] for mask in (cond_eligible, cond)])


In [None]:
# agree_to_p3 by gender among rows with both eligibility flags "Yes".
cond = df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "agree_to_p3"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab



In [None]:
# already_fasted by gender among rows with both eligibility flags "Yes".
cond = df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "already_fasted"],
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# who returned and had an FBG performed:
# presence of fbg_value by gender among rows with both eligibility flags "Yes".
cond = df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
df_crosstab = pd.crosstab(
    index=df.loc[cond, "fbg_value"].notna(),
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# df_crosstab / len(df[cond & cond2])

In [None]:
# of 5616 look at FBG and OGTT counts. Re-run this cell with another column for:
# glucose: fbg_value, fbg2_value, ogtt_value, ogtt2_value
# BP: sys_blood_pressure_one, sys_blood_pressure_two, dia_blood_pressure_one, dia_blood_pressure_two
# Shown here: presence of hba1c_value by gender among eligible rows that have an fbg_value.
cond = (
    df[["eligible_part_one", "eligible_part_two"]].eq("Yes").all(axis=1)
    & df["fbg_value"].notna()
)

df_crosstab = pd.crosstab(
    index=df.loc[cond, "hba1c_value"].notna(),
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# let's look at screening glucose and BP measurements

In [None]:
# Eligible, fasted rows that carry at least one glucose value (FBG or OGTT, first or repeat).
cond = (
    df[["eligible_part_one", "eligible_part_two", "fasted"]].eq("Yes").all(axis=1)
    & df[["fbg_value", "ogtt_value", "fbg2_value", "ogtt2_value"]].notna().any(axis=1)
)


In [None]:
# Eligible, fasted rows that have an fbg_value (replaces the mask from the previous cell).
cond = (
    df[["eligible_part_one", "eligible_part_two", "fasted"]].eq("Yes").all(axis=1)
    & df["fbg_value"].notna()
)


In [None]:
# Number of rows selected by the current mask.
df[cond].shape[0]

In [None]:
# Narrow the mask to rows that also have an ogtt_value, then tabulate
# presence of fbg_value by gender.
cond = cond & df["ogtt_value"].notna()
df_crosstab = pd.crosstab(
    index=df.loc[cond, "fbg_value"].notna(),
    columns=df.loc[cond, "gender"],
    margins=True,
    dropna=False,
)
df_crosstab


In [None]:
# Work on an explicit copy: assigning new columns to the filtered slice
# `df[cond]` triggers pandas' SettingWithCopyWarning.
df2 = df[cond].copy()
# fbg holds the repeat measurement (fbg2_value) where both readings exist,
# otherwise the first reading (fbg_value).
df2["fbg"] = df2["fbg_value"]
# Build the mask from df2 only; the original mixed df and df2, whose indexes
# differ after filtering, and relied on implicit index alignment.
has_both = df2["fbg_value"].notna() & df2["fbg2_value"].notna()
df2.loc[has_both, "fbg"] = df2.loc[has_both, "fbg2_value"]
df2["fbg"] = pd.to_numeric(df2["fbg"])

In [None]:
# Rows with a repeat FBG: the chosen fbg next to both source readings.
df2.loc[df2["fbg2_value"].notna(), ["fbg", "fbg_value", "fbg2_value"]]

In [None]:
# Summary statistics of the numeric fbg column.
df2.fbg.describe()

In [None]:
# PART TWO
# "congestive_heart_failure",
# "liver_disease",
# "alcoholism",
# "acute_metabolic_acidosis",
# "renal_function_condition",
# "tissue_hypoxia_condition",
# "acute_condition",
# "metformin_sensitivity",

In [None]:
# part one variables

# "meta_phase_two",
# "hiv_pos",
# "art_six_months",
# "on_rx_stable",
# "vl_undetectable",
# "lives_nearby",
# "staying_nearby_12",
# "pregnant",


In [None]:
# only fasted for 7h
# Full record of one subject, as a column -> {index: value} mapping.
df.loc[df["subject_identifier"].eq("105-30-0164-8")].to_dict()

In [None]:
# Derived fasting columns for every row that has a subject_identifier.
df.loc[df["subject_identifier"].notna(), ["fasted", "fasted_duration_delta"]]


In [None]:
from meta_prn.models import OnSchedule, OffSchedule, OnScheduleDmReferral, OffScheduleDmReferral
def _schedule_frame(model, *fields):
    """Read the named fields of every row of `model` into a DataFrame."""
    return read_frame(model.objects.values(*fields).all())


# On/off schedule datetimes per subject, for the main and the DM referral schedule models.
df_on_meta = _schedule_frame(OnSchedule, "subject_identifier", "onschedule_datetime")
df_off_meta = _schedule_frame(OffSchedule, "subject_identifier", "offschedule_datetime")
df_on = _schedule_frame(OnScheduleDmReferral, "subject_identifier", "onschedule_datetime")
df_off = _schedule_frame(OffScheduleDmReferral, "subject_identifier", "offschedule_datetime")


In [None]:
# Imported here because this cell calls get_utcnow before the later cell that
# imports it has run, which raised NameError on Restart & Run All.
from edc_utils import get_utcnow


def get_meta_duration(s):
    """Return off-schedule minus on-schedule datetime for the meta_* columns.

    When the row has no meta_offschedule_datetime, the current time from
    get_utcnow() is used as the end point.
    """
    meta_off = get_utcnow() if pd.isna(s["meta_offschedule_datetime"]) else s["meta_offschedule_datetime"]
    return meta_off - s["meta_onschedule_datetime"]


def get_dm_duration(s):
    """Return off-schedule minus on-schedule datetime for the dm_* columns.

    When the row has no dm_offschedule_datetime, the current time from
    get_utcnow() is used as the end point.
    """
    dm_off = get_utcnow() if pd.isna(s["dm_offschedule_datetime"]) else s["dm_offschedule_datetime"]
    return dm_off - s["dm_onschedule_datetime"]


# One row per subject in df_on_meta; the other three frames are left-joined on subject_identifier.
df_status = pd.merge(df_on_meta, df_off_meta, on="subject_identifier", how="left")
df_status.columns = ["subject_identifier", "meta_onschedule_datetime", "meta_offschedule_datetime"]
# Select the two expected columns explicitly so the positional rename below
# still sees exactly five columns even after later cells add columns to df_on.
df_status = df_status.merge(df_on[["subject_identifier", "onschedule_datetime"]], on="subject_identifier", how="left")
df_status = df_status.merge(df_off[["subject_identifier", "offschedule_datetime"]], on="subject_identifier", how="left")
df_status.columns = ["subject_identifier", "meta_onschedule_datetime", "meta_offschedule_datetime", "dm_onschedule_datetime", "dm_offschedule_datetime"]
df_status["meta_duration"] = df_status.apply(get_meta_duration, axis=1)
df_status["meta_duration_days"] = df_status["meta_duration"].dt.days
df_status["dm_duration"] = df_status.apply(get_dm_duration, axis=1)
df_status["dm_duration_days"] = df_status["dm_duration"].dt.days
df_status.to_csv(report_folder / "meta_schedule_status.csv", index=False)

In [None]:
# Attach each subject's off-schedule datetime to the on-schedule frame (left join).
df_on = pd.merge(df_on, df_off, on="subject_identifier", how="left")


In [None]:
from edc_utils import get_utcnow

# Time elapsed between each subject's onschedule_datetime and now.
now = get_utcnow()
df_on = df_on.assign(duration=now - df_on["onschedule_datetime"])

In [None]:
# Rows whose duration is at least 182 days.
# The original expression ended in a bare `.to_stata` (attribute access with
# no call and no path), which only echoed the bound method; the filtered
# frame itself is displayed instead.
# NOTE(review): if a Stata export was intended, call `.to_stata(<path>)` with
# the target file — confirm the file name and that all column dtypes are
# supported by the writer.
df_on[df_on.duration >= pd.Timedelta(days=182)]
