In [None]:
%%capture
# output is suppressed but normally would spew out all the edc loading messages

import os
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
import math
# import matplotlib.pyplot as plt
# import seaborn as sns
import scipy.stats as stats

from dj_notebook import activate

# Both settings are mandatory: indexing os.environ raises KeyError right here if
# either variable is not set.
env_file, documents_folder = (
    os.environ[name] for name in ("META_ENV", "META_DOCUMENTS_FOLDER")
)
# Folder that the CSV export further down is written into.
report_folder = Path(documents_folder)

# Hand the dotenv file to dj_notebook's activate(); presumably this is what boots the
# project and emits the loading messages that %%capture hides. The handle is kept as `plus`.
plus = activate(dotenv_file=env_file)


In [None]:
from meta_screening.models import SubjectScreening
from meta_consent.models import SubjectConsent
from meta_subject.models import PhysicalExam, SubjectVisit
from django_pandas.io import read_frame
from django.contrib.sites.models import Site
from edc_analytics.custom_tables import (
    BmiTable, BpTable, AgeTable, ArtTable, GenderTable, FbgTable, OgttTable, 
    FbgOgttTable, WaistCircumferenceTable, HbA1cTable
)
from meta_analytics.dataframes.screening import get_glucose_tested_only_df


In [None]:
df_all = get_glucose_tested_only_df() # slow

In [None]:
sites = {obj.domain: obj.id for obj in Site.objects.all()}
df_all["site"] = df_all["site"].map(sites)

In [None]:
df_all

In [None]:
# Analysis subset: only rows where has_dm equals "No", renumbered from 0.
# Boolean selection returns a new frame, so df_all is not modified.
df = df_all.loc[df_all["has_dm"] == "No"].reset_index(drop=True)

In [None]:
# The cells below all follow one pattern: build a table object from `df` (the
# has_dm == "No" subset) and keep its `table_df` under a descriptive name for the
# concat that follows. `tbl` is only a scratch name and is overwritten each time.
# Gender
tbl = GenderTable(df)
gender_df = tbl.table_df

In [None]:
# Waist circumference
tbl = WaistCircumferenceTable(df)
waist_df = tbl.table_df

In [None]:
# ART
tbl = ArtTable(df)
art_df = tbl.table_df

In [None]:
# Age
tbl = AgeTable(df)
age_df = tbl.table_df

In [None]:
# Blood pressure
tbl = BpTable(df)
bp_df = tbl.table_df

In [None]:
# FBG
tbl = FbgTable(df)
fbg_df = tbl.table_df

In [None]:
# OGTT
tbl = OgttTable(df)
ogtt_df = tbl.table_df

In [None]:
# FBG combined with OGTT
tbl = FbgOgttTable(df)
fbg_ogtt_df = tbl.table_df

In [None]:
# BMI
tbl = BmiTable(df)
bmi_df = tbl.table_df

In [None]:
# HbA1c
tbl = HbA1cTable(df)
hba1c_df = tbl.table_df

In [None]:
# Stack the individual tables in report order; ignore_index renumbers the rows 0..n-1.
table_parts = [
    gender_df,
    age_df,
    bmi_df,
    waist_df,
    bp_df,
    art_df,
    fbg_df,
    ogtt_df,
    fbg_ogtt_df,
    hba1c_df,
]
tbl_df = pd.concat(table_parts, ignore_index=True)
# Preview the first five columns only.
tbl_df.iloc[:, :5]

In [None]:

# Write the first five columns of the combined table to a timestamped, pipe-delimited
# CSV in the report folder.
# The timestamp is formatted outside the f-string: reusing double quotes inside a
# double-quoted f-string only parses on Python 3.12+ and is a SyntaxError before that.
timestamp = datetime.now().strftime("%Y-%m-%d-%H%M")
fname = f"meta3_table2-{timestamp}.csv"
tbl_df.iloc[:, :5].to_csv(
    path_or_buf=report_folder / fname,
    encoding="utf-8",
    index=False,  # do not write the row index
    sep="|",
)


In [None]:
# Per-column non-null counts, restricted to rows that have a waist_circumference value.
df_all.dropna(subset=["waist_circumference"]).count()

In [None]:
# Fetch the SubjectVisit and PhysicalExam rows that belong to the subjects in `df`
# and load each queryset into a DataFrame.
subject_identifiers = list(df["subject_identifier"])

qs_subject_visit = SubjectVisit.objects.filter(subject_identifier__in=subject_identifiers)
# The visit's `id` is exposed as `subject_visit` so it can serve as the join key
# against the physical exam frame.
df_subject_visit = read_frame(qs_subject_visit).rename(columns={"id": "subject_visit"})

qs_physical_exam = PhysicalExam.objects.filter(
    subject_visit__subject_identifier__in=subject_identifiers
)
# NOTE(review): read_frame is called with its defaults; confirm that the `subject_visit`
# column it produces holds the same key values as SubjectVisit.id, since the frames
# are joined on that column.
df_physical_exam = read_frame(qs_physical_exam)


In [None]:
df_physical_exam.count()

In [None]:
df_physical_exam = pd.merge(
    df_physical_exam,
    df_subject_visit[
        ["subject_visit", "subject_identifier", "visit_code", "visit_code_sequence"]
    ],
    on="subject_visit",
    how="left",
)


In [None]:
len(df_physical_exam)

In [None]:
df_physical_exam = df_physical_exam[
    ["subject_identifier", "visit_code", "visit_code_sequence", "waist_circumference"]
]
df_physical_exam[["waist_circumference"]] = df_physical_exam[["waist_circumference"]].apply(
    pd.to_numeric
)


In [None]:
len(df_physical_exam)

In [None]:
# rename column to waist_circumference_baseline
df_physical_exam["waist_circumference_baseline"] = df_physical_exam["waist_circumference"]
df_physical_exam.drop(columns=["waist_circumference"])
df_physical_exam[["waist_circumference_baseline"]] = df_physical_exam[
    ["waist_circumference_baseline"]
].apply(pd.to_numeric)


In [None]:
len(df_physical_exam)

In [None]:
df1 = df_all.copy()

In [None]:
df1 = pd.merge(
    df1,
    df_physical_exam[["subject_identifier", "waist_circumference_baseline"]],
    on="subject_identifier",
    how="left",
    indicator=True
)


In [None]:
df1.count()

In [None]:

cond = (df1["waist_circumference"].isna()) & (df1["waist_circumference_baseline"].notna())

In [None]:
df1.loc[cond, "waist_circumference"] = df1["waist_circumference_baseline"]

In [None]:
df1.count()

In [None]:
df1[["waist_circumference", "waist_circumference_baseline"]].count()

In [None]:
df_physical_exam["waist_circumference"].describe()

In [None]:
df_physical_exam[["waist_circumference"]] = df_physical_exam[
    ["waist_circumference"]
].apply(pd.to_numeric)


In [None]:
df1["waist_circumference"].describe()

In [None]:
# List every row whose screening_identifier occurs more than once in df1; an empty
# result means the identifiers are unique. keep=False marks all members of a
# duplicated group, not just the repeats.
# (Filtering on `~isin(unique())` of the same column can never select a row, because
# every value is contained in its own set of unique values.)
df1[df1["screening_identifier"].duplicated(keep=False)]