In [None]:
%%capture
# uv sync --group develop --group lint --group test --group notebook
import os
from pathlib import Path

import numpy as np
import pandas as pd
from dj_notebook import activate
from django_pandas.io import read_frame

# Runtime configuration is read from environment variables; a missing
# variable raises KeyError at this point.
env_file = os.environ["META_ENV"]
reports_folder, analysis_folder, pharmacy_folder = (
    Path(os.environ[variable])
    for variable in (
        "META_REPORTS_FOLDER",
        "META_ANALYSIS_FOLDER",
        "META_PHARMACY_FOLDER",
    )
)

# Start dj_notebook using the dotenv file named by META_ENV.
# NOTE(review): presumably this must run before any project imports -- confirm.
plus = activate(dotenv_file=env_file)

# Opt in to pandas' future behaviour of not silently downcasting results.
pd.set_option("future.no_silent_downcasting", True)

In [None]:
from meta_analytics.dataframes.screening import get_screening_df

In [None]:
# Load the META3 screening records and harmonise column names and dtypes.

# Screening identifier that is dropped from the analysis set.
exclude = "SR9E8B4D"

df = get_screening_df()
df = df[df["screening_identifier"] != exclude].copy().reset_index(drop=True)

# All renames in one place; none of the steps below reference the old names.
df = df.rename(
    columns={
        "site": "site_id",
        "hba1c_value": "hba1c",
        "fasting": "fasted",
        "calculated_bmi_value": "bmi",
    }
)
df["source"] = "meta3"
df = df.astype({"weight": "Float64", "height": "Float64"})

# Recode the flag columns to int64: 1 where the value equals True, otherwise 0.
df = df.assign(
    **{
        flag: np.where(df[flag].eq(True), 1, 0).astype("int64")
        for flag in ("eligible", "consented", "refused")
    }
)

# Replace every missing marker in these measurement columns with np.nan.
df = df.assign(
    **{
        measure: np.where(pd.isna(df[measure]), np.nan, df[measure])
        for measure in (
            "converted_creatinine_value",
            "sys_blood_pressure",
            "dia_blood_pressure",
            "creatinine_value",
        )
    }
)

# Use the first measurement and fall back to the second one where it is missing.
df["fbg"] = df["converted_fbg_value"].fillna(df["converted_fbg2_value"])
df["ogtt"] = df["converted_ogtt_value"].fillna(df["converted_ogtt2_value"])

# Keep only identifiers that start with "105-"; everything else becomes pd.NA.
# na=False makes a missing identifier evaluate to False instead of putting a
# missing value into the mask handed to np.where.
df["subject_identifier"] = np.where(
    df["subject_identifier"].str.startswith("105-", na=False),
    df["subject_identifier"],
    pd.NA,
)


In [None]:
# META3 analysis set: rows where fbg, ogtt and hba1c are all present,
# restricted to the columns listed below.
df_meta3 = (
    df.dropna(subset=["fbg", "ogtt", "hba1c"])[
        [
            "screening_identifier",
            "subject_identifier",
            "report_datetime",
            "fasted",
            "fbg",
            "ogtt",
            "hba1c",
            "age_in_years",
            "gender",
            "waist_circumference",
            "bmi",
            "weight",
            "height",
            "eligible",
            "consented",
            "reasons_ineligible",
            "source",
        ]
    ]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Folder holding the META2 screening extract. Set META2_ANALYSIS_FOLDER to
# point at a different copy; the original local path remains the default so
# existing setups keep working unchanged.
meta2_folder = Path(
    os.environ.get(
        "META2_ANALYSIS_FOLDER",
        "/Users/erikvw/Documents/ucl/protocols/meta2/analysis",
    )
)

# The extract is read as a pipe-delimited file.
df_screening_meta2 = pd.read_csv(
    meta2_folder / "meta2_screening_hba1c_paper_anu.csv", sep="|"
)

# Tag every row with its study of origin.
df_screening_meta2["source"] = "meta2"

In [None]:
# META2 analysis set: rows where fbg, ogtt and hba1c are all present,
# restricted to the columns listed below.
df_meta2 = (
    df_screening_meta2.dropna(subset=["fbg", "ogtt", "hba1c"])[
        [
            "screening_identifier",
            "subject_identifier",
            "source",
            "report_datetime",
            "fasted",
            "fbg",
            "ogtt",
            "hba1c",
            "age_in_years",
            "gender",
            "waist_circumference",
            "bmi",
            "weight",
            "height",
            "eligible",
            "consented",
            "reasons_ineligible",
        ]
    ]
    .copy()
    .reset_index(drop=True)
)

In [None]:
# Stack the META2 and META3 analysis sets. ignore_index=True already gives the
# result a fresh 0..n-1 index, so no further reset_index is needed.
df = pd.concat([df_meta2, df_meta3], ignore_index=True)

# Parse report_datetime as UTC, then drop the timezone so the column is a
# plain (naive) datetime64[ns].
df["report_datetime"] = (
    pd.to_datetime(df["report_datetime"], utc=True)
    .dt.tz_localize(None)
    .astype("datetime64[ns]")
)


In [None]:
# Destination folder for the combined export. Set META_EXPORT_FOLDER to write
# somewhere else; the original local path remains the default so existing
# setups keep working unchanged.
folder = Path(
    os.environ.get(
        "META_EXPORT_FOLDER",
        "/Users/erikvw/Library/CloudStorage/OneDrive-UniversityCollegeLondon/Documents - igh.respond-africa/RESPOND-AFRICA-writing-workshop/anu",
    )
)

# Pipe-delimited CSV; the row index is written as the first column.
df.to_csv(folder / "meta_2_3_screening.csv", sep="|", index=True)

# Stata file in dta format version 118; the row index is not written.
df.to_stata(folder / "meta_2_3_screening.dta", version=118, write_index=False)


In [None]:
# df_ids = pd.read_stata(folder /"diagnostic analysis_short_for merge.dta")

In [None]:
# df_ids[(~df_ids["screening_identifier"].isin(df["screening_identifier"]))]