In [None]:
%%capture
import os
from pathlib import Path
import pandas as pd
from dj_notebook import activate

# All locations are taken from environment variables; a missing variable
# raises KeyError here rather than failing later in the notebook.
env_file = os.environ["META_ENV"]
reports_folder, analysis_folder, pharmacy_folder = (
    Path(os.environ[variable])
    for variable in (
        "META_REPORTS_FOLDER",
        "META_ANALYSIS_FOLDER",
        "META_PHARMACY_FOLDER",
    )
)
# NOTE(review): `activate` comes from dj_notebook; presumably it boots the
# Django project configured by the dotenv file -- confirm.
plus = activate(dotenv_file=env_file)
# Opt in to the pandas option that disables silent downcasting.
pd.set_option("future.no_silent_downcasting", True)

In [None]:
from edc_pdutils.dataframes import get_crf, get_subject_visit
from edc_constants.constants import YES
from edc_appointment.analytics import get_appointment_df
from datetime import datetime

In [None]:
# Exclusive upper bound; used further down to keep only appointments
# scheduled before this date.
cutoff_datetime = datetime(year=2026, month=3, day=1)

# Load the two CRFs that carry ART regimen information.
df_patienthistory = get_crf(
    "meta_subject.patienthistory",
    subject_visit_model="meta_subject.subjectvisit",
)
df_followup_examination = get_crf(
    "meta_subject.FollowupExamination",
    subject_visit_model="meta_subject.subjectvisit",
)

In [None]:
# Free-text regimen spellings grouped by the canonical spelling they are
# normalised to.
canonical_variants = {
    "ABC + 3TC + DTG": ["ABC+ 3TC+ DTG", "ABC+3TC+DTG", "ABC, 3TC, DTG"],
    "ABC + EFV + DTG": ["ABC, EFV, DTG"],
    "TDF + FTC + DTG": ["TDF+FTC+DTG"],
    "AZT + 3TC + DTG": ["AZT+3TC+DTG", "AZT + 3 TC + DTG"],
    "TDF + 3TC + DTG": ["TDF+3TC+DTG"],
}

# Flatten to the {variant: canonical} mapping consumed by Series.replace.
# Only exact matches are replaced; any other value is left as entered.
replacements = {
    variant: canonical
    for canonical, variants in canonical_variants.items()
    for variant in variants
}

other_regimen_text = df_patienthistory["other_current_arv_regimen"]
df_patienthistory["other_current_arv_regimen"] = other_regimen_text.replace(replacements)

In [None]:
# The baseline regimen is the coded answer, except where the coded answer is
# "Other, specify ..." -- then the (normalised) free-text answer is used.
chose_other_option = df_patienthistory["current_arv_regimen"] == "Other, specify ..."
df_patienthistory["regimen"] = df_patienthistory["current_arv_regimen"].mask(
    chose_other_option,
    df_patienthistory["other_current_arv_regimen"],
)

In [None]:
# Apply the same spelling normalisation to the free-text "new regimen"
# field of the follow-up examination.
new_regimen_text = df_followup_examination["art_new_regimen_other"]
df_followup_examination["art_new_regimen_other"] = new_regimen_text.replace(replacements)

In [None]:
# A follow-up row only gets a regimen when an ART change was reported:
#   - change reported with a free-text regimen -> that regimen
#   - change reported without one              -> "CHANGE_NOT_REPORTED"
#   - no change reported                       -> left missing (pd.NA)
art_was_changed = df_followup_examination["art_change"] == YES
new_regimen_missing = df_followup_examination["art_new_regimen_other"].isna()

df_followup_examination["regimen"] = pd.NA
df_followup_examination.loc[
    art_was_changed & ~new_regimen_missing, "regimen"
] = df_followup_examination["art_new_regimen_other"]
df_followup_examination.loc[
    art_was_changed & new_regimen_missing, "regimen"
] = "CHANGE_NOT_REPORTED"

In [None]:
# Stack baseline and follow-up regimen observations into one long table.
regimen_columns = ["subject_identifier", "visit_datetime", "regimen"]
df_regimen = pd.concat(
    [
        df_patienthistory[regimen_columns],
        df_followup_examination[regimen_columns],
    ]
)

# "Other second line" is reported under the same label as an unreported change.
df_regimen["regimen"] = df_regimen["regimen"].replace(
    {"Other second line": "CHANGE_NOT_REPORTED"}
)

# Categories are exactly the non-missing regimens present, in order of
# first appearance.
observed_regimens = list(df_regimen["regimen"].dropna().unique())
df_regimen["regimen"] = pd.Categorical(
    df_regimen["regimen"], categories=observed_regimens, ordered=False
)

df_regimen = (
    df_regimen
    .sort_values(["subject_identifier", "visit_datetime"])
    .reset_index(drop=True)
)

In [None]:
# One row per subject, one column per regimen; each cell holds the LATEST
# visit datetime at which that regimen was recorded for the subject.
#
# aggfunc must be given explicitly: pivot_table defaults to "mean", which
# would average the visit datetimes of a regimen recorded at several visits.
# Downstream cells pick the regimen with the greatest date as the current
# one, so an averaged date could make an older regimen look more recent.
df_pivot = df_regimen.pivot_table(
    values="visit_datetime",
    columns="regimen",
    index="subject_identifier",
    aggfunc="max",
    observed=True,
)
df_pivot = df_pivot.reset_index()

In [None]:
# Spot check: list the regimen dates recorded for one subject, newest first.
subject_identifier = "105-20-0050-0"
(
    df_pivot.loc[df_pivot["subject_identifier"] == subject_identifier]
    .melt()
    .query("value.notna() and regimen!='subject_identifier'")
    .sort_values("value", ascending=False)
)

In [None]:
# Back to long format: one row per (subject, regimen) that has a date.
# The explicit copy makes df_melt an independent frame, so adding columns to
# it later does not trigger pandas' SettingWithCopyWarning on a filtered slice.
df_melt = (
    df_pivot
    .melt(id_vars=["subject_identifier"])
    .query("value.notna()")
    .copy()
)

In [None]:
# Latest regimen date per subject, broadcast back onto every row.
df_melt["max_date"] = df_melt.groupby("subject_identifier")["value"].transform("max")

# Keep the regimen name only on the row(s) whose date equals that latest
# date; all other rows get a missing value. If two regimens share the
# latest date, both rows are kept.
is_latest_row = df_melt["value"] == df_melt["max_date"]
df_melt["current_regimen"] = df_melt["regimen"].where(is_latest_row)

In [None]:
# Rows flagged as the subject's current regimen, reduced to the columns
# needed downstream.
df_current_regimens = df_melt.loc[
    df_melt["current_regimen"].notna(),
    ["subject_identifier", "max_date", "current_regimen"],
].copy()

In [None]:
# Subject visits restricted to visit code 1000.0.
# NOTE(review): 1000.0 is presumably the baseline visit code -- confirm.
df_visit = get_subject_visit(model="meta_subject.subjectvisit")
is_visit_1000 = df_visit["visit_code"] == 1000.0
df_visit = df_visit.loc[is_visit_1000].copy()

In [None]:
df_appointment = get_appointment_df()

# Distinct (subject, next appointment datetime, next visit code) combinations.
# The group size lands in a column named 0 and is not used afterwards.
next_appointment_keys = ["subject_identifier", "next_appt_datetime", "next_visit_code"]
df_appointment_next = (
    df_appointment
    .groupby(by=next_appointment_keys)
    .size()
    .reset_index()
)

In [None]:
# For every subject: the datetime and visit code of the last appointment
# scheduled before `cutoff_datetime`. "last" is taken after sorting by
# appointment datetime, then visit code.
appointments_before_cutoff = df_appointment.loc[
    df_appointment["appt_datetime"] < cutoff_datetime,
    ["subject_identifier", "appt_datetime", "visit_code"],
]
df_appointment_last = (
    appointments_before_cutoff
    .sort_values(["subject_identifier", "appt_datetime", "visit_code"])
    .groupby(by=["subject_identifier"])
    .agg(
        last_appt_datetime=("appt_datetime", "last"),
        last_visit_code=("visit_code", "last"),
    )
    .reset_index()
)

In [None]:
# Attach baseline and last-attended visit details to each subject's
# current regimen.
visit_columns = [
    "subject_identifier",
    "baseline_datetime",
    "endline_visit_datetime",
    "endline_visit_code",
]
df = (
    df_current_regimens
    .merge(df_visit[visit_columns], on="subject_identifier", how="left")
    .reset_index(drop=True)
)

# A regimen counts as changed when its date differs from the baseline
# datetime (a missing value on either side also compares as "different").
df["changed"] = df["max_date"] != df["baseline_datetime"]

In [None]:
# Add the next appointment and the last appointment before the cutoff.
# Left joins keep every subject already present in df.
next_columns = ["subject_identifier", "next_appt_datetime", "next_visit_code"]
last_columns = ["subject_identifier", "last_appt_datetime", "last_visit_code"]
df = (
    df
    .merge(df_appointment_next[next_columns], on="subject_identifier", how="left")
    .merge(df_appointment_last[last_columns], on="subject_identifier", how="left")
)

In [None]:
def _zero_if_negative(delta: pd.Series) -> pd.Series:
    """Return `delta` as timedeltas with negative durations replaced by zero.

    Missing values (NaT) compare False against zero, so they are left
    untouched and stay missing in the result.
    """
    delta = pd.to_timedelta(delta)
    zero = pd.Timedelta(0)
    return delta.mask(delta < zero, zero)


# from last seen to final scheduled appt
df["remaining_delta_from_last_seen"] = _zero_if_negative(
    df["last_appt_datetime"] - df["endline_visit_datetime"]
)
df["remaining_days_last_seen_to_final"] = df["remaining_delta_from_last_seen"].dt.days

# The two measures below are only computed for subjects who still have days
# left after their last attended visit; every other row stays missing.
has_remaining_days = df["remaining_days_last_seen_to_final"] > 0

# from now to final scheduled appt
# NOTE: datetime.now() makes this column depend on when the notebook is run.
df["remaining_delta_from_now"] = _zero_if_negative(
    (df["last_appt_datetime"] - datetime.now()).where(has_remaining_days)
)
df["remaining_days_now_to_final"] = df["remaining_delta_from_now"].dt.days

# from next appointment to final scheduled appt
df["remaining_delta_from_next"] = _zero_if_negative(
    (df["last_appt_datetime"] - df["next_appt_datetime"]).where(has_remaining_days)
)
df["remaining_days_next_to_final"] = df["remaining_delta_from_next"].dt.days

In [None]:
# Columns of the per-subject report, in output order.
report_columns = [
    "subject_identifier",
    "current_regimen",
    "current_regimen_date",
    "changed",
    "baseline_datetime",
    "last_attended_visit_code",
    "last_attended_visit_datetime",
    "next_visit_code",
    "next_appt_datetime",
    "last_visit_code",
    "last_appt_datetime",
    "remaining_days_last_seen_to_final",
    "remaining_days_now_to_final",
    "remaining_days_next_to_final",
]
remaining_day_columns = [
    "remaining_days_last_seen_to_final",
    "remaining_days_now_to_final",
    "remaining_days_next_to_final",
]

# Rename to report-friendly names, keep the report columns, order by subject.
df_final = (
    df
    .rename(columns={
        "max_date": "current_regimen_date",
        "endline_visit_code": "last_attended_visit_code",
        "endline_visit_datetime": "last_attended_visit_datetime",
    })[report_columns]
    .sort_values("subject_identifier")
    .reset_index(drop=True)
)

# Missing day counts mean "nothing remaining": store them as 0.0.
for day_column in remaining_day_columns:
    df_final[day_column] = df_final[day_column].astype("float64").fillna(0)
df_final

In [None]:
# need from now until end of study
# Per regimen: total remaining days (now -> final appointment) and the
# number of subjects, largest requirement first.
df_summary1 = (
    df_final
    .groupby(by=["current_regimen"])
    .agg(
        days_medication_needed=("remaining_days_now_to_final", "sum"),
        subjects=("subject_identifier", "count"),
    )
    .sort_values("days_medication_needed", ascending=False)
    .reset_index()
)
df_summary1

In [None]:
# need from last seen to end of study
# Per regimen: total remaining days (last attended visit -> final
# appointment) and the number of subjects, largest requirement first.
df_summary2 = (
    df_final
    .groupby(by=["current_regimen"])
    .agg(
        days_medication_needed=("remaining_days_last_seen_to_final", "sum"),
        subjects=("subject_identifier", "count"),
    )
    .sort_values("days_medication_needed", ascending=False)
    .reset_index()
)
df_summary2

In [None]:
# need from next to end of study
# Per regimen: total remaining days (next appointment -> final appointment)
# and the number of subjects, largest requirement first.
df_summary3 = (
    df_final
    .groupby(by=["current_regimen"])
    .agg(
        days_medication_needed=("remaining_days_next_to_final", "sum"),
        subjects=("subject_identifier", "count"),
    )
    .sort_values("days_medication_needed", ascending=False)
    .reset_index()
)

df_summary3

In [None]:
# Write the per-subject table and the three summaries to one workbook,
# one sheet each, in this order.
sheets = {
    "subjects": df_final,
    "now to final": df_summary1,
    "last seen to final": df_summary2,
    "next to final": df_summary3,
}
workbook_path = analysis_folder / "hiv_medication.xlsx"
with pd.ExcelWriter(
    workbook_path,
    date_format="YYYY-MM-DD",
    datetime_format="YYYY-MM-DD HH:MM:SS",
) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
