In [1]:
from pathlib import Path
import pandas as pd

### Input

In [2]:
# Resolve the input (raw ADNIMERGE export) and output (converter table) CSV
# locations, given relative to the working directory, into absolute paths
path_input: Path
path_output: Path
path_input, path_output = (
    Path(relative_location).resolve()
    for relative_location in (
        "../../../data/original/adni/ADNIMERGE_14Jul2023.csv",
        "../../../data/processed/adni/converters_to_ad.csv",
    )
)

In [3]:
# Columns to load from the input CSV (everything else is skipped on read)
usecols_: list[str] = [
    "RID",  # participant identifier
    "VISCODE",  # visit code ("bl" or an "m"-prefixed month number)
    "EXAMDATE",  # examination date
    "DX_bl",  # diagnosis at baseline
    "DX",  # diagnosis at the visit
]

In [4]:
# Load only the selected columns, parsing the exam date as a datetime
adni_raw: pd.DataFrame = pd.read_csv(
    path_input, usecols=usecols_, parse_dates=["EXAMDATE"]
)
# Keep complete, de-duplicated rows and let pandas infer the column dtypes
df: pd.DataFrame = adni_raw.dropna().drop_duplicates().convert_dtypes()

### Process

In [5]:
# Exclude participants diagnosed with AD at baseline.
# An explicit copy is taken so that columns added to `df` afterwards are written to an
# independent dataframe rather than to a slice of the unfiltered one (this is what
# triggers pandas' SettingWithCopyWarning).
df: pd.DataFrame = df.loc[~df["DX_bl"].isin(["AD"])].copy()

In [6]:
# Extract visit month: baseline ("bl") visits stay at month 0, every other visit
# takes the integer that follows the "m" in its visit code
df["visit_month"] = 0
is_followup: pd.Series = ~df["VISCODE"].isin(["bl"])
followup_codes: pd.Series = df.loc[is_followup, "VISCODE"]
followup_months: pd.Series = followup_codes.str.split(pat="m", expand=True)[1].astype(int)
df.loc[is_followup, "visit_month"] = followup_months

In [7]:
# Order the visits chronologically within each participant and encode the diagnosis
# as a numeric stage. CN and MCI share stage 0, so Dementia (2) is the only higher stage.
stage_by_diagnosis: dict[str, int] = {"CN": 0, "MCI": 0, "Dementia": 2}
merge: pd.DataFrame = (
    df.sort_values(by=["RID", "visit_month"])  # ascending order is the default
    .reset_index(drop=True)
    .assign(stage=lambda visits: visits["DX"].map(stage_by_diagnosis).astype(int))
)

In [8]:
# Create the empty output dataframe; one row per participant is added later
convert_columns: list[str] = ["RID", "converted", "convert_month", "visit_month"]
convert: pd.DataFrame = pd.DataFrame(columns=convert_columns)

In [9]:
# For each participant (none of whom was diagnosed with AD at baseline), find the first
# visit whose cognitive stage is higher than the stage of the visit just before it.
# One record per participant is collected in a plain list and the output dataframe is
# built once at the end: this avoids growing the dataframe row by row and makes the
# cell safe to re-run (re-running no longer appends duplicate rows).
records: list[tuple] = []
for participant, subset in merge.groupby("RID", sort=False):
    # `merge` is sorted by participant and visit month, so each group is chronological
    stages: list[int] = subset["stage"].tolist()
    months: list[int] = subset["visit_month"].tolist()
    last_visit_month: int = max(months)

    # Month of the first visit at which the stage increased, or None if it never did
    # (this also covers participants with a single visit)
    first_increase_month = next(
        (
            month
            for previous, current, month in zip(stages, stages[1:], months[1:])
            if current > previous
        ),
        None,
    )
    converted: bool = first_increase_month is not None

    # Record layout: RID, converted, convert_month, visit_month.
    # Non-converters get their last visit month as convert_month.
    records.append(
        (
            participant,
            converted,
            first_increase_month if converted else last_visit_month,
            last_visit_month,
        )
    )

# Rebuild the output dataframe using the column layout it was initialized with
convert = pd.DataFrame(records, columns=convert.columns)

In [10]:
# Cast both month columns to integers in a single call
convert = convert.astype({"convert_month": int, "visit_month": int})

### Output

In [11]:
# Output
# Create the destination folder first if it is missing: `to_csv` does not create
# parent directories, so the export would otherwise fail on a fresh checkout
path_output.parent.mkdir(parents=True, exist_ok=True)
# Write the converter table without the dataframe index
convert.to_csv(path_output, index=False)