### Notebook to check the dates of the MRI scans

#### Date Sources
1. Suivi sheet 
2. Testing sheet
3. DICOM file 
4. DICOM header

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Root directory of the QPN dataset (machine-specific absolute path).
DATASET_ROOT = "/home/nikhil/projects/Parkinsons/qpn/"

# Current nipoppy release
current_release = "Aug_2024"

# Directory variables below always end in exactly one "/" so file names can be
# appended to them directly. The root's trailing slash is stripped first so the
# derived paths never contain "//" (which would make them differ, as strings,
# from the same paths written elsewhere).
_dataset_root = DATASET_ROOT.rstrip("/")
data_release_dir = f"{_dataset_root}/releases/{current_release}/"
tabular_data_release_dir = f"{data_release_dir}tabular/"

# Input files: the release manifest and the two MRI acquisition-date tables
# (Aug and Sept versions, going by the file names).
current_manifest_csv = f"{data_release_dir}manifest.csv"
MRI_acq_dates_Aug_csv = f"{tabular_data_release_dir}recruitment/MRI_acqdata.csv"
MRI_acq_dates_Sept_csv = f"{tabular_data_release_dir}recruitment/MRI_acqdata_Sept2024.csv"



### Read current manifest

In [None]:
# Load the manifest of the current release.
manifest_df = pd.read_csv(current_manifest_csv)

# Distinct participant IDs listed in the manifest, and how many there are.
manifest_participants = manifest_df["participant_id"].unique()
n_manifest_participants = len(manifest_participants)
print(f"Number of participants in manifest: {n_manifest_participants}")

# Distinct session labels present in the manifest.
sessions = manifest_df["session"].unique()
print(f"sessions: {sessions}")

# Number of manifest rows per session label.
per_session_counts = manifest_df.value_counts(subset="session")
print(f"per_session_counts: {per_session_counts}")

# Preview the first rows as the cell output.
manifest_df.head()

### MRI dates

In [None]:
# Load both MRI acquisition-date tables. Only the Sept file is read with its
# first column used as the index; the Aug file keeps the default index.
mri_acq_dates_aug_df = pd.read_csv(MRI_acq_dates_Aug_csv)
mri_acq_dates_sept_df = pd.read_csv(
    MRI_acq_dates_Sept_csv,
    index_col=0,
)

# Preview the Aug table as the cell output.
mri_acq_dates_aug_df.head()

In [None]:
# Suffix the Sept columns so they stay distinguishable from the Aug columns
# of the same name.
_sept_column_names = {
    "scanner_acq_date": "scanner_acq_date_sept",
    "participant_dicom_dir": "participant_dicom_dir_sept",
}
mri_acq_dates_sept_df = mri_acq_dates_sept_df.rename(columns=_sept_column_names)

# Preview the renamed Sept table as the cell output.
mri_acq_dates_sept_df.head()

In [None]:
# Spot-check: show the Sept rows belonging to participant MNI0369.
_is_mni0369 = mri_acq_dates_sept_df["participant_id"] == "MNI0369"
mri_acq_dates_sept_df[_is_mni0369]

### Merge and compare DICOM directories and acquisition dates

In [None]:
# Parse both acquisition-date columns into datetimes so they can be compared
# and subtracted.
mri_acq_dates_aug_df["scanner_acq_date"] = pd.to_datetime(
    mri_acq_dates_aug_df["scanner_acq_date"]
)
mri_acq_dates_sept_df["scanner_acq_date_sept"] = pd.to_datetime(
    mri_acq_dates_sept_df["scanner_acq_date_sept"]
)

# Keep only the Aug columns needed for the comparison, then left-merge the
# Sept table onto them. A left merge keeps every Aug row; Aug rows with no
# Sept counterpart get missing Sept values.
_aug_columns = ["participant_id", "session", "scanner_acq_date", "participant_dicom_dir"]
mri_acq_dates_aug_df = mri_acq_dates_aug_df[_aug_columns]
mri_acq_dates_df = mri_acq_dates_aug_df.merge(
    mri_acq_dates_sept_df,
    how="left",
    on=["participant_id", "session"],
)

# Row-wise agreement flags. Missing values never compare equal, so rows
# without a Sept counterpart are flagged False in both checks.
mri_acq_dates_df["dicom_dir_check"] = mri_acq_dates_df["participant_dicom_dir"].eq(
    mri_acq_dates_df["participant_dicom_dir_sept"]
)
mri_acq_dates_df["acq_date_check"] = mri_acq_dates_df["scanner_acq_date"].eq(
    mri_acq_dates_df["scanner_acq_date_sept"]
)

# Aug date minus Sept date for each row.
mri_acq_dates_df["date_diff"] = mri_acq_dates_df["scanner_acq_date"].sub(
    mri_acq_dates_df["scanner_acq_date_sept"]
)

# Preview the merged table as the cell output.
mri_acq_dates_df.head()

### Identify participants with different dicom directories or acquisition dates

In [None]:
# A row is a mismatch unless both the DICOM-directory check and the
# acquisition-date check pass.
_both_checks_pass = mri_acq_dates_df["dicom_dir_check"] & mri_acq_dates_df["acq_date_check"]
mismatch_df = mri_acq_dates_df[~_both_checks_pass]

# Display the mismatching rows as the cell output.
mismatch_df