### Imports

In [1]:
import os
import flywheel
import pandas as pd
from glob import glob
from os.path import join
import nibabel as nib
from tqdm import tqdm_notebook as tqdm

### Setup

In [2]:
API_KEY = os.environ["FLYWHEEL_API"]
fw = flywheel.Flywheel(API_KEY)

### Create project CSV

In [14]:
dfs = list()

for project in ["engage", "engage2", "connectome", "rad"]:    
    df = pd.read_csv("/Volumes/hd_4tb/project/slice_timing/%s_slice_timing.csv" % project)
    df.rename(columns={"subNum": "subject", "timeSession": "session"}, inplace=True)
    df["project"] = project
    df = df[["project", "subject", "session", "task", "order"]]
    df.sort_values(["subject", "session", "task"], inplace=True)
    dfs.append(df)

df = pd.concat(dfs)
df.drop_duplicates(inplace=True)
df.to_csv("/Volumes/hd_4tb/project/all.csv", index=False)

# Flywheel information

### Helpers

In [54]:
def _needed(df, project, time_session, subject, task):
    selected = df[(
        (df["project"]      == project) &
        (df["time_session"] == time_session) &
        (df["subject"]      == subject) &
        (df["task"]         == task)
    )]
    assert len(selected) < 2, "Multiple found for %s %s %s %s" % (project, time_session, subject, task)
    return len(selected) == 0

### Time session

In [29]:
def _time_map():
    _map = {
        "bv":   "000",
        "2mo":  "2MO",
        "6mo":  "6MO",
        "12mo": "12MO",
        "24mo": "24MO"
    }
    return _map

def _engage_time(subject_id):
    if len(subject_id) == 7:
        return "000"
    tail = subject_id[7:]
    if tail.lower() == "bv":
        return "000"
    return tail.upper()

def _rad_time(subject_id):
    if subject_id.endswith("-2"):
        return "2MO"
    return "000"

def _narrow_by_session(sessions, project, time_session):
    if project == "engage":
        sessions = [s for s in sessions if _engage_time(s.subject.code) == time_session]
    elif project == "rad":
        sessions = [s for s in sessions if _rad_time(s.subject.code) == time_session]
    elif project in ["connhc", "connmdd"]:
        if time_session == "000":
            sessions = [s for s in sessions if len(fw.get_session(s.id).tags) == 0]
        else:
            sessions = [s for s in sessions if fw.get_session(s.id).tags and fw.get_session(s.id).tags.pop().upper() == time_session]
    return sessions

### Project / subject

In [45]:
def _subject_format(subject):
    if subject.startswith("rad"):
        return subject.replace("rad", "")
    return subject

def _narrow_by_subject(sessions, subject):
    subject = _subject_format(subject)
    sessions = [s for s in sessions if s.subject.code.lower().startswith(subject)] # change for RAD
    return sessions

def _project_map():
    _map = {
        "connhc":  "connectome",
        "connmdd": "connectome",
        "engage":  "engage",
        "rad":     "rad",
    }
    return _map

### Task

In [46]:
def _task_map():
    _map = {
        "gonogo":       "go_no_go",
        "conscious":    "conscious",
        "nonconscious": "nonconscious",
        "workingmemMB": "tfmri_wm",
        "workingmemSB": "n_back",
    }
    return _map

def _task_match(task, acq_label):
    label = acq_label.lower().replace("-", "_").replace(" ", "_")
    search = _task_map()[task]
    if "donotuse" in label or "nope" in label:
        return False
    if search == "conscious" and "nonconscious" in label:
        return False
    if search in label:
        return True
    return False

def _narrow_by_task(fw, sessions, task):
    acqs = list()
    for session in sessions:
        for acq in fw.get_session_acquisitions(session.id):
            if _task_match(task, acq.label):
                acqs.append(acq)
    return acqs

### Info

- operator, TR, datetime, weight, session label, slice thickness

In [51]:
def _safe_add(_dict1, _dict2, _keys):
    for i in range(len(_keys) - 1):
        k = _keys[i]
        if k in _dict2:
            _dict2 = _dict2[k]
    k = _keys[-1]
    if k in _dict2:
        _dict1[k] = _dict2[k]

def _get_dicom(acq):
    files = acq["files"]
    dicoms = [f for f in files if f.name.endswith(".dicom.zip")]
    if len(dicoms) == 1:
        return dicoms.pop()
    return False
        
def _get_info(fw, acq):
    info = dict()
    acq = fw.get_acquisition(acq.id)
    session = fw.get_session(acq.session)
    dicom = _get_dicom(acq)
    info["id"] = acq.id
    info["label"] = acq.label
    info["session_id"] = session.id
    info["session_label"] = session.label
    info["operator"] = session.operator
    info["weight"] = session["weight"]
    if dicom:
        for feature in [
            "SliceLocation", "TriggerTime", "EchoTime",
            "AcquisitionDate", "AcquisitionTime",
            "SliceThickness", "ReconstructionDiameter",
            "SeriesDescription", "InPlanePhaseEncodingDirection",
            "FlipAngle", "MagneticFieldStrength",
            "RepetitionTime", "BitsStored", "SAR"
            "ImagingFrequency", "ReceiveCoilName",
            "ScanningSequence"
        ]:
            _safe_add(info, dicom, ["info", feature])
    return info

def get_fw_info(fw, sessions, project, time_session, subject, task):
    """ Include p50, brave, unfear, mket, tms? """
    sessions = _narrow_by_subject(sessions, subject)
    if len(sessions) == 0:
        return False
    sessions = _narrow_by_session(sessions, project, time_session)
    if len(sessions) == 0:
        return False
    acqs = _narrow_by_task(fw, sessions, task)
    if len(acqs) == 0:
        return False
    if len(acqs) > 1:
        print("Multiple acqs", project, time_session, subject, task)
        return False
    info = _get_info(fw, acqs.pop())
    info["project"] = project
    info["time_session"] = time_session
    info["subject"] = subject
    info["task"] = task
    return info

### Main

conn032, baseline, has a whole lot weird with it.  Ignore

In [55]:
root = "/Volumes/hd_4tb/raw/"
info_path = "/Users/pbezuhov/Desktop/info.csv"
info_df = pd.read_csv(info_path, dtype={"time_session": str})
df = list()

for project_path in glob(join(root, "*")):
    project = os.path.basename(project_path)
    if project == "engage2":
        continue # deal with engage2 later
                
    ### Flywheel setup
    fw_project = _project_map()[project]
    fw_project = [p for p in fw.get_all_projects() if p.label.lower() == fw_project][0]
    sessions = fw.get_project_sessions(fw_project.id)
    
    ### Main loop
    for time_path in glob(join(project_path, "*")):
        time_session = os.path.basename(time_path)
        for subject_path in tqdm(glob(join(time_path, "*"))):
            subject = os.path.basename(subject_path)
            for task_path in glob(join(subject_path, "*")):
                task = os.path.basename(task_path)
                if task == "structural":
                    continue
                if not _needed(info_df, project, time_session, subject, task):
                    continue
                info = get_fw_info(fw, sessions, project, time_session, subject, task)
                if not info:
                    continue
                df.append(info)
                
pd.concat([pd.DataFrame(df), info_df]).to_csv(info_path, index=False)

HBox(children=(IntProgress(value=0, max=79), HTML(value='')))

HBox(children=(IntProgress(value=0, max=112), HTML(value='')))

HBox(children=(IntProgress(value=0, max=107), HTML(value='')))

HBox(children=(IntProgress(value=0, max=76), HTML(value='')))

HBox(children=(IntProgress(value=0, max=58), HTML(value='')))

HBox(children=(IntProgress(value=0, max=87), HTML(value='')))

HBox(children=(IntProgress(value=0, max=87), HTML(value='')))

HBox(children=(IntProgress(value=0, max=324), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

### Inconsistent slice order

This isn't very fast or efficient

In [4]:
df = pd.read_csv("/Volumes/hd_4tb/project/all.csv")
for project in df["project"].unique():
    p_df = df.copy()
    p_df = p_df[p_df["project"] == project]
    for subject in tqdm(p_df["subject"].unique()):
        s_df = p_df.copy()
        s_df = s_df[s_df["subject"] == subject]
        for time_session in s_df["session"]:
            t_df = s_df.copy()
            t_df = t_df[t_df["session"] == time_session]
            for task in t_df["task"].unique():
                tmp = t_df.copy()
                tmp = tmp[tmp["task"] == task]
                if tmp["order"].nunique() != 1:
                    print(project, subject, time_session, task)

HBox(children=(IntProgress(value=0, max=112), HTML(value='')))




HBox(children=(IntProgress(value=0, max=65), HTML(value='')))




HBox(children=(IntProgress(value=0, max=198), HTML(value='')))

connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16



HBox(children=(IntProgress(value=0, max=331), HTML(value='')))

rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5



### ENGAGE 2

The relevant information needs to be gathered from the dicoms