### Imports

In [1]:
import os
import flywheel
import pandas as pd
from glob import glob
from os.path import join
from tqdm import tqdm_notebook as tqdm

### Setup

In [2]:
API_KEY = os.environ["FLYWHEEL_API"]
fw = flywheel.Flywheel(API_KEY)

### Create project CSV

In [14]:
dfs = list()

for project in ["engage", "engage2", "connectome", "rad"]:    
    df = pd.read_csv("/Volumes/hd_4tb/project/slice_timing/%s_slice_timing.csv" % project)
    df.rename(columns={"subNum": "subject", "timeSession": "session"}, inplace=True)
    df["project"] = project
    df = df[["project", "subject", "session", "task", "order"]]
    df.sort_values(["subject", "session", "task"], inplace=True)
    dfs.append(df)

df = pd.concat(dfs)
df.drop_duplicates(inplace=True)
df.to_csv("/Volumes/hd_4tb/project/all.csv", index=False)

# Flywheel information

- operator
- TR
- Datetime
- Weight
- Session label
- Slice thickness

### Helpers

In [29]:
def _project_map():
    _map = {
        "connhc":  "connectome",
        "connmdd": "connectome",
        "engage":  "engage",
        "rad":     "rad",
    }
    return _map

def _time_map():
    _map = {
        "bv":   "000",
        "2mo":  "2MO",
        "6mo":  "6MO",
        "12mo": "12MO",
        "24mo": "24MO"
    }
    return _map

def _subject_format(subject):
    if subject.startswith("rad"):
        return subject.replace("rad", "")
    return subject

def _engage_time(subject_id):
    for time_session in ["bv", "24mo", "12mo", "6mo", "2mo"]:
        if subject_id.lower().endswith(time_session):
            return _time_map()[time_session]
    return "000"

def _rad_time(subject_id):
    if subject_id.endswith("-2"):
        return "2MO"
    return "000"

def _narrow_by_subject(sessions, subject):
    subject = _subject_format(subject)
    sessions = [s for s in sessions if subject in s.subject.code.lower()]
    return sessions

def _narrow_by_session(sessions, project, time_session):
    project_label = project.label.lower()
    if project_label == "engage":
        sessions = [s for s in sessions if _engage_time(s.subject.code) == time_session]
    elif project_label == "rad":
        sessions = [s for s in sessions if _rad_time(s.subject.code) == time_session]
    elif project_label == "connectome":
        if time_session == "000":
            sessions = [s for s in sessions if len(fw.get_session(s.id).tags) == 0]
        else:
            sessions = [s for s in sessions if fw.get_session(s.id).tags and fw.get_session(s.id).tags.pop().upper() == time_session]
    return sessions

def _narrow_by_task(fw, sessions, task):
    for session in sessions:
        for acq if fw.get_session_acquisitions(session.id)

def _get_info(fw, acq):
    return dict()
        
def get_fw_info(fw, sessions, project, time_session, subject, task):
    """ Include p50, brave, unfear, mket, tms? """
    sessions = _narrow_by_subject(sessions, subject)
    if len(sessions) == 0:
        return False
    sessions = _narrow_by_session(sessions, project, time_session)
    if len(sessions) == 0:
        return False
    acqs = _narrow_by_task(fw, sessions, task)
    if len(acqs) == 0:
        return False
    if len(acqs) > 1:
        print("Multiple acqs", project, time_session, subject, task)
        return False
    info = _get_info(fw, acqs.pop())
    return info

### Main

In [30]:
root = "/Volumes/hd_4tb/raw/"
df = list()
for project_path in glob(join(root, "*")):
    project = os.path.basename(project_path)
    if project == "engage2":
        continue # deal with engage2 later
        
    ### Flywheel setup
    project = _project_map()[project]
    project = [p for p in fw.get_all_projects() if p.label.lower() == project][0]
    sessions = fw.get_project_sessions(project.id)
    
    ### Main loop
    for time_path in glob(join(project_path, "*")):
        time_session = os.path.basename(time_path)
        for subject_path in tqdm(glob(join(time_path, "*"))):
            subject = os.path.basename(subject_path)
            for task_path in glob(join(subject_path, "*")):
                task = os.path.basename(task_path)
                info = get_fw_info(fw, sessions, project, time_session, subject, task)
                if not info:
                    continue
                df.append(info)

HBox(children=(IntProgress(value=0, max=79), HTML(value='')))

3
3
3
3
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
4
4
4
4
4


KeyboardInterrupt: 

### Inconsistent slice order

This isn't very fast or efficient

In [4]:
df = pd.read_csv("/Volumes/hd_4tb/project/all.csv")
for project in df["project"].unique():
    p_df = df.copy()
    p_df = p_df[p_df["project"] == project]
    for subject in tqdm(p_df["subject"].unique()):
        s_df = p_df.copy()
        s_df = s_df[s_df["subject"] == subject]
        for time_session in s_df["session"]:
            t_df = s_df.copy()
            t_df = t_df[t_df["session"] == time_session]
            for task in t_df["task"].unique():
                tmp = t_df.copy()
                tmp = tmp[tmp["task"] == task]
                if tmp["order"].nunique() != 1:
                    print(project, subject, time_session, task)

HBox(children=(IntProgress(value=0, max=112), HTML(value='')))




HBox(children=(IntProgress(value=0, max=65), HTML(value='')))




HBox(children=(IntProgress(value=0, max=198), HTML(value='')))

connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16
connectome CONN122 1 16



HBox(children=(IntProgress(value=0, max=331), HTML(value='')))

rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5
rad RAD_333 1 3
rad RAD_333 1 5



### ENGAGE 2

The relevant information needs to be gathered from the dicoms