In [6]:
import dask.dataframe as dd
import altair as alt

%matplotlib inline

In [28]:
def scatter_brush(df, title, rank_limit=20):
    """Build a brushable QC scatter plot linked to a table of subject IDs.

    The left panel plots ``raw_neighbor_corr`` (x) against
    ``t1_neighbor_corr`` (y), colored by ``mean_fd`` and sized by ``max_fd``,
    with ``subject_id`` as the tooltip. Dragging an interval selection over
    the points filters the right panel, which lists the ``subject_id`` of the
    brushed rows.

    Parameters
    ----------
    df : any data source accepted by ``alt.Chart``
        Must provide the fields ``raw_neighbor_corr``, ``t1_neighbor_corr``,
        ``mean_fd``, ``max_fd`` and ``subject_id``.
    title : str
        Title shown above the scatter plot.
    rank_limit : int, optional
        Exclusive upper bound on the rank of brushed rows listed in the
        table: only rows with ``rank < rank_limit`` are shown. Defaults to
        20, the value that was previously hard-coded.

    Returns
    -------
    The horizontally concatenated Altair chart (scatter plot + subject
    table) with an independent color legend.
    """
    # Interval brush driving the linked table. ``alt.selection_interval()``
    # is the non-deprecated spelling of ``alt.selection(type='interval')``.
    brush = alt.selection_interval()

    # Scatter plot; zero=False keeps the axes fitted to the data range
    # instead of forcing them to include the origin.
    scatter = alt.Chart(df).mark_point().encode(
        x=alt.X("raw_neighbor_corr:Q", scale=alt.Scale(zero=False)),
        y=alt.Y("t1_neighbor_corr:Q", scale=alt.Scale(zero=False)),
        color="mean_fd:Q",
        size="max_fd:Q",
        tooltip="subject_id:N",
    )

    # Altair 5 renamed ``add_selection`` to ``add_params`` (the old name is
    # deprecated there); fall back to the old name on releases lacking it.
    attach_brush = getattr(scatter, "add_params", None) or scatter.add_selection
    points = attach_brush(brush).properties(title=title)

    # Base chart for the data table: number every row, keep only the brushed
    # ones, re-rank the survivors and cap how many of them are displayed.
    ranked_text = alt.Chart(df).mark_text().encode(
        y=alt.Y("row_number:O", axis=None)
    ).transform_window(
        row_number="row_number()"
    ).transform_filter(
        brush
    ).transform_window(
        rank="rank(row_number)"
    ).transform_filter(
        alt.datum.rank < rank_limit
    )

    # Single-column table listing the selected subject IDs.
    subjects = ranked_text.encode(text="subject_id:N").properties(
        title="Subjects", width=125
    )

    # Place the scatter plot and the table side by side.
    chart = alt.hconcat(
        points,
        subjects
    ).resolve_legend(
        color="independent"
    )

    return chart

In [2]:
# Read every CSV matching the Cam-CAN qsiprep ImageQC glob on S3 through dask
# and immediately materialize the result, so `df_camcan` only ever names the
# computed frame rather than the lazy dask one.
df_camcan = dd.read_csv(
    's3://cam-can-mri/derivatives/qsiprep/sub-*/dwi/sub-*_desc-ImageQC_dwi.csv'
).compute()
# Show which QC columns are available.
df_camcan.columns

Index(['raw_dimension_x', 'raw_dimension_y', 'raw_dimension_z',
       'raw_voxel_size_x', 'raw_voxel_size_y', 'raw_voxel_size_z', 'raw_max_b',
       'raw_neighbor_corr', 'raw_num_bad_slices', 'raw_num_directions',
       't1_dimension_x', 't1_dimension_y', 't1_dimension_z', 't1_voxel_size_x',
       't1_voxel_size_y', 't1_voxel_size_z', 't1_max_b', 't1_neighbor_corr',
       't1_num_bad_slices', 't1_num_directions', 'mean_fd', 'max_fd',
       'max_rotation', 'max_translation', 'max_rel_rotation',
       'max_rel_translation', 't1_dice_distance', 'file_name', 'subject_id',
       'session_id', 'task_id', 'acq_id', 'space_id', 'rec_id', 'run_id'],
      dtype='object')

In [29]:
# Render the brushable scatter plot and linked subject table for Cam-CAN.
scatter_brush(
    df=df_camcan,
    title="Cam-CAN",
)

In [4]:
# Read every CSV matching the HBN qsiprep ImageQC glob on S3 through dask and
# immediately materialize the result, so `df_hbn` only ever names the
# computed frame rather than the lazy dask one.
df_hbn = dd.read_csv(
    's3://hbn-derivatives/derivatives/qsiprep/sub-*/dwi/sub-*_desc-ImageQC_dwi.csv'
).compute()
# Show which QC columns are available.
df_hbn.columns

Index(['raw_dimension_x', 'raw_dimension_y', 'raw_dimension_z',
       'raw_voxel_size_x', 'raw_voxel_size_y', 'raw_voxel_size_z', 'raw_max_b',
       'raw_neighbor_corr', 'raw_num_bad_slices', 'raw_num_directions',
       't1_dimension_x', 't1_dimension_y', 't1_dimension_z', 't1_voxel_size_x',
       't1_voxel_size_y', 't1_voxel_size_z', 't1_max_b', 't1_neighbor_corr',
       't1_num_bad_slices', 't1_num_directions', 'mean_fd', 'max_fd',
       'max_rotation', 'max_translation', 'max_rel_rotation',
       'max_rel_translation', 't1_dice_distance', 'file_name', 'subject_id',
       'session_id', 'task_id', 'acq_id', 'space_id', 'rec_id', 'run_id'],
      dtype='object')

In [30]:
# Render the brushable scatter plot and linked subject table for HBN.
scatter_brush(
    df=df_hbn,
    title="HBN",
)