In [101]:
import pandas as pd
import numpy as np

Create 2 functions:
1. `add_euclidian_distances(df)`: adds columns to passed dataframe for:
    - Movement between the current and last trial in mm
    - Distance between the current position in the trial and the center of reference space
2. `get_movement_summary(df, threshold=1.5)`: returns a new dataframe with 3 columns for each subject:
    - `max_between_trial_movement_dS_dL_dP`: maximun between trial movement recorded
    - `max_trial_movement_from_origin_dS_dL_dP`: maximm distance recorded from the center of reference space
    - `between_trial_movement_over_{threshold}`: number of instances of between trial movements that exceed the threshold specified

Notes (from [afni's `3dvolreg` docs](https://afni.nimh.nih.gov/pub/dist/doc/program_help/3dvolreg.html)) :
  - roll  = rotation about the I-S axis }
  - pitch = rotation about the R-L axis } degrees CCW
  - yaw   = rotation about the A-P axis }
  - dS  = displacement in the Superior direction  }
  - dL  = displacement in the Left direction      } mm
  - dP  = displacement in the Posterior direction }

In [156]:
def add_euclidian_distances(df):
    df['diff_last_dS'] = df['dS'] - df['dS'].shift(1)
    df['diff_last_dL'] = df['dL'] - df['dL'].shift(1)
    df['diff_last_dP'] = df['dP'] - df['dP'].shift(1)
    df.fillna(0)
    df['between_last_trial_movement_dS_dL_dP'] = np.sqrt(df['diff_last_dS']**2 + df['diff_last_dL']**2 + df['diff_last_dP']**2)
    df['trial_movement_from_origin_dS_dL_dP'] = np.sqrt(df['dS']**2 + df['dL']**2 + df['dP']**2)
    return df
    
def get_movement_summary(df, threshold=1.5):
    max_movements = (
        df[['between_last_trial_movement_dS_dL_dP','trial_movement_from_origin_dS_dL_dP']]
            .max(axis=0)
            .copy()
            .rename({
                "between_last_trial_movement_dS_dL_dP":"max_between_trial_movement_dS_dL_dP",
                "trial_movement_from_origin_dS_dL_dP":"max_trial_movement_from_origin_dS_dL_dP",
            })
    )
    above_thrs_move = (
        (df[['between_last_trial_movement_dS_dL_dP']]>1.5)
            .sum()
            .rename({
                "between_last_trial_movements_dS_dL_dP":"between_trial_movement_over_"+str(threshold)
            })
    )
            
    summary = pd.concat([max_movements,above_thrs_move])
    
    return summary

Create a list of subject labels for all the subjects in the dataset

In [158]:
subjs = ["sub-"+str(i) if i>9 else "sub-0"+str(i) for i in range(1,30)]
subjs

['sub-01',
 'sub-02',
 'sub-03',
 'sub-04',
 'sub-05',
 'sub-06',
 'sub-07',
 'sub-08',
 'sub-09',
 'sub-10',
 'sub-11',
 'sub-12',
 'sub-13',
 'sub-14',
 'sub-15',
 'sub-16',
 'sub-17',
 'sub-18',
 'sub-19',
 'sub-20',
 'sub-21',
 'sub-22',
 'sub-23',
 'sub-24',
 'sub-25',
 'sub-26',
 'sub-27',
 'sub-28',
 'sub-29']

Load each of the subjects movement file into a dictionary keyed by subject label (ie: `"subj-01":pd.DataFrame`}

In [150]:
subj_dfs = {sub:pd.read_csv(
                        "./data/afni_head_motion/"+sub+"_results/dfile_rall.1D",
                        names=["roll","pitch","yaw", "dS","dL","dP"],
                        header=None,
                        sep="\s+")
            for sub in subjs}
subj_dfs["sub-01"]

Unnamed: 0,roll,pitch,yaw,dS,dL,dP
0,-0.0927,0.2573,-0.0945,-0.1759,-0.0305,-0.0444
1,-0.0040,-0.0484,0.0129,-0.0070,-0.0101,-0.0045
2,-0.0000,-0.0000,0.0000,-0.0000,0.0000,0.0000
3,-0.0050,0.0185,-0.0055,-0.0114,-0.0095,0.0027
4,-0.0133,0.0748,-0.0097,-0.0173,-0.0245,0.0058
...,...,...,...,...,...,...
1771,0.3846,-1.6100,0.1905,4.6896,0.0914,-0.5238
1772,0.3122,-0.9665,0.2230,4.8031,0.2233,-0.5875
1773,0.3234,-0.7923,0.2275,4.8280,0.2539,-0.6520
1774,0.3230,-1.0059,0.2517,4.2369,0.1181,-0.6951


Create new dictionary `subj_dfs` where the keys are the subject labels and the values are corresponding DataFrames with the new distances columns added (using `add_euclidian_distances` function)

In [159]:
subj_dfs = {k:add_euclidian_distances(v) for k,v in subj_dfs.items()}

Create new dictionary `subj_summaries` where the keys are the subject labels and the values are corresponding DataFrames with the distance summaries (using the `get_movement_summary` function)

In [167]:
subj_summaries = {k:get_movement_summary(v) for k,v in subj_dfs.items()}
subj_summaries

{'sub-01': max_between_trial_movement_dS_dL_dP        3.278866
 max_trial_movement_from_origin_dS_dL_dP    5.494173
 between_last_trial_movement_dS_dL_dP       2.000000
 dtype: float64,
 'sub-02': max_between_trial_movement_dS_dL_dP        0.364302
 max_trial_movement_from_origin_dS_dL_dP    1.644770
 between_last_trial_movement_dS_dL_dP       0.000000
 dtype: float64,
 'sub-03': max_between_trial_movement_dS_dL_dP        0.456543
 max_trial_movement_from_origin_dS_dL_dP    1.159871
 between_last_trial_movement_dS_dL_dP       0.000000
 dtype: float64,
 'sub-04': max_between_trial_movement_dS_dL_dP        0.244903
 max_trial_movement_from_origin_dS_dL_dP    2.012193
 between_last_trial_movement_dS_dL_dP       0.000000
 dtype: float64,
 'sub-05': max_between_trial_movement_dS_dL_dP        3.555287
 max_trial_movement_from_origin_dS_dL_dP    4.073179
 between_last_trial_movement_dS_dL_dP       2.000000
 dtype: float64,
 'sub-06': max_between_trial_movement_dS_dL_dP        1.257643
 max_tr

In [170]:
summaries_df = pd.DataFrame(subj_summaries)
summaries_df.T

Unnamed: 0,max_between_trial_movement_dS_dL_dP,max_trial_movement_from_origin_dS_dL_dP,between_last_trial_movement_dS_dL_dP
sub-01,3.278866,5.494173,2.0
sub-02,0.364302,1.64477,0.0
sub-03,0.456543,1.159871,0.0
sub-04,0.244903,2.012193,0.0
sub-05,3.555287,4.073179,2.0
sub-06,1.257643,3.220474,0.0
sub-07,1.330638,1.702199,0.0
sub-08,2.002354,1.811183,1.0
sub-09,0.648505,5.408551,0.0
sub-10,3.136012,5.709135,4.0


In [171]:
(summaries_df.T).to_csv("data/movement_summaries.csv", header=True, index=True)