In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import pandas as pd
import pprint
from mpl_toolkits.mplot3d import Axes3D
import matplotlib as mpl
import csv
import json
import seaborn as sns
from scipy import stats
from scipy.ndimage.filters import gaussian_filter

mpl.rcParams['figure.dpi'] = 144
mpl.rcParams['figure.figsize'] = (10, 7)

DATAFRAME_DIR = "/Users/jeremygordon/Google Drive/Academic/# UC Berkeley ISchool PhD/Research/# Current/Covert Embodied Choice/DataFrames"

## Load data frames (generated by build_dfs.ipynb)

In [33]:
trial_df = pd.read_pickle(DATAFRAME_DIR + '/trial_df.pickle')
tracking_df = pd.read_pickle(DATAFRAME_DIR + '/tracking_df.pickle')
fixation_df = pd.read_pickle(DATAFRAME_DIR + '/fixation_df.pickle')
subject_df = pd.read_pickle(DATAFRAME_DIR + '/subject_df.pickle')
with open("../ExperimentData/hands.json") as f:
    HANDS = json.load(f)

## Extract features into new dataframe

### Label

Which card was selected (0 left, 1 right)

### Exclude

Rounds where no card selected.

### Misc

Include column for adversary (though will not be used for feature or label). Need to segment and perform comparative analysis.


### Universal Features

* Observation phase data
   * ctrl x/y/z and velocity at 10 snapshots (one per sec?), sure to include final ms before switch
   * counts and count fractions of each fixation object
   * min/max x/y/z pos of ctr
   * max x/y/z velocity of ctr
   * max x/y/z velocity of hmd
   * mean x/y/z pos relative to start (ctrl)
   * eyes closed fraction
   * last fixation object
   * second to last fixation object
   * Raw gaze vectors, x/y/z mean and sd
   
### Personalized Features

* Similar, but positions relative to baseline (e.g. controller start)?

In [129]:
FEATS = [
    'fix_count_card_r', 'fix_count_card_l', 'fix_frac_card_r', 'fix_frac_card_l',
    'fix_dur_card_l', 'fix_dur_card_r',
    'ctr_x_max', 'ctr_y_max', 'ctr_z_max',
    'ctr_x_min', 'ctr_y_min', 'ctr_z_min',
    'ctr_vx_max', 'ctr_vy_max', 'ctr_vz_max',
    'ctr_vx_min', 'ctr_vy_min', 'ctr_vz_min',
    'hmd_x_max', 'hmd_y_max', 'hmd_z_max',
    'hmd_x_min', 'hmd_y_min', 'hmd_z_min',
    'hmd_vx_max', 'hmd_vy_max', 'hmd_vz_max',
    'hmd_vx_min', 'hmd_vy_min', 'hmd_vz_min',
    'eyes_closed_pct',
    'first_card_fix_l_ts', 'first_card_fix_r_ts',
    'last_card_fix_l_ts', 'last_card_fix_r_ts',
    'gaze_tgt_x_mean', 'gaze_tgt_y_mean', 'gaze_tgt_z_mean',
    'gaze_tgt_x_sd', 'gaze_tgt_y_sd', 'gaze_tgt_z_sd'
]
for i in range(1, 11):
    # Controller positions
    FEATS.append('ctr_x_s%d' % i)
    FEATS.append('ctr_y_s%d' % i)
    FEATS.append('ctr_z_s%d' % i)
    # Velocities
    FEATS.append('ctr_vx_s%d' % i)
    FEATS.append('ctr_vy_s%d' % i)
    FEATS.append('ctr_vz_s%d' % i)
    
    


In [130]:
ml_df = pd.DataFrame(columns=FEATS + ['label'])

rows = []
ml_adv_trials = trial_df[(trial_df.choice_made == True) & (trial_df.practice == False) & (trial_df.with_adversary == True)]
for trial_key, trial in ml_adv_trials.iterrows():
    data = {}
    
    obs_start_ts = trial.ts_start
    obs_end_ts = trial.ts_selection

    # Get fixations during observation period
    this_trial_obs_fixations = fixation_df[
        (fixation_df.trial == trial.trial) & 
        (fixation_df.subject == trial.subject) & 
        (fixation_df.start_ts < obs_end_ts)
    ]
    # Truncate fixations that cross boundary (assume they ended when observation ends)
    this_trial_obs_fixations.loc[this_trial_obs_fixations.stop_ts > obs_end_ts, 'stop_ts'] = obs_end_ts
    left_card_fixes = this_trial_obs_fixations[this_trial_obs_fixations.objectName == "CardOnTable0"]
    right_card_fixes = this_trial_obs_fixations[this_trial_obs_fixations.objectName == "CardOnTable1"]    
    data['fix_dur_card_l'] = left_card_fixes.duration.sum()
    data['fix_dur_card_r'] = right_card_fixes.duration.sum()
    if len(left_card_fixes):
        data['first_card_fix_l_ts'] = left_card_fixes['start_ts'].min() - obs_start_ts
        data['last_card_fix_l_ts'] = left_card_fixes['stop_ts'].max() - obs_start_ts        
    else:
        data['first_card_fix_l_ts'] = 0.0
        data['last_card_fix_l_ts'] = 0.0        
    if len(right_card_fixes):
        data['first_card_fix_r_ts'] = right_card_fixes['start_ts'].min() - obs_start_ts    
        data['last_card_fix_r_ts'] = right_card_fixes['stop_ts'].max() - obs_start_ts            
    else:
        data['first_card_fix_r_ts'] = 0.0
        data['last_card_fix_r_ts'] = 0.0        
    data['fix_count_card_l'] = len(left_card_fixes)
    data['fix_count_card_r'] = len(right_card_fixes)
    total_fixes = len(this_trial_obs_fixations)
    total_table_card_fixes = data['fix_count_card_l'] + data['fix_count_card_r']
    if total_fixes:
        data['fix_frac_card_l'] = data['fix_count_card_l'] / total_fixes
        data['fix_frac_card_r'] = data['fix_count_card_r'] / total_fixes
    else:
        data['fix_frac_card_l'] = 0.0
        data['fix_frac_card_r'] = 0.0
        
    # Get raw gaze data during observation period
    
    # Get controller & hmd positions during observation period
    this_trial_obs_tracking = tracking_df[
        (tracking_df.trial == trial.trial) & 
        (tracking_df.subject == trial.subject) & 
        (tracking_df.ts < obs_end_ts)
    ].sort_values('ts')
    n_records = len(this_trial_obs_tracking)
    snapshots = 10
    skip_records = int(n_records / snapshots)
    idxs = skip_records * np.arange(1, snapshots) - 1
    # TODO: Confirm we got last record
    CTRL_POS = ['ctr_x', 'ctr_y', 'ctr_z']
    # TODO: Should we downsample and do a better velocity estimate?
    velocities = this_trial_obs_tracking[1:][CTRL_POS].values - this_trial_obs_tracking[:-1][CTRL_POS].values
    snapshot_records = this_trial_obs_tracking.iloc[idxs]
    snapshot_velocities = velocities[idxs, :]
    i = 0
    # TODO: Add HMD
    for key, rec in snapshot_records.iterrows():
        data['ctr_x_s%d' % (i+1)] = rec['ctr_x']
        data['ctr_y_s%d' % (i+1)] = rec['ctr_y']
        data['ctr_z_s%d' % (i+1)] = rec['ctr_z']
        vel_x, vel_y, vel_z = snapshot_velocities[i, :]
        data['ctr_vx_s%d' % (i+1)] = vel_x
        data['ctr_vy_s%d' % (i+1)] = vel_y
        data['ctr_vz_s%d' % (i+1)] = vel_z
        i += 1
        
    data['gaze_tgt_x_mean'] = this_trial_obs_tracking['gaze_tgt_x'].mean()
    data['gaze_tgt_y_mean'] = this_trial_obs_tracking['gaze_tgt_y'].mean()
    data['gaze_tgt_z_mean'] = this_trial_obs_tracking['gaze_tgt_z'].mean()    
    data['gaze_tgt_x_sd'] = this_trial_obs_tracking['gaze_tgt_x'].std()
    data['gaze_tgt_y_sd'] = this_trial_obs_tracking['gaze_tgt_y'].std()
    data['gaze_tgt_z_sd'] = this_trial_obs_tracking['gaze_tgt_z'].std()    
        
    data['eyes_closed_pct'] = this_trial_obs_tracking.blinking.mean()

    # Calculate velocities

    data['label'] = trial.subject_choice
    row = pd.Series(data, name=trial_key)
    rows.append(row)
    
ml_df = ml_df.append(rows)
print("ML df with %d row(s)" % len(ml_df))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


ML df with 566 row(s)


In [133]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# TODO: Normalize features?

TEST_FEATS = []
for feat in FEATS:
    if ml_df[feat].isna().sum() == 0:
        TEST_FEATS.append(feat)
X, y = ml_df[TEST_FEATS], ml_df['label']
# clf = LogisticRegression(random_state=0).fit(X, y)
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)
score = clf.score(X, y)

# Baseline
print("Accuracy: %.2f" % score)
print("Baseline: %.2f" % ml_df['label'].mean())

Accuracy: 0.77
Baseline: 0.54
