# Data Processing: Illusory Pitch

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from glob import glob

def dprime_and_c(hit_rate, fa_rate):
    """Return signal-detection sensitivity (d') and criterion (C).

    Both rates are passed through the inverse of the standard normal CDF
    (``stats.norm.ppf``) to obtain z-scores, from which

        d' = z(H) - z(F)
        C  = -(z(H) + z(F)) / 2

    Parameters
    ----------
    hit_rate : proportion of "signal" trials answered with the signal response.
    fa_rate : proportion of "noise" trials answered with the signal response.

    Returns
    -------
    (dprime, C) tuple.

    Note: a rate of exactly 0 or 1 maps to an infinite z-score, so callers
    should adjust such rates before calling this function.
    """
    z_hit = stats.norm.ppf(hit_rate)
    z_fa = stats.norm.ppf(fa_rate)

    sensitivity = z_hit - z_fa
    criterion = -0.5 * (z_hit + z_fa)

    return sensitivity, criterion

### Load data

In [None]:
# Find all data files. glob() returns matches in arbitrary filesystem order,
# so sort them to make the row order of the combined table (and of everything
# derived from it) reproducible across runs and machines.
datafiles = sorted(glob('../data/Il*.csv'))

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" error when nothing matches the pattern
if not datafiles:
    raise FileNotFoundError("No data files matching '../data/Il*.csv' were found")

# Load each data file and concatenate them into a single table.
# ignore_index=True gives every row a unique label rather than repeating
# each file's own 0..n-1 index.
d = pd.concat((pd.read_csv(f) for f in datafiles), ignore_index=True)

# Select only non-pilot participants
d = d[d.code_version == 'v1.0']

# Select only trial response events
d = d[d.event == 'response']

# Recode the pitch shift and key press as booleans:
# False for "lower" and True for "higher".
# ('+' marks the "higher" answer; key code 38 is presumably the up-arrow key
# -- confirm against the experiment code.)
# NOTE: astype(int) raises if any response row has a missing key_press.
d = d.assign(answer=d.pitch_shift == '+',
             response=d.key_press.astype(int) == 38)

# Mark whether each response was correct by comparing it to the correct answer
d = d.assign(correct=d.answer == d.response)

### Calculate scores within each subject and condition

In [None]:
# Define conditions as (octave, offset) pairs
conditions = [(3, -15), (3, 0), (3, 15), (5, -15), (5, 0), (5, 15)]

# Column order of the long-format score table
score_columns = ['subject', 'octave', 'offset',
                 'hit_rate', 'fa_rate', 'accuracy',
                 'perc_resp_low', 'dprime', 'C']

# Collect one dict per (subject, condition) and build the table once at the
# end. Growing a DataFrame row by row with .loc re-allocates it on every
# insertion and depends on pandas aligning dict keys to columns.
score_rows = []

# Calculate scores for each subject
for subj in d.subject.unique():

    # Select all responses from the current subject
    subj_trials = d[d.subject == subj]

    # Calculate scores within each condition
    for octave, offset in conditions:

        # Select all trials from the current condition
        in_condition = (subj_trials.octave == octave) & (subj_trials.offset == offset)
        trials = subj_trials[in_condition]

        # Create dictionary to store scores from current subject and condition
        condi_scores = {'subject': subj, 'octave': octave, 'offset': offset}

        # Calculate accuracy and the proportion of trials on which the
        # participant responded "lower" (response is False for "lower")
        condi_scores['accuracy'] = np.mean(trials.correct)
        condi_scores['perc_resp_low'] = np.mean(~trials.response)

        # Calculate hit and false alarm rates using the Hautus (1995)
        # adjustment (add 0.5 to the count and 1 to the number of trials)
        # to avoid rates of exactly 0 or 1, which have infinite z-scores.
        # A "hit" is responding "higher" on a "higher" trial; a "false alarm"
        # is responding "higher" on a "lower" trial.
        n_hits = np.sum(trials.answer & trials.response)
        n_false_alarms = np.sum(~trials.answer & trials.response)
        condi_scores['hit_rate'] = (n_hits + .5) / (np.sum(trials.answer) + 1)
        condi_scores['fa_rate'] = (n_false_alarms + .5) / (np.sum(~trials.answer) + 1)

        # Calculate d' and C based on the hit rate and false alarm rate
        condi_scores['dprime'], condi_scores['C'] = dprime_and_c(
            condi_scores['hit_rate'], condi_scores['fa_rate'])

        # Queue current scores as a row of the full table of scores
        score_rows.append(condi_scores)

# Scores are stored in a long-format table; passing columns= fixes the column
# order and keeps the header even when there are no rows
scores = pd.DataFrame(score_rows, columns=score_columns)

### Save processed scores to a file

In [None]:
# Write the long-format score table to disk without the row index column
scores.to_csv(path_or_buf='../data/scores.csv', index=False)