# Data Processing: Illusory Tempo - Forced Directional Response II

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from glob import glob

def dprime_and_c(hit_rate, fa_rate):
    """Compute the signal-detection measures d' and C.

    Both rates are mapped through the inverse CDF of the standard normal
    distribution (``stats.norm.ppf``) to obtain their z-scores.

    Parameters
    ----------
    hit_rate : proportion of hits.
    fa_rate : proportion of false alarms.

    Returns
    -------
    tuple
        ``(dprime, C)`` where ``dprime = z(hit_rate) - z(fa_rate)`` and
        ``C = -(z(hit_rate) + z(fa_rate)) / 2``.
    """
    z_hit = stats.norm.ppf(hit_rate)
    z_fa = stats.norm.ppf(fa_rate)

    # Sensitivity is the z-score difference; criterion is the negated z-score mean
    return z_hit - z_fa, -0.5 * (z_hit + z_fa)

### Load data

In [None]:
# Find and load all data into one dataframe.
# glob() returns paths in arbitrary, filesystem-dependent order, so the list is sorted
# to keep the row order (and therefore the subject order downstream) reproducible.
datafiles = sorted(glob('../data/Il*.csv'))
if not datafiles:
    # Fail with a clear message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError("No data files found matching '../data/Il*.csv'")
all_data = pd.concat((pd.read_csv(f) for f in datafiles))

# Select only response data and create new columns
d = all_data.loc[all_data.event == 'response', :].reset_index()
d = d.assign(
    response=np.array(d.key_press, dtype=int) == 50,  # Response is a boolean indicating whether they responded with "late"
    octave=[int(p[-1]) for p in d.pitch],  # Octave is the integer given by the last character of the pitch name
    chroma=[str(p[:-1]) for p in d.pitch]  # Chroma is the pitch name with the octave digit removed (the pitch class)
)

# Mark trials for correctness: a "late" response is correct exactly when the offset is positive.
# Zero-offset trials have no correct answer, so they are always marked as correct.
d = d.assign(correct = (d.response == (d.offset > 0)) | (d.offset == 0))

In [None]:
octaves = [3, 5, 7]
iois = [400, 600]

# Scores are stored in a long-format table with one row per subject x octave x IOI.
# Columns prefixed with "subj_" are computed across all of a subject's nonzero-offset trials
# and repeated on each of that subject's rows.
score_columns = ['subject', 'task_type', 'octave', 'ioi', 'subj_accuracy', 'subj_perc_resp_early',
                 'accuracy', 'perc_resp_early', 'subj_hit_rate', 'subj_fa_rate',
                 'hit_rate', 'fa_rate', 'subj_dprime', 'subj_C', 'dprime', 'C']


def _hit_and_fa_rates(trial_set):
    """Return (hit rate, false alarm rate) for a set of trials.

    A hit is a "late" response (response == True) on a positive-offset trial; a false alarm is
    a "late" response on a negative-offset trial. The Hautus (1995) adjustment (add 0.5 to the
    count and 1 to the number of trials) is applied to avoid rates of exactly 0 or 1.
    """
    positive_offset = trial_set.offset > 0
    negative_offset = trial_set.offset < 0
    hit_rate = (np.sum(positive_offset & trial_set.response) + .5) / (np.sum(positive_offset) + 1)
    fa_rate = (np.sum(negative_offset & trial_set.response) + .5) / (np.sum(negative_offset) + 1)
    return hit_rate, fa_rate


# Select only trials with a nonzero offset (since zero-offset trials have no correct answer)
nonzero_offset_trials = d[d.offset != 0]

# Rows are collected in a list and turned into a dataframe once at the end; growing a dataframe
# row by row is slow and leaves every column with dtype object.
score_rows = []

# Calculate scores for each subject. Subjects are taken from the filtered table so that a
# subject with no nonzero-offset trials is skipped instead of failing on an empty selection.
for subj in nonzero_offset_trials.subject.unique():

    # Select all responses from the current subject
    subj_trials = nonzero_offset_trials[nonzero_offset_trials.subject == subj].reset_index(drop=True)

    # Calculate general scores across all of a subject's trials
    task_type = subj_trials['type'].iloc[0]  # Taken from the subject's first trial
    subj_accuracy = np.mean(subj_trials.correct)
    subj_perc_resp_early = np.mean(~subj_trials.response)
    subj_hit_rate, subj_fa_rate = _hit_and_fa_rates(subj_trials)
    subj_dprime, subj_C = dprime_and_c(subj_hit_rate, subj_fa_rate)

    # Calculate scores within each condition
    for octave in octaves:
        for ioi in iois:

            # Select all trials from the current condition
            trials = subj_trials[(subj_trials.octave == octave) & (subj_trials.interval == ioi)]

            # Calculate hit and false alarm rates, then d' and C based on those rates
            hit_rate, fa_rate = _hit_and_fa_rates(trials)
            dprime, C = dprime_and_c(hit_rate, fa_rate)

            # Store scores from the current subject and condition as one row
            score_rows.append(dict(
                subject=subj,
                task_type=task_type,
                octave=octave,
                ioi=ioi,
                subj_accuracy=subj_accuracy,
                subj_perc_resp_early=subj_perc_resp_early,
                subj_hit_rate=subj_hit_rate,
                subj_fa_rate=subj_fa_rate,
                subj_dprime=subj_dprime,
                subj_C=subj_C,
                # Accuracy and the percent of the time the participant responded with "early"
                accuracy=np.mean(trials.correct),
                perc_resp_early=np.mean(~trials.response),
                hit_rate=hit_rate,
                fa_rate=fa_rate,
                dprime=dprime,
                C=C,
            ))

# Build the full table of scores; `columns` fixes the column order regardless of dict key order
scores = pd.DataFrame(score_rows, columns=score_columns)

### Save processed data

In [None]:
# Write the score table to CSV without the dataframe's row index
scores_path = '../data/scores.csv'
scores.to_csv(scores_path, index=False)