Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ The data to be analyzed should be stored in two pandas Series of the same size,
rta.fit(rt, accuracy)
```

The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.meanrt_` and `rta.meanacc_`.
The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.mean_rt_` and `rta.mean_accuracy_`.

## Test 1: A simple smoke test

Expand Down Expand Up @@ -95,8 +95,8 @@ def test_rtanalysis_fit():
rta = RTAnalysis()
rta.fit(test_df.rt, test_df.accuracy)

assert np.allclose(meanRT, rta.meanrt_)
assert np.allclose(meanAcc, rta.meanacc_)
assert np.allclose(meanRT, rta.mean_rt_)
assert np.allclose(meanAcc, rta.mean_accuracy_)
```

We generate the data with known mean and accuracy values, fit the model using our function, and then confirm that our estimates match the known values to within a small tolerance. We use `np.allclose()` rather than a test for strict equality because floating-point arithmetic can leave the computed values off by a very small amount; an equality test would treat those as different, whereas `np.allclose()` allows some tolerance in its comparison.
Expand Down Expand Up @@ -174,8 +174,8 @@ def simulated_data(params):
def test_rtanalysis_fit(simulated_data, params):
rta = RTAnalysis()
rta.fit(simulated_data.rt, simulated_data.accuracy)
assert np.allclose(params['meanRT'], rta.meanrt_)
assert np.allclose(params['meanAcc'], rta.meanacc_)
assert np.allclose(params['meanRT'], rta.mean_rt_)
assert np.allclose(params['meanAcc'], rta.mean_accuracy_)


def test_rtanalysis_checkfail(simulated_data, params):
Expand All @@ -199,8 +199,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc):
rta = RTAnalysis()
if meanAcc > 0:
rta.fit(test_df.rt, test_df.accuracy)
assert np.allclose(meanRT, rta.meanrt_)
assert np.allclose(meanAcc, rta.meanacc_)
assert np.allclose(meanRT, rta.mean_rt_)
assert np.allclose(meanAcc, rta.mean_accuracy_)
else:
with pytest.raises(ValueError):
rta.fit(test_df.rt, test_df.accuracy)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
black
coverage
flake8
numpy
Expand Down
1 change: 1 addition & 0 deletions rtanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Analysis of response data to estimate accuracy from response time (RT)."""
53 changes: 35 additions & 18 deletions rtanalysis/generate_testdata.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,40 @@
import pandas as pd
"""Utility module for handling the generation of test data."""
import numpy as np
import pandas as pd
import scipy.stats


def generate_test_df(meanRT, sdRT, meanAcc, n=100):
"""
generate simulated RT data for testing
def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100):
"""Generate simulated RT data for testing.

Args:
meanRT (float): mean RT (for correct trials)
sdRT (float): std deviation of RT (for correct trials)
meanAcc (float): mean accuracy (proportion, 0 <= meanAcc <= 1)
sdcutoff ([type]): outlier cutoff (default None for no cutoff)
"""
Parameters
----------
mean_rt : float
Mean response time for correct trials
sd_rt : float
Standard deviation of the response time in correct trials
mean_accuracy : float
Mean accuracy across trials (between 0 and 1)
n : int, optional
Number of observations to generate, by default 100

Returns
-------
pd.DataFrame
Generated mock data
"""
rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n))

# get random accuracy values and threshold for intended proportion
accuracy_continuous = np.random.rand(n)
accuracy = pd.Series(
accuracy_continuous
< scipy.stats.scoreatpercentile(accuracy_continuous, 100 * meanAcc)
< scipy.stats.scoreatpercentile(accuracy_continuous, 100 * mean_accuracy)
)

# scale the correct RTs only
rt_correct = rt.mask(~accuracy)
rt_scaled = scale_values(rt_correct, meanRT, sdRT)
rt_scaled = scale_values(rt_correct, mean_rt, sd_rt)

# NB: .where() replaces values where the condition is False
rt_scaled_with_inaccurate_rts = rt_scaled.where(accuracy, rt)
Expand All @@ -34,14 +43,22 @@ def generate_test_df(meanRT, sdRT, meanAcc, n=100):


def scale_values(values, mean, sd):
"""scale values by given mean/sd
"""Scale values by given mean/SD.

Parameters
----------
values : array-like
Values to be scaled
mean : float
Target mean
sd : float
Target standard deviation

Args:
values (array-like): values to be scaled
mean (float): intended mean
sd (float): intended standard deviation
Returns
-------
array-like
Scaled values
"""
values = values * (sd / np.std(values))
values = (values - np.mean(values)) + mean

return values
125 changes: 81 additions & 44 deletions rtanalysis/rtanalysis.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,114 @@
"""example function to analyze reaction times
- given a data frame with RT and accuracy,
compute mean RT for correct trials and mean accuracy
"""Example class to analyze reaction times.

Given a data frame with RT and accuracy, compute mean RT for correct trials and
mean accuracy.
"""
# %%
import pandas as pd


# %%
class RTAnalysis:
"""[summary]"""
"""Response time (RT) analysis."""

def __init__(self, outlier_cutoff_sd=None):
"""
RT analysis
"""Initialize a new RTAnalysis instance.

Parameters:
-----------
outlier_cutoff_sd: standard deviation cutoff for long RT outliers (default: no cutoff)
Parameters
----------
outlier_cutoff_sd : float, optional
Standard deviation cutoff for long RT outliers, by default None
"""
self.outlier_cutoff_sd = outlier_cutoff_sd
self.meanrt_ = None
self.meanacc_ = None
self.mean_rt_ = None
self.mean_accuracy_ = None

def fit(self, rt, accuracy, verbose=True):
"""[summary]

Args:
rt (Series of floats): response times for each trial
accuracy (Series of booleans): accuracy for each trial
"""Fit response time to accuracy.

Parameters
----------
rt : pd.Series
Response time per trial
accuracy : pd.Series
Accuracy per trial
verbose : bool, optional
Whether to print verbose output or not, by default True

Raises
------
ValueError
RT/accuracy length mismatch
ValueError
Accuracy is 0
"""

rt = self._ensure_series_type(rt)
accuracy = self._ensure_series_type(accuracy)

try:
assert rt.shape[0] == accuracy.shape[0]
except AssertionError as e:
raise ValueError("rt and accuracy must be the same length!") from e
self._validate_length(rt, accuracy)

# ensure that accuracy values are boolean
assert not set(accuracy.unique()).difference([True, False])
# Ensure that accuracy values are boolean.
assert accuracy.dtype == bool

if self.outlier_cutoff_sd is not None:
cutoff = rt.std() * self.outlier_cutoff_sd
if verbose:
print(f"outlier rejection excluded {(rt > cutoff).sum()} trials")
rt = rt.mask(rt > cutoff)
rt = self.reject_outlier_rt(rt, verbose=verbose)

self.meanacc_ = accuracy.mean()
self.mean_accuracy_ = accuracy.mean()
try:
assert self.meanacc_ > 0
assert self.mean_accuracy_ > 0
except AssertionError as e:
raise ValueError("accuracy is zero") from e
raise ValueError("Accuracy is zero!") from e

rt = rt.mask(~accuracy)
self.meanrt_ = rt.mean()
self.mean_rt_ = rt.mean()

if verbose:
print(f"mean RT: {self.meanrt_}")
print(f"mean accuracy: {self.meanacc_}")
print(f"mean RT: {self.mean_rt_}")
print(f"mean accuracy: {self.mean_accuracy_}")

@staticmethod
def _validate_length(rt, accuracy):
    """Validate that response time and accuracy have the same length.

    Parameters
    ----------
    rt : pd.Series
        Response time values
    accuracy : pd.Series
        Accuracy values

    Raises
    ------
    ValueError
        Length mismatch
    """
    # Raise explicitly instead of going through ``assert``: assertions are
    # stripped when Python runs with -O, which would silently disable this
    # check and let mismatched inputs through.
    if rt.shape[0] != accuracy.shape[0]:
        raise ValueError("RT and accuracy must be the same length!")


@staticmethod
def _ensure_series_type(var):
"""return variable as a pandas Series or raise exception if
not possible
"""Return variable as a pandas Series.

Args:
var (array-like): variable to convert
Parameters
----------
var : Iterable
Variable to be converted

Returns:
series (pandas Series): converted variable
Returns
-------
pd.Series
Variable values as a pandas Series
"""

if type(var) is not pd.core.series.Series:
if not isinstance(var, pd.Series):
var = pd.Series(var)
return var

def reject_outlier_rt(self, rt, verbose=True):
    """Mask response times that exceed the outlier cutoff.

    Parameters
    ----------
    rt : pd.Series
        Response time per trial
    verbose : bool, optional
        Whether to print the number of excluded trials, by default True

    Returns
    -------
    pd.Series
        ``rt`` itself when no cutoff is configured; otherwise ``rt`` with
        the values above the cutoff masked out
    """
    sd_multiplier = self.outlier_cutoff_sd
    if sd_multiplier is None:
        return rt

    # The threshold is the standard deviation of the RTs scaled by the
    # configured multiplier.
    # NOTE(review): the mean RT is not added to the threshold -- confirm
    # that this is the intended definition of an outlier.
    threshold = rt.std() * sd_multiplier
    is_outlier = rt > threshold
    if verbose:
        n_excluded = is_outlier.sum()
        print(f"Outlier rejection excluded {n_excluded} trials.")
    return rt.mask(is_outlier)
1 change: 0 additions & 1 deletion tests/test_1_smoketest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""test suite for rtanalysis
"""
import pytest
from rtanalysis.rtanalysis import RTAnalysis


Expand Down
4 changes: 2 additions & 2 deletions tests/test_2_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ def test_rtanalysis_fit():
meanAcc = 0.8
test_df = generate_test_df(meanRT, sdRT, meanAcc)
rta.fit(test_df.rt, test_df.accuracy)
assert np.allclose(meanRT, rta.meanrt_)
assert np.allclose(meanAcc, rta.meanacc_)
assert np.allclose(meanRT, rta.mean_rt_)
assert np.allclose(meanAcc, rta.mean_accuracy_)
4 changes: 2 additions & 2 deletions tests/test_4_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ def simulated_data(params):
def test_rtanalysis_fit(simulated_data, params):
rta = RTAnalysis()
rta.fit(simulated_data.rt, simulated_data.accuracy)
assert np.allclose(params["meanRT"], rta.meanrt_)
assert np.allclose(params["meanAcc"], rta.meanacc_)
assert np.allclose(params["meanRT"], rta.mean_rt_)
assert np.allclose(params["meanAcc"], rta.mean_accuracy_)


def test_rtanalysis_checkfail(simulated_data, params):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_5_parametric.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc):
rta = RTAnalysis()
if meanAcc > 0:
rta.fit(test_df.rt, test_df.accuracy)
assert np.allclose(meanRT, rta.meanrt_)
assert np.allclose(meanAcc, rta.meanacc_)
assert np.allclose(meanRT, rta.mean_rt_)
assert np.allclose(meanAcc, rta.mean_accuracy_)
else:
with pytest.raises(ValueError):
rta.fit(test_df.rt, test_df.accuracy)