diff --git a/README.md b/README.md index 600de21..a79a8db 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The data to be analyzed should be stored in two pandas Series of the same size, rta.fit(rt, accuracy) ``` -The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.meanrt_` and `rta.meanacc_`. +The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.mean_rt_` and `rta.mean_accuracy_`. ## Test 1: A simple smoke test @@ -95,8 +95,8 @@ def test_rtanalysis_fit(): rta = RTAnalysis() rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) ``` We generate the data with known mean and accuracy values, fit the model using our function, and then confirm that our estimates are basically equal to the actual values. We use `np.allclose()` rather than a test for equality because sometimes the values will be off by a very small amount due to the numerical precision of the computer; an equality test would treat those as different, but `np.allclose` allows some tolerance in its test. 
@@ -174,8 +174,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params['meanRT'], rta.meanrt_) - assert np.allclose(params['meanAcc'], rta.meanacc_) + assert np.allclose(params['meanRT'], rta.mean_rt_) + assert np.allclose(params['meanAcc'], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): @@ -199,8 +199,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy) diff --git a/requirements.txt b/requirements.txt index 28e49cb..93ab415 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +black coverage flake8 numpy diff --git a/rtanalysis/__init__.py b/rtanalysis/__init__.py index e69de29..9c19607 100644 --- a/rtanalysis/__init__.py +++ b/rtanalysis/__init__.py @@ -0,0 +1 @@ +"""Analysis of response data to estimate accuracy from response time (RT).""" \ No newline at end of file diff --git a/rtanalysis/generate_testdata.py b/rtanalysis/generate_testdata.py index 9e5574c..89474c1 100644 --- a/rtanalysis/generate_testdata.py +++ b/rtanalysis/generate_testdata.py @@ -1,31 +1,40 @@ -import pandas as pd +"""Utility module for handling the generation of test data.""" import numpy as np +import pandas as pd import scipy.stats -def generate_test_df(meanRT, sdRT, meanAcc, n=100): - """ - generate simulated RT data for testing +def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100): + """Generate simulated RT data for testing. 
- Args: - meanRT (float): mean RT (for correct trials) - sdRT (float): std deviation of RT (for correct trials) - meanAcc (float): mean accuracy (proportion, 0 <= meanAcc <= 1) - sdcutoff ([type]): outlier cutoff (default None for no cutoff) - """ + Parameters + ---------- + mean_rt : float + Mean response time for correct trials + sd_rt : float + Standard deviation of the response time in correct trials + mean_accuracy : float + Mean accuracy across trials (between 0 and 1) + n : int, optional + Number of observations to generate, by default 100 + Returns + ------- + pd.DataFrame + Generated mock data + """ rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n)) # get random accuracy values and threshold for intended proportion accuracy_continuous = np.random.rand(n) accuracy = pd.Series( accuracy_continuous - < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * meanAcc) + < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * mean_accuracy) ) # scale the correct RTs only rt_correct = rt.mask(~accuracy) - rt_scaled = scale_values(rt_correct, meanRT, sdRT) + rt_scaled = scale_values(rt_correct, mean_rt, sd_rt) # NB: .where() replaces values where the condition is False rt_scaled_with_inaccurate_rts = rt_scaled.where(accuracy, rt) @@ -34,14 +43,22 @@ def generate_test_df(meanRT, sdRT, meanAcc, n=100): def scale_values(values, mean, sd): - """scale values by given mean/sd + """Scale values by given mean/SD. 
+ + Parameters + ---------- + values : array-like + Values to be scaled + mean : float + Target mean + sd : float + Target standard deviation - Args: - values (array-like): values to be scaled - mean (float): intended mean - sd (float): intended standard deviation + Returns + ------- + array-like + Scaled values """ values = values * (sd / np.std(values)) values = (values - np.mean(values)) + mean - return values diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index a067556..96195ee 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -1,77 +1,114 @@ -"""example function to analyze reaction times -- given a data frame with RT and accuracy, -compute mean RT for correct trials and mean accuracy +"""Example class to analyze reaction times. + +Given a data frame with RT and accuracy, compute mean RT for correct trials and +mean accuracy. """ -# %% import pandas as pd -# %% class RTAnalysis: - """[summary]""" + """Response time (RT) analysis.""" def __init__(self, outlier_cutoff_sd=None): - """ - RT analysis + """Initialize a new RTAnalysis instance. - Parameters: - ----------- - outlier_cutoff_sd: standard deviation cutoff for long RT outliers (default: no cutoff) + Parameters + ---------- + outlier_cutoff_sd : float, optional + Standard deviation cutoff for long RT outliers, by default None """ self.outlier_cutoff_sd = outlier_cutoff_sd - self.meanrt_ = None - self.meanacc_ = None + self.mean_rt_ = None + self.mean_accuracy_ = None def fit(self, rt, accuracy, verbose=True): - """[summary] - - Args: - rt (Series of floats): response times for each trial - accuracy (Series of booleans): accuracy for each trial + """Fit response time to accuracy. 
+ + Parameters + ---------- + rt : pd.Series + Response time per trial + accuracy : pd.Series + Accuracy per trial + verbose : bool, optional + Whether to print verbose output or not, by default True + + Raises + ------ + ValueError + RT/accuracy length mismatch + ValueError + Accuracy is 0 """ - rt = self._ensure_series_type(rt) accuracy = self._ensure_series_type(accuracy) - try: - assert rt.shape[0] == accuracy.shape[0] - except AssertionError as e: - raise ValueError("rt and accuracy must be the same length!") from e + self._validate_length(rt, accuracy) - # ensure that accuracy values are boolean - assert not set(accuracy.unique()).difference([True, False]) + # Ensure that accuracy values are boolean. + assert accuracy.dtype == bool - if self.outlier_cutoff_sd is not None: - cutoff = rt.std() * self.outlier_cutoff_sd - if verbose: - print(f"outlier rejection excluded {(rt > cutoff).sum()} trials") - rt = rt.mask(rt > cutoff) + rt = self.reject_outlier_rt(rt, verbose=verbose) - self.meanacc_ = accuracy.mean() + self.mean_accuracy_ = accuracy.mean() try: - assert self.meanacc_ > 0 + assert self.mean_accuracy_ > 0 except AssertionError as e: - raise ValueError("accuracy is zero") from e + raise ValueError("Accuracy is zero!") from e rt = rt.mask(~accuracy) - self.meanrt_ = rt.mean() + self.mean_rt_ = rt.mean() if verbose: - print(f"mean RT: {self.meanrt_}") - print(f"mean accuracy: {self.meanacc_}") + print(f"mean RT: {self.mean_rt_}") + print(f"mean accuracy: {self.mean_accuracy_}") + + @staticmethod + def _validate_length(rt, accuracy): + """Validate response time and accuracy series lengths. 
+ + Parameters + ---------- + rt : pd.Series + Response time values + accuracy : pd.Series + Accuracy values + + Raises + ------ + ValueError + Length mismatch + """ + same_length = rt.shape[0] == accuracy.shape[0] + try: + assert same_length + except AssertionError as e: + raise ValueError("RT and accuracy must be the same length!") from e + @staticmethod def _ensure_series_type(var): - """return variable as a pandas Series or raise exception if - not possible + """Return variable as a pandas Series. - Args: - var (array-like): variable to convert + Parameters + ---------- + var : Iterable + Variable to be converted - Returns: - series (pandas Series): converted variable + Returns + ------- + pd.Series + Variable values as a pandas Series """ - - if type(var) is not pd.core.series.Series: + if not isinstance(var, pd.Series): var = pd.Series(var) return var + + def reject_outlier_rt(self, rt, verbose=True): + if self.outlier_cutoff_sd is None: + return rt + cutoff = rt.std() * self.outlier_cutoff_sd + if verbose: + n_excluded = (rt > cutoff).sum() + print(f"Outlier rejection excluded {n_excluded} trials.") + return rt.mask(rt > cutoff) diff --git a/tests/test_1_smoketest.py b/tests/test_1_smoketest.py index aaf861a..3b3888d 100644 --- a/tests/test_1_smoketest.py +++ b/tests/test_1_smoketest.py @@ -1,6 +1,5 @@ """test suite for rtanalysis """ -import pytest from rtanalysis.rtanalysis import RTAnalysis diff --git a/tests/test_2_fit.py b/tests/test_2_fit.py index 46ed4e3..b3bea7e 100644 --- a/tests/test_2_fit.py +++ b/tests/test_2_fit.py @@ -15,5 +15,5 @@ def test_rtanalysis_fit(): meanAcc = 0.8 test_df = generate_test_df(meanRT, sdRT, meanAcc) rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) diff --git a/tests/test_4_fixture.py b/tests/test_4_fixture.py index ed3e1dd..5cc514a 100644 --- 
a/tests/test_4_fixture.py +++ b/tests/test_4_fixture.py @@ -23,8 +23,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params["meanRT"], rta.meanrt_) - assert np.allclose(params["meanAcc"], rta.meanacc_) + assert np.allclose(params["meanRT"], rta.mean_rt_) + assert np.allclose(params["meanAcc"], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): diff --git a/tests/test_5_parametric.py b/tests/test_5_parametric.py index e5c148b..c394841 100644 --- a/tests/test_5_parametric.py +++ b/tests/test_5_parametric.py @@ -18,8 +18,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy)