From 044a4ad3b042691c62cbbbea2030e2ad7c524c6e Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 13:28:15 +0300 Subject: [PATCH 01/12] Added black to requirements. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 28e49cb..93ab415 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +black coverage flake8 numpy From 5b928efaee2f330f2e5604b8b6dc1d2a71c14249 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 13:29:10 +0300 Subject: [PATCH 02/12] Removed unused import. --- tests/test_1_smoketest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_1_smoketest.py b/tests/test_1_smoketest.py index aaf861a..3b3888d 100644 --- a/tests/test_1_smoketest.py +++ b/tests/test_1_smoketest.py @@ -1,6 +1,5 @@ """test suite for rtanalysis """ -import pytest from rtanalysis.rtanalysis import RTAnalysis From ed305cc38ef02f38fcbf8d9be963d62e6c263868 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 13:44:16 +0300 Subject: [PATCH 03/12] Organized docstrings a bit. --- rtanalysis/rtanalysis.py | 58 +++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index a067556..2b66603 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -1,6 +1,7 @@ -"""example function to analyze reaction times -- given a data frame with RT and accuracy, -compute mean RT for correct trials and mean accuracy +"""Example class to analyze reaction times. + +Given a data frame with RT and accuracy, compute mean RT for correct trials and +mean accuracy. """ # %% import pandas as pd @@ -8,28 +9,39 @@ # %% class RTAnalysis: - """[summary]""" + """Response time (RT) analysis.""" def __init__(self, outlier_cutoff_sd=None): - """ - RT analysis + """Initialize a new RTAnalysis instance. - Parameters: - ----------- - outlier_cutoff_sd: standard deviation cutoff for long RT outliers (default: no cutoff) + Parameters + ---------- + outlier_cutoff_sd : float, optional + Standard deviation cutoff for long RT outliers, by default None """ self.outlier_cutoff_sd = outlier_cutoff_sd self.meanrt_ = None self.meanacc_ = None def fit(self, rt, accuracy, verbose=True): - """[summary] - - Args: - rt (Series of floats): response times for each trial - accuracy (Series of booleans): accuracy for each trial + """Fit response time to accuracy. + + Parameters + ---------- + rt : pd.Series + Response time per trial + accuracy : pd.Series + Accuracy per trial + verbose : bool, optional + Whether to print verbose output or not, by default True + + Raises + ------ + ValueError + RT/accuracy length mismatch + ValueError + Accuracy is 0 """ - rt = self._ensure_series_type(rt) accuracy = self._ensure_series_type(accuracy) @@ -62,16 +74,18 @@ def fit(self, rt, accuracy, verbose=True): @staticmethod def _ensure_series_type(var): - """return variable as a pandas Series or raise exception if - not possible + """Return variable as a pandas Series. - Args: - var (array-like): variable to convert + Parameters + ---------- + var : Iterable + Variable to be converted - Returns: - series (pandas Series): converted variable + Returns + ------- + pd.Series + Variable values as a pandas Series """ - if type(var) is not pd.core.series.Series: var = pd.Series(var) return var From a01d3c705b5154e8e2a7e9fba9d182d405cd834b Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 13:49:32 +0300 Subject: [PATCH 04/12] Removed cell blocks. --- rtanalysis/rtanalysis.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index 2b66603..b0631cc 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -3,11 +3,9 @@ Given a data frame with RT and accuracy, compute mean RT for correct trials and mean accuracy. """ -# %% import pandas as pd -# %% class RTAnalysis: """Response time (RT) analysis.""" From a0322a28d998a61aa8eac4057dc4dfd4d221f193 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 13:54:50 +0300 Subject: [PATCH 05/12] Added module docstring. --- rtanalysis/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rtanalysis/__init__.py b/rtanalysis/__init__.py index e69de29..9c19607 100644 --- a/rtanalysis/__init__.py +++ b/rtanalysis/__init__.py @@ -0,0 +1 @@ +"""Analysis of response data to estimate accuracy from response time (RT).""" \ No newline at end of file From af3732da984f5e028e45813a3d444cbf931cde97 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:01:50 +0300 Subject: [PATCH 06/12] Organized docstrings. --- rtanalysis/generate_testdata.py | 53 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/rtanalysis/generate_testdata.py b/rtanalysis/generate_testdata.py index 9e5574c..89474c1 100644 --- a/rtanalysis/generate_testdata.py +++ b/rtanalysis/generate_testdata.py @@ -1,31 +1,40 @@ -import pandas as pd +"""Utility module for handling the generation of test data.""" import numpy as np +import pandas as pd import scipy.stats -def generate_test_df(meanRT, sdRT, meanAcc, n=100): - """ - generate simulated RT data for testing +def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100): + """Generate simulated RT data for testing. - Args: - meanRT (float): mean RT (for correct trials) - sdRT (float): std deviation of RT (for correct trials) - meanAcc (float): mean accuracy (proportion, 0 <= meanAcc <= 1) - sdcutoff ([type]): outlier cutoff (default None for no cutoff) - """ + Parameters + ---------- + mean_rt : float + Mean response time for correct trials + sd_rt : float + Standard deviation of the response time in correct trials + mean_accuracy : float + Mean accuracy across trials (between 0 and 1) + n : int, optional + Number of observations to generate, by default 100 + Returns + ------- + pd.DataFrame + Generated mock data + """ rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n)) # get random accuracy values and threshold for intended proportion accuracy_continuous = np.random.rand(n) accuracy = pd.Series( accuracy_continuous - < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * meanAcc) + < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * mean_accuracy) ) # scale the correct RTs only rt_correct = rt.mask(~accuracy) - rt_scaled = scale_values(rt_correct, meanRT, sdRT) + rt_scaled = scale_values(rt_correct, mean_rt, sd_rt) # NB: .where() replaces values where the condition is False rt_scaled_with_inaccurate_rts = rt_scaled.where(accuracy, rt) @@ -34,14 +43,22 @@ def generate_test_df(meanRT, sdRT, meanAcc, n=100): def scale_values(values, mean, sd): - """scale values by given mean/sd + """Scale values by given mean/SD. + + Parameters + ---------- + values : array-like + Values to be scaled + mean : float + Target mean + sd : float + Target standard deviation - Args: - values (array-like): values to be scaled - mean (float): intended mean - sd (float): intended standard deviation + Returns + ------- + array-like + Scaled values """ values = values * (sd / np.std(values)) values = (values - np.mean(values)) + mean - return values From e87887252fce08b0009537759f3a168419e249a9 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:04:23 +0300 Subject: [PATCH 07/12] Changed meanrt and meanacc attributes to mean_rt and mean_accuracy. --- rtanalysis/rtanalysis.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index b0631cc..285265e 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -18,8 +18,8 @@ def __init__(self, outlier_cutoff_sd=None): Standard deviation cutoff for long RT outliers, by default None """ self.outlier_cutoff_sd = outlier_cutoff_sd - self.meanrt_ = None - self.meanacc_ = None + self.mean_rt_ = None + self.mean_accuracy_ = None def fit(self, rt, accuracy, verbose=True): """Fit response time to accuracy. @@ -57,18 +57,18 @@ def fit(self, rt, accuracy, verbose=True): print(f"outlier rejection excluded {(rt > cutoff).sum()} trials") rt = rt.mask(rt > cutoff) - self.meanacc_ = accuracy.mean() + self.mean_accuracy_ = accuracy.mean() try: - assert self.meanacc_ > 0 + assert self.mean_accuracy_ > 0 except AssertionError as e: raise ValueError("accuracy is zero") from e rt = rt.mask(~accuracy) - self.meanrt_ = rt.mean() + self.mean_rt_ = rt.mean() if verbose: - print(f"mean RT: {self.meanrt_}") - print(f"mean accuracy: {self.meanacc_}") + print(f"mean RT: {self.mean_rt_}") + print(f"mean accuracy: {self.mean_accuracy_}") @staticmethod def _ensure_series_type(var): From be055170b94c02d0ab64a5d143e520d94b501a05 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:08:27 +0300 Subject: [PATCH 08/12] Fixed RTAnalysis attribute names in tests. --- README.md | 14 +++++++------- tests/test_2_fit.py | 4 ++-- tests/test_4_fixture.py | 4 ++-- tests/test_5_parametric.py | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 600de21..a79a8db 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The data to be analyzed should be stored in two pandas Series of the same size, rta.fit(rt, accuracy) ``` -The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.meanrt_` and `rta.meanacc_`. +The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.mean_rt_` and `rta.mean_accuracy_`. ## Test 1: A simple smoke test @@ -95,8 +95,8 @@ def test_rtanalysis_fit(): rta = RTAnalysis() rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) ``` We generate the data with known mean and accuracy values, fit the model using our function, and then confirm that our estimates are basically equal to the actual values. We use `np.allclose()` rather than a test for equality because sometimes the values will be off by a very small amount due to the numerical precision of the computer; an equality test would treat those as different, but `np.allclose` allows some tolerance in its test. @@ -174,8 +174,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params['meanRT'], rta.meanrt_) - assert np.allclose(params['meanAcc'], rta.meanacc_) + assert np.allclose(params['meanRT'], rta.mean_rt_) + assert np.allclose(params['meanAcc'], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): @@ -199,8 +199,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy) diff --git a/tests/test_2_fit.py b/tests/test_2_fit.py index 46ed4e3..b3bea7e 100644 --- a/tests/test_2_fit.py +++ b/tests/test_2_fit.py @@ -15,5 +15,5 @@ def test_rtanalysis_fit(): meanAcc = 0.8 test_df = generate_test_df(meanRT, sdRT, meanAcc) rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) diff --git a/tests/test_4_fixture.py b/tests/test_4_fixture.py index ed3e1dd..5cc514a 100644 --- a/tests/test_4_fixture.py +++ b/tests/test_4_fixture.py @@ -23,8 +23,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params["meanRT"], rta.meanrt_) - assert np.allclose(params["meanAcc"], rta.meanacc_) + assert np.allclose(params["meanRT"], rta.mean_rt_) + assert np.allclose(params["meanAcc"], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): diff --git a/tests/test_5_parametric.py b/tests/test_5_parametric.py index e5c148b..c394841 100644 --- a/tests/test_5_parametric.py +++ b/tests/test_5_parametric.py @@ -18,8 +18,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy) From 6c8213b72d3c82fc495f74d39c623da6a10336a3 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:11:03 +0300 Subject: [PATCH 09/12] Fixed type validation to use isinstance rather than type. --- rtanalysis/rtanalysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index 285265e..3bd0a84 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -84,6 +84,6 @@ def _ensure_series_type(var): pd.Series Variable values as a pandas Series """ - if type(var) is not pd.core.series.Series: + if not isinstance(var, pd.Series): var = pd.Series(var) return var From 9d732683d33c7bc9859715a57cf20b5713a24086 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:16:52 +0300 Subject: [PATCH 10/12] Moved length validation to a static method. --- rtanalysis/rtanalysis.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index 3bd0a84..0ac332a 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -43,10 +43,7 @@ def fit(self, rt, accuracy, verbose=True): rt = self._ensure_series_type(rt) accuracy = self._ensure_series_type(accuracy) - try: - assert rt.shape[0] == accuracy.shape[0] - except AssertionError as e: - raise ValueError("rt and accuracy must be the same length!") from e + self._validate_length(rt, accuracy) # ensure that accuracy values are boolean assert not set(accuracy.unique()).difference([True, False]) @@ -69,6 +66,15 @@ def fit(self, rt, accuracy, verbose=True): if verbose: print(f"mean RT: {self.mean_rt_}") print(f"mean accuracy: {self.mean_accuracy_}") + + @staticmethod + def _validate_length(rt, accuracy): + same_length = rt.shape[0] == accuracy.shape[0] + try: + assert same_length + except AssertionError as e: + raise ValueError("RT and accuracy must be the same length!") from e + @staticmethod def _ensure_series_type(var): From 7eaca071a29b0097d3e42d8671c3fe4fd1965240 Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:21:35 +0300 Subject: [PATCH 11/12] Added missing length validation method docstring and simplified boolean accuracy validation. --- rtanalysis/rtanalysis.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index 0ac332a..d712b10 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -45,8 +45,8 @@ def fit(self, rt, accuracy, verbose=True): self._validate_length(rt, accuracy) - # ensure that accuracy values are boolean - assert not set(accuracy.unique()).difference([True, False]) + # Ensure that accuracy values are boolean. + assert accuracy.dtype == bool if self.outlier_cutoff_sd is not None: cutoff = rt.std() * self.outlier_cutoff_sd @@ -69,6 +69,20 @@ def fit(self, rt, accuracy, verbose=True): @staticmethod def _validate_length(rt, accuracy): + """Validate response time and accuracy series lengths. + + Parameters + ---------- + rt : pd.Series + Response time values + accuracy : _type_ + Accuracy values + + Raises + ------ + ValueError + Length mismatch + """ same_length = rt.shape[0] == accuracy.shape[0] try: assert same_length From aad281879cd47a253902269e3b6fbc631476746f Mon Sep 17 00:00:00 2001 From: Zvi Baratz Date: Wed, 26 Oct 2022 14:31:25 +0300 Subject: [PATCH 12/12] Moved outlier rejection to a dedicated method. --- rtanalysis/rtanalysis.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index d712b10..96195ee 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -48,17 +48,13 @@ def fit(self, rt, accuracy, verbose=True): # Ensure that accuracy values are boolean. assert accuracy.dtype == bool - if self.outlier_cutoff_sd is not None: - cutoff = rt.std() * self.outlier_cutoff_sd - if verbose: - print(f"outlier rejection excluded {(rt > cutoff).sum()} trials") - rt = rt.mask(rt > cutoff) + rt = self.reject_outlier_rt(rt, verbose=verbose) self.mean_accuracy_ = accuracy.mean() try: assert self.mean_accuracy_ > 0 except AssertionError as e: - raise ValueError("accuracy is zero") from e + raise ValueError("Accuracy is zero!") from e rt = rt.mask(~accuracy) self.mean_rt_ = rt.mean() @@ -107,3 +103,12 @@ def _ensure_series_type(var): if not isinstance(var, pd.Series): var = pd.Series(var) return var + + def reject_outlier_rt(self, rt, verbose=True): + if self.outlier_cutoff_sd is None: + return rt + cutoff = rt.std() * self.outlier_cutoff_sd + if verbose: + n_excluded = (rt > cutoff).sum() + print(f"Outlier rejection excluded {n_excluded} trials.") + return rt.mask(rt > cutoff)