diff --git a/README.md b/README.md index 600de21..a79a8db 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The data to be analyzed should be stored in two pandas Series of the same size, rta.fit(rt, accuracy) ``` -The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.meanrt_` and `rta.meanacc_`. +The resulting estimates are printed to the screen (assuming that the `verbose` flag is not set to false) and also stored to internal variables `rta.mean_rt_` and `rta.mean_accuracy_`. ## Test 1: A simple smoke test @@ -95,8 +95,8 @@ def test_rtanalysis_fit(): rta = RTAnalysis() rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) ``` We generate the data with known mean and accuracy values, fit the model using our function, and then confirm that our estimates are basically equal to the actual values. We use `np.allclose()` rather than a test for equality because sometimes the values will be off by a very small amount due to the numerical precision of the computer; an equality test would treat those as different, but `np.allclose` allows some tolerance in its test. 
@@ -174,8 +174,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params['meanRT'], rta.meanrt_) - assert np.allclose(params['meanAcc'], rta.meanacc_) + assert np.allclose(params['meanRT'], rta.mean_rt_) + assert np.allclose(params['meanAcc'], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): @@ -199,8 +199,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy) diff --git a/requirements.txt b/requirements.txt index 28e49cb..93ab415 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +black coverage flake8 numpy diff --git a/rtanalysis/__init__.py b/rtanalysis/__init__.py index e69de29..9c19607 100644 --- a/rtanalysis/__init__.py +++ b/rtanalysis/__init__.py @@ -0,0 +1 @@ +"""Analysis of response data to estimate accuracy from response time (RT).""" \ No newline at end of file diff --git a/rtanalysis/generate_testdata.py b/rtanalysis/generate_testdata.py index 9e5574c..89474c1 100644 --- a/rtanalysis/generate_testdata.py +++ b/rtanalysis/generate_testdata.py @@ -1,31 +1,40 @@ -import pandas as pd +"""Utility module for handling the generation of test data.""" import numpy as np +import pandas as pd import scipy.stats -def generate_test_df(meanRT, sdRT, meanAcc, n=100): - """ - generate simulated RT data for testing +def generate_test_df(mean_rt, sd_rt, mean_accuracy, n=100): + """Generate simulated RT data for testing. 
- Args: - meanRT (float): mean RT (for correct trials) - sdRT (float): std deviation of RT (for correct trials) - meanAcc (float): mean accuracy (proportion, 0 <= meanAcc <= 1) - sdcutoff ([type]): outlier cutoff (default None for no cutoff) - """ + Parameters + ---------- + mean_rt : float + Mean response time for correct trials + sd_rt : float + Standard deviation of the response time in correct trials + mean_accuracy : float + Mean accuracy across trials (between 0 and 1) + n : int, optional + Number of observations to generate, by default 100 + Returns + ------- + pd.DataFrame + Generated mock data + """ rt = pd.Series(scipy.stats.weibull_min.rvs(2, loc=1, size=n)) # get random accuracy values and threshold for intended proportion accuracy_continuous = np.random.rand(n) accuracy = pd.Series( accuracy_continuous - < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * meanAcc) + < scipy.stats.scoreatpercentile(accuracy_continuous, 100 * mean_accuracy) ) # scale the correct RTs only rt_correct = rt.mask(~accuracy) - rt_scaled = scale_values(rt_correct, meanRT, sdRT) + rt_scaled = scale_values(rt_correct, mean_rt, sd_rt) # NB: .where() replaces values where the condition is False rt_scaled_with_inaccurate_rts = rt_scaled.where(accuracy, rt) @@ -34,14 +43,22 @@ def generate_test_df(meanRT, sdRT, meanAcc, n=100): def scale_values(values, mean, sd): - """scale values by given mean/sd + """Scale values by given mean/SD. 
+ + Parameters + ---------- + values : array-like + Values to be scaled + mean : float + Target mean + sd : float + Target standard deviation - Args: - values (array-like): values to be scaled - mean (float): intended mean - sd (float): intended standard deviation + Returns + ------- + array-like + Scaled values """ values = values * (sd / np.std(values)) values = (values - np.mean(values)) + mean - return values diff --git a/rtanalysis/rtanalysis.py b/rtanalysis/rtanalysis.py index a067556..96195ee 100644 --- a/rtanalysis/rtanalysis.py +++ b/rtanalysis/rtanalysis.py @@ -1,77 +1,114 @@ -"""example function to analyze reaction times -- given a data frame with RT and accuracy, -compute mean RT for correct trials and mean accuracy +"""Example class to analyze reaction times. + +Given a data frame with RT and accuracy, compute mean RT for correct trials and +mean accuracy. """ -# %% import pandas as pd -# %% class RTAnalysis: - """[summary]""" + """Response time (RT) analysis.""" def __init__(self, outlier_cutoff_sd=None): - """ - RT analysis + """Initialize a new RTAnalysis instance. - Parameters: - ----------- - outlier_cutoff_sd: standard deviation cutoff for long RT outliers (default: no cutoff) + Parameters + ---------- + outlier_cutoff_sd : float, optional + Standard deviation cutoff for long RT outliers, by default None """ self.outlier_cutoff_sd = outlier_cutoff_sd - self.meanrt_ = None - self.meanacc_ = None + self.mean_rt_ = None + self.mean_accuracy_ = None def fit(self, rt, accuracy, verbose=True): - """[summary] - - Args: - rt (Series of floats): response times for each trial - accuracy (Series of booleans): accuracy for each trial + """Fit response time to accuracy. 
+ + Parameters + ---------- + rt : pd.Series + Response time per trial + accuracy : pd.Series + Accuracy per trial + verbose : bool, optional + Whether to print verbose output or not, by default True + + Raises + ------ + ValueError + RT/accuracy length mismatch + ValueError + Accuracy is 0 """ - rt = self._ensure_series_type(rt) accuracy = self._ensure_series_type(accuracy) - try: - assert rt.shape[0] == accuracy.shape[0] - except AssertionError as e: - raise ValueError("rt and accuracy must be the same length!") from e + self._validate_length(rt, accuracy) - # ensure that accuracy values are boolean - assert not set(accuracy.unique()).difference([True, False]) + # Ensure that accuracy values are boolean. + assert accuracy.dtype == bool - if self.outlier_cutoff_sd is not None: - cutoff = rt.std() * self.outlier_cutoff_sd - if verbose: - print(f"outlier rejection excluded {(rt > cutoff).sum()} trials") - rt = rt.mask(rt > cutoff) + rt = self.reject_outlier_rt(rt, verbose=verbose) - self.meanacc_ = accuracy.mean() + self.mean_accuracy_ = accuracy.mean() try: - assert self.meanacc_ > 0 + assert self.mean_accuracy_ > 0 except AssertionError as e: - raise ValueError("accuracy is zero") from e + raise ValueError("Accuracy is zero!") from e rt = rt.mask(~accuracy) - self.meanrt_ = rt.mean() + self.mean_rt_ = rt.mean() if verbose: - print(f"mean RT: {self.meanrt_}") - print(f"mean accuracy: {self.meanacc_}") + print(f"mean RT: {self.mean_rt_}") + print(f"mean accuracy: {self.mean_accuracy_}") + + @staticmethod + def _validate_length(rt, accuracy): + """Validate response time and accuracy series lengths. 
+ + Parameters + ---------- + rt : pd.Series + Response time values + accuracy : pd.Series + Accuracy values + + Raises + ------ + ValueError + Length mismatch + """ + same_length = rt.shape[0] == accuracy.shape[0] + try: + assert same_length + except AssertionError as e: + raise ValueError("RT and accuracy must be the same length!") from e + @staticmethod def _ensure_series_type(var): - """return variable as a pandas Series or raise exception if - not possible + """Return variable as a pandas Series. - Args: - var (array-like): variable to convert + Parameters + ---------- + var : Iterable + Variable to be converted - Returns: - series (pandas Series): converted variable + Returns + ------- + pd.Series + Variable values as a pandas Series """ - - if type(var) is not pd.core.series.Series: + if not isinstance(var, pd.Series): var = pd.Series(var) return var + + def reject_outlier_rt(self, rt, verbose=True): + if self.outlier_cutoff_sd is None: + return rt + cutoff = rt.std() * self.outlier_cutoff_sd + if verbose: + n_excluded = (rt > cutoff).sum() + print(f"Outlier rejection excluded {n_excluded} trials.") + return rt.mask(rt > cutoff) diff --git a/tests/test_1_smoketest.py b/tests/test_1_smoketest.py index aaf861a..3b3888d 100644 --- a/tests/test_1_smoketest.py +++ b/tests/test_1_smoketest.py @@ -1,6 +1,5 @@ """test suite for rtanalysis """ -import pytest from rtanalysis.rtanalysis import RTAnalysis diff --git a/tests/test_2_fit.py b/tests/test_2_fit.py index 46ed4e3..b3bea7e 100644 --- a/tests/test_2_fit.py +++ b/tests/test_2_fit.py @@ -15,5 +15,5 @@ def test_rtanalysis_fit(): meanAcc = 0.8 test_df = generate_test_df(meanRT, sdRT, meanAcc) rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) diff --git a/tests/test_4_fixture.py b/tests/test_4_fixture.py index ed3e1dd..5cc514a 100644 --- 
a/tests/test_4_fixture.py +++ b/tests/test_4_fixture.py @@ -23,8 +23,8 @@ def simulated_data(params): def test_rtanalysis_fit(simulated_data, params): rta = RTAnalysis() rta.fit(simulated_data.rt, simulated_data.accuracy) - assert np.allclose(params["meanRT"], rta.meanrt_) - assert np.allclose(params["meanAcc"], rta.meanacc_) + assert np.allclose(params["meanRT"], rta.mean_rt_) + assert np.allclose(params["meanAcc"], rta.mean_accuracy_) def test_rtanalysis_checkfail(simulated_data, params): diff --git a/tests/test_5_parametric.py b/tests/test_5_parametric.py index e5c148b..c394841 100644 --- a/tests/test_5_parametric.py +++ b/tests/test_5_parametric.py @@ -18,8 +18,8 @@ def test_rtanalysis_parameteric(meanRT, sdRT, meanAcc): rta = RTAnalysis() if meanAcc > 0: rta.fit(test_df.rt, test_df.accuracy) - assert np.allclose(meanRT, rta.meanrt_) - assert np.allclose(meanAcc, rta.meanacc_) + assert np.allclose(meanRT, rta.mean_rt_) + assert np.allclose(meanAcc, rta.mean_accuracy_) else: with pytest.raises(ValueError): rta.fit(test_df.rt, test_df.accuracy)