Grubbs test added

maximtrp · Jun 29, 2017 · b51dd05 · b51dd05
1 parent 7ede56c
commit b51dd05
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 1 deletion.
diff --git a/posthocs/__init__.py b/posthocs/__init__.py
@@ -5,4 +5,4 @@
     import sign_array, sign_plot
 
 from posthocs._helpers \
-    import outliers_iqr
+    import outliers_iqr, outliers_grubbs
diff --git a/posthocs/_helpers.py b/posthocs/_helpers.py
@@ -54,3 +54,54 @@ def outliers_iqr(x, ret = 'filtered', coef = 1.5):
         return np.where((x < ll) | (x > ul))[0]
     else:
         return x[(x >= ll) & (x <= ul)]
+
def outliers_grubbs(x, hypo=False, alpha=0.05):

    '''Grubbs' Test for Outliers [1]_. This is the two-sided version of the test.

        The null hypothesis implies that there are no outliers in the data set.
        The test looks at the single observation that lies farthest from the
        sample mean and compares the statistic

            G = max|x_i - mean(x)| / std(x, ddof=1)

        with the critical value

            (N - 1) / sqrt(N) * sqrt(t**2 / (N - 2 + t**2)),

        where ``t`` is the critical value of Student's t distribution with
        ``N - 2`` degrees of freedom at a tail probability of
        ``alpha / (2 * N)``.

        Parameters
        ----------
        x : array_like or ndarray
            An array, any object exposing the array interface, containing
            data to test for an outlier in.

        hypo : bool, optional
            Specifies whether to return a bool value of a hypothesis test result.
            Returns True when we can reject the null hypothesis. Otherwise, False.
            Available options are:
                True : return a hypothesis test result
                False : return a filtered array without an outlier (default)

        alpha : float, optional
            Significance level for a hypothesis test. Default is 0.05.

        Returns
        -------
        Numpy array if hypo is False or a bool value of a hypothesis test result.
        When no outlier is detected the input data are returned unchanged
        (converted to a Numpy array if they were not one already). Samples
        with fewer than three observations or with zero standard deviation
        are never reported as containing an outlier.

        Notes
        -----
        .. [1] http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h1.htm

        Examples
        --------

        >>> x = np.array([199.31,199.53,200.19,200.82,201.92,201.95,202.18,245.57])
        >>> ph.outliers_grubbs(x)
        array([ 199.31,  199.53,  200.19,  200.82,  201.92,  201.95,  202.18])
    '''

    # Accept lists/tuples as promised by the docstring; ndarrays pass through.
    x = np.asarray(x)
    n = x.size

    # The t distribution needs N - 2 > 0 degrees of freedom, so the test is
    # undefined for fewer than three observations.
    if n < 3:
        return False if hypo else x

    deviations = np.abs(x - np.mean(x))
    ind = np.argmax(deviations)
    sd = np.std(x, ddof=1)

    # All observations identical: G would be 0/0, and nothing can be an outlier.
    if sd == 0:
        return False if hypo else x

    G = deviations[ind] / sd

    # Two-sided test: tail probability is alpha / (2N). Only t**2 is needed, so
    # the lower-tail quantile is used directly (the distribution is symmetric),
    # which avoids the rounding in 1 - alpha / (2N) for small tail areas.
    t_sq = ss.t.ppf(alpha / (2 * n), n - 2) ** 2
    critical = (n - 1) / np.sqrt(n) * np.sqrt(t_sq / (n - 2 + t_sq))
    result = bool(G > critical)

    if hypo:
        return result
    return np.delete(x, ind) if result else x