Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
108 lines (88 sloc) 3.04 KB
-----------------------------------------------------------------------------
-- |
-- Module : Data.Summary.Double
-- Copyright : Copyright (c) 2010, Patrick Perry <patperry@gmail.com>
-- License : BSD3
-- Maintainer : Patrick Perry <patperry@gmail.com>
-- Stability : experimental
--
-- Summary statistics for @Double@s.
--
module Data.Summary.Double (
-- * The @Summary@ data type
Summary,
summary,
update,
-- * @Summary@ properties
sampleSize,
sampleMin,
sampleMax,
sampleMean,
sampleSE,
sampleVar,
sampleSD,
sampleCI,
) where
import Data.List( foldl' )
import Text.Printf
import Data.Summary.Utils
-- | A type for storing summary statistics for a data set including
-- sample size, min and max values, and first and second moments.
data Summary = S {-# UNPACK #-} !Int -- sample size
{-# UNPACK #-} !Double -- sample mean
{-# UNPACK #-} !Double -- sum of squares
{-# UNPACK #-} !Double -- sample min
{-# UNPACK #-} !Double -- sample max
instance Show Summary where
show s@(S n mu _ l h) =
printf " sample size: %d" n
++ printf "\n min: %g" l
++ printf "\n max: %g" h
++ printf "\n mean: %g" mu
++ printf "\n SE: %g" (sampleSE s)
++ printf "\n 99%% CI: (%g, %g)" c1 c2
where (c1,c2) = sampleCI 0.99 s
-- | Get a summary of a list of values.
summary :: [Double] -> Summary
summary = foldl' update empty
-- | Get an empty summary.
empty :: Summary
empty = S 0 0 0 (1/0) (-1/0)
-- | Update the summary with a data point.
-- Running mean and variance computed as in Knuth, Vol 2, page 232,
-- 3rd edition, see http://www.johndcook.com/standard_deviation.html for
-- a description.
update :: Summary -> Double -> Summary
update (S n m s l h) x =
let n' = n+1
delta = x - m
m' = m + delta / fromIntegral n'
s' = s + delta*(x - m')
l' = if x < l then x else l
h' = if x > h then x else h
in S n' m' s' l' h'
-- | Get the sample size.
sampleSize :: Summary -> Int
sampleSize (S n _ _ _ _) = n
-- | Get the sample mean.
sampleMean :: Summary -> Double
sampleMean (S _ m _ _ _) = m
-- | Get the sample variance.
sampleVar :: Summary -> Double
sampleVar (S n _ s _ _) = s / fromIntegral (n - 1)
-- | Get the sample standard deviation.
sampleSD :: Summary -> Double
sampleSD s = sqrt (sampleVar s)
-- | Get the sample standard error.
sampleSE :: Summary -> Double
sampleSE s = sqrt (sampleVar s / fromIntegral (sampleSize s))
-- | Get a Central Limit Theorem-based confidence interval for the mean
-- with the specified coverage level. The level must be in the range @(0,1)@.
sampleCI :: Double -> Summary -> (Double,Double)
sampleCI level s = interval level (sampleMean s) (sampleSE s)
-- | Get the minimum of the sample.
sampleMin :: Summary -> Double
sampleMin (S _ _ _ l _) = l
-- | Get the maximum of the sample.
sampleMax :: Summary -> Double
sampleMax (S _ _ _ _ h) = h
Something went wrong with that request. Please try again.