In [11]:
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Enumerator
import Graphics.Vega.VegaLite hiding (density)
import IHaskell.Display.Hvega (vlShow)
import Data.Aeson

:ext OverloadedStrings
:ext OverloadedLists
:e GADTs
:e FlexibleContexts
:e FlexibleInstances
:e DeriveFunctor
:e GeneralizedNewtypeDeriving

In [12]:

import System.Random.MWC (createSystemRandom)
import qualified Graphics.Vega.VegaLite as VL
import IHaskell.Display.Hvega (vlShow)
import Data.Aeson (ToJSON(toJSON), Value)
import Data.Aeson (encode)
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Text (Text, pack)

import Control.Monad (liftM2, replicateM, forM, forM_)
import Control.Monad.IO.Class (liftIO)
import Data.List (sort)
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Traced
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Inference.SMC as SMC
import Control.Monad.Bayes.Inference.RMSMC as RMSMC
import Control.Monad.Bayes.Sequential
import Control.Monad.Bayes.Population
import Control.Monad.Bayes.Traced.Static (Traced)
import Control.Monad.Bayes.Inference.SMC

import Numeric.Log
import Control.Monad.Bayes.Class

import Data.List (partition)

-- | Bar chart spec: one bar per distinct value of the nominal field @xName@,
-- with bar height given by the number of rows carrying that value.
barPlot :: Text -> VL.VLSpec
barPlot xName = VL.asSpec [bars, channels]
  where
    bars = VL.mark VL.Bar [VL.MOpacity 1.0, VL.MColor "#a3c6de"]
    -- X carries the category, Y the aggregated row count.
    xChannel = VL.position VL.X [VL.PName xName, VL.PmType VL.Nominal]
    yChannel = VL.position VL.Y [VL.PName "binnedData", VL.PAggregate VL.Count, VL.PmType VL.Quantitative, VL.PAxis [VL.AxTitle "count"]]
    channels = VL.encoding (xChannel (yChannel []))

-- | Blue line chart spec of the quantitative field @yName@ against the
-- quantitative field @xName@.
linePlot :: Text -> Text -> VL.VLSpec
linePlot xName yName =
  VL.asSpec
    [ VL.mark VL.Line [VL.MColor "blue"]
    , (VL.encoding . quantitative VL.X xName . quantitative VL.Y yName) []
    ]
  where
    -- Position channel for a quantitative field on the given axis.
    quantitative axis field = VL.position axis [VL.PName field, VL.PmType VL.Quantitative]

-- | Scatter plot (circle marks) of two quantitative fields in one fixed mark
-- colour, with the x and y scale domains pinned to the given (min, max)
-- ranges. Shared implementation behind 'scatterBlue' and 'scatterGreen',
-- which previously duplicated this body verbatim.
scatterWithMarkColor :: Text -> Text -> Text -> (Double, Double) -> (Double, Double) -> VL.VLSpec
scatterWithMarkColor markColor xName yName (xmin, xmax) (ymin, ymax) =
  let encoding = VL.encoding
            . VL.position VL.X [VL.PName xName, VL.PmType VL.Quantitative, VL.PScale [VL.SDomain $ VL.DNumbers [xmin, xmax]]]
            . VL.position VL.Y [VL.PName yName, VL.PmType VL.Quantitative, VL.PScale [VL.SDomain $ VL.DNumbers [ymin, ymax]]]
  in VL.asSpec [VL.mark VL.Circle [VL.MColor markColor], encoding []]

-- | Blue scatter plot of field @xName@ against @yName@ on fixed axis ranges.
scatterBlue :: Text -> Text -> (Double, Double) -> (Double, Double) -> VL.VLSpec
scatterBlue = scatterWithMarkColor "blue"

-- | Green scatter plot of field @xName@ against @yName@ on fixed axis ranges.
scatterGreen :: Text -> Text -> (Double, Double) -> (Double, Double) -> VL.VLSpec
scatterGreen = scatterWithMarkColor "green"
  
-- | Scatter plot (circle marks) of @xName@ against @yName@ on fixed axis
-- ranges, with each point coloured by the quantitative field @zName@ using
-- the "blues" scheme.
scatterPlotWithColor :: Text -> Text -> Text -> (Double, Double) -> (Double, Double) -> VL.VLSpec
scatterPlotWithColor xName yName zName (xmin, xmax) (ymin, ymax) =
  VL.asSpec [VL.mark VL.Circle [], channels]
  where
    -- Quantitative position channel whose scale domain is pinned to [lo, hi].
    boundedAxis axis field lo hi =
      VL.position axis [VL.PName field, VL.PmType VL.Quantitative, VL.PScale [VL.SDomain $ VL.DNumbers [lo, hi]]]
    shade = VL.color [ VL.MName zName, VL.MmType VL.Quantitative, VL.MScale [VL.SScheme "blues" [0.0, 1.0]]]
    channels = (VL.encoding . boundedAxis VL.X xName xmin xmax . boundedAxis VL.Y yName ymin ymax . shade) []

-- | Two-dimensional histogram: both fields are binned over the given extents
-- and each rectangular cell is coloured by the number of rows it contains.
density2DPlot :: Text -> Text -> (Double, Double) -> (Double, Double) -> VL.VLSpec
density2DPlot xName yName (xmin, xmax) (ymin, ymax) =
  VL.asSpec [VL.mark VL.Rect [], VL.encoding channels]
  where
    -- Shared binning settings; only the extent differs between the axes.
    binned lo hi = VL.PBin [VL.Nice False, VL.Steps [0.05, 0.5, 5.0], VL.Extent lo hi]
    channels =
      VL.position VL.X [VL.PName xName, binned xmin xmax, VL.PmType VL.Quantitative]
        . VL.position VL.Y [VL.PName yName, binned ymin ymax, VL.PmType VL.Quantitative]
        . VL.color [ VL.MAggregate VL.Count, VL.MName "col", VL.MmType VL.Quantitative, VL.MScale [VL.SScheme "blues" [0.0, 1.0]]]
        $ []

-- | Image-style grid of rectangles: @xName@ and @yName@ are nominal cell
-- coordinates (y sorted descending) and @zName@ sets both the fill and the
-- stroke colour of each cell on the "blues" scheme.
imagePlot :: Text -> Text -> Text -> VL.VLSpec
imagePlot xName yName zName = VL.asSpec [VL.mark VL.Rect [], channels []]
  where
    faintGrid = VL.PAxis [VL.AxGridOpacity 0.1]
    blues = VL.MScale [VL.SScheme "blues" [0.0, 1.0]]
    channels =
      VL.encoding
        . VL.position VL.X [VL.PName xName, VL.PmType VL.Nominal, faintGrid]
        . VL.position VL.Y [VL.PName yName, VL.PmType VL.Nominal, VL.PSort [VL.Descending], faintGrid]
        . VL.fill [ VL.MName zName, VL.MmType VL.Quantitative, blues]
        -- Only the stroke channel carries the (gradient) legend.
        . VL.stroke [ VL.MName zName, VL.MmType VL.Quantitative, blues, VL.MLegend [VL.LType VL.GradientLegend]]
  
-- | Variant of the image grid sized 200x100 for use as a facet cell:
-- ordinal axes without grid lines, and legends placed at the bottom.
imageFacetPlot :: Text -> Text -> Text -> VL.VLSpec
imageFacetPlot xName yName zName =
  VL.asSpec [VL.mark VL.Rect [], channels [], VL.width 200,  VL.height 100]
  where
    noGrid = VL.PAxis [VL.AxGrid False]
    blues = VL.MScale [VL.SScheme "blues" [0.0, 1.0]]
    channels =
      VL.encoding
        . VL.position VL.X [VL.PName xName, VL.PmType VL.Ordinal, noGrid]
        . VL.position VL.Y [VL.PName yName, VL.PmType VL.Ordinal, VL.PSort [VL.Descending], noGrid]
        . VL.fill [ VL.MName zName, VL.MmType VL.Quantitative, blues, VL.MLegend [VL.LOrient VL.LOBottom]]
        . VL.stroke [ VL.MName zName, VL.MmType VL.Quantitative, blues,
                      VL.MLegend [VL.LOrient VL.LOBottom, VL.LDirection VL.Horizontal, VL.LType VL.GradientLegend]]

-- | Arrangement of view specs consumed by 'plot':
--
--   * 'S' - a single spec, drawn as a one-element layer
--   * 'L' - several specs layered on top of each other
--   * 'H' - groups of specs; each group is layered, groups are concatenated horizontally
--   * 'V' - groups of specs; each group is layered, groups are concatenated vertically
--   * 'F' - @(field, nColumns, spec)@: the spec is faceted by @field@ and
--           wrapped into @nColumns@ columns
data SpecGrid = H [[VL.VLSpec]] | V [[VL.VLSpec]] | L [VL.VLSpec] | S VL.VLSpec | F (Text, Int, VL.VLSpec)

-- | Where 'plot' takes its data from: either inline named columns ('Cols')
-- or a source identified by a path ('File'); how the path is resolved is
-- decided inside 'plot'.
data InputData = Cols [(Text, VL.DataValues)]
               | File FilePath

-- | Assemble a complete Vega-Lite visualisation.
--
-- * @(figw, figh)@ - width and height applied to every arrangement except
--   'F' (a faceted plot takes its cell size from the faceted spec itself).
-- * @specGrid@ - how the view specs are layered / concatenated / faceted.
-- * @dataPoints@ - inline columns, or a file to be loaded by Vega-Lite.
plot :: (Double, Double) -> SpecGrid -> InputData -> VL.VegaLite
plot (figw,figh) specGrid dataPoints =
    let description = VL.description "Plot"
        dat' = case dataPoints of
            -- Same column order as before: the first pair ends up outermost.
            Cols cols -> VL.dataFromColumns [] (foldr (uncurry VL.dataColumn) [] cols)
            -- A file path must be loaded by URL; 'VL.dataFromSource' only
            -- refers to a *named* data source supplied elsewhere, so it never
            -- read the file.
            File fp -> VL.dataFromUrl (pack fp) []
        configure = VL.configure
            . VL.configuration (VL.Axis
                                        [ VL.Domain False,
                                          VL.LabelColor "#7F7F7F",
                                          VL.LabelPadding 4,
                                          VL.TickColor "#7F7F7F",
                                          VL.TickSize 5.67,
                                          VL.Grid True,
                                          VL.GridColor "#FFFFFF"
                                          ])
        spec = case specGrid of
            S s -> VL.layer [s]
            L ls -> VL.layer ls
            -- Each inner list becomes one layered view; the views are then
            -- placed side by side (H) or stacked (V).
            H lss -> VL.hConcat (map (VL.asSpec . (:[]) . VL.layer) lss)
            V lss -> VL.vConcat (map (VL.asSpec . (:[]) . VL.layer) lss)
            F (_, _, s) -> VL.specification s
        facet = case specGrid of
            F (field, nColumns, _) -> [VL.columns $ fromIntegral nColumns, VL.facetFlow [VL.FName field, VL.FmType VL.Nominal]]
            _   -> [VL.width figw,  VL.height figh]
    in VL.toVegaLite $ [VL.background "#f9f9f9", configure [], description, dat', spec] ++ facet

In [7]:
-- vlShow $ plot (200, 100) (L [barPlot "b"]) (Cols [("b", VL.Booleans $ sampleSTfixed $ prior $ mh 10 $ bernoulli 0.5)])

-- | Sample a 2-D point whose coordinates are independent normals with
-- means @a@ and @b@ and standard deviation 1 (x is drawn before y).
multinorm (a, b) = liftM2 (,) (normal a 1) (normal b 1)

-- | Pick one of the two component distributions, centred at (1,1) and
-- (5,5), uniformly at random, then sample a point from it.
mixture = uniformD [multinorm centre | centre <- [(1,1), (5,5)]] >>= id

-- Draw 1000 points from the mixture and split them into x and y columns.
(nums, nums2) <- fmap unzip (sampleIO (replicateM 1000 mixture))
-- Scatter the draws on fixed (-10, 10) axes.
vlShow (plot (200, 100) (L [scatterBlue "x" "y" (-10,10) (-10,10)]) (Cols [("x", VL.Numbers nums), ("y", VL.Numbers nums2)]))


In [8]:
-- | Posterior over which cluster centre, (1,1) or (5,5), produced the
-- observed point: uniform prior over the two centres, weighted by the
-- unit-variance normal density of each observed coordinate.
whichCluster (a', b') = do
    cluster <- uniformD [(1,1), (5,5)]
    let (a, b) = cluster
    factor (normalPdf a 1 a' * normalPdf b 1 b')
    -- prediction <- multinorm cluster
    -- condition (prediction == point )
    return cluster


-- Exact posterior over the two clusters for the point (3,3).
enumerate (whichCluster (3,3))
-- sampleIO $ runPopulation $ smcMultinomial 10 10 $ (whichCluster (3,3))

[((1.0,1.0),0.49999999999999994),((5.0,5.0),0.49999999999999994)]

In [9]:
-- Cluster assignments for the point (4,3), drawn with `mh 10000`.
samples <- sampleIO (prior (mh 10000 (whichCluster (4,3))))

-- a: total number of samples; b: how many of them chose cluster (5,5).
a = length samples
b = length [s | s <- samples, s == (5,5)]




In [81]:
-- Bar chart of the assignments: one True per sample that is not (5,5),
-- followed by one False per sample that is (5,5).
vlShow (plot (200, 100) (L [barPlot "b"]) (Cols [("b", VL.Booleans (Prelude.replicate (a - b) True <> Prelude.replicate b False))]))


()

In [13]:
-- cars  =  dataFromUrl "https://vega.github.io/vega-datasets/data/cars.json" []

-- enc = encoding
--         . position X [ PName "Horsepower", PmType Quantitative ]
--         . position Y [ PName "Miles_per_Gallon", PmType Quantitative, PTitle "Miles per Gallon" ]
--         . color [ MName "Origin" ]

-- bkg = background "rgba(0, 0, 0, 0.05)"



In [14]:
-- vlShow $ toVegaLite [ bkg, cars, mark Circle [MTooltip TTEncoding], enc [] ]

TODO:

- Get a dataset for logistic regression.
- Plot the dataset.
- Add a mouseover (tooltip) to the plot.
- Plot posterior predictive samples on top of the data.

- Get diagrams working so that they show in the notebook.


In [76]:
-- Toy one-dimensional inputs for the logistic-regression example.
xs :: [Double]
xs = [-10, -5, 40, 60, 100]

-- Class label for each entry of xs, matched by position (the two lists are
-- zipped together where the model is run).
labels :: [Bool]
labels = [False, False, False, True, True]

-- | Logistic regression with standard-normal priors on the slope @m@ and
-- intercept @b@ and a Gamma(1,1) prior on the noise scale. Every
-- observation draws a noisy linear predictor, squashes it through the
-- logistic function and scores the observed label with the resulting
-- probability. Returns the sampled @(m, b)@.
logisticRegression :: (MonadInfer m) => [(Double, Bool)] -> m (Double, Double)
logisticRegression dat = do
  m <- normal 0 1
  b <- normal 0 1
  sigma <- gamma 1 1
  let noisyLogit x = normal (m * x + b) sigma
      sigmoid x = (\t -> 1 / (1 + exp (negate t))) <$> noisyLogit x
      -- Weight the trace by the probability assigned to the seen label.
      obs (x, label) = do
        p <- sigmoid x
        factor . Exp . log $ if label then p else 1 - p
  mapM_ obs dat
  return (m, b)

-- | Deterministic logistic curve: the sigmoid of the linear predictor
-- @m * x + b@.
forward m b x = 1 / (1 + exp (negate logit))
  where
    logit = m * x + b

-- | Generate @n@ random data points: x uniform on (-1, 1) paired with a
-- fair-coin label drawn independently of x.
syntheticData :: MonadSample m => Int -> m [(Double, Bool)]
syntheticData n = replicateM n (liftM2 (,) (uniform (-1) 1) (bernoulli 0.5))

-- | Map a label to a plotting height: 10 for True, -10 for False.
boolToInt flag
  | flag = 10
  | otherwise = -10



In [17]:
-- pp <- (sampleIO $ prior $ mh 1000 $ logisticRegression $ zip xs labels)

-- pp

In [4]:
-- Plot the training data: x against the label mapped to +/-10.
-- The x domain is (-20, 110) rather than (-10, 10) because xs contains
-- 40, 60 and 100, which fell outside the previously pinned axis range.
vlShow $ plot (200, 100) (L [scatterBlue "x" "y" (-20,110) (-10,10)]) (Cols 
        [("x", VL.Numbers xs),
        ("y", VL.Numbers (boolToInt <$> labels))
        ])

-- Posterior samples of (slope, intercept) for the data above, via `mh 1000`.
pp <- sampleIO $ prior $ mh 1000 $ logisticRegression $ zip xs labels

-- vlShow $ plot (200, 100) (L [scatterBlue "x" "y" (-10,10) (-10,10)]) (Cols 
--         [("x", VL.Numbers $ take 64 $ cycle [1,2,3,4,5,6,7,8]),
--         ("y", VL.Numbers (take 8 $ pp))
--         ])



In [51]:

-- The class signature below names the 'VegaLiteLab' type, but the notebook's
-- other imports only bring 'vlShow' into scope, so import the type here.
import IHaskell.Display.Hvega (VegaLiteLab)

-- | Values that can be rendered as a Vega-Lite chart in the notebook.
class Plottable a where
    plotVega :: a -> VegaLiteLab

-- | Pairs are drawn as a blue scatter plot (first components on x, second
-- on y) with both axes pinned to (-10, 10).
instance Plottable [(Double, Double)] where
    plotVega ds = let (nums, nums2) = unzip ds in vlShow $ plot (200, 100) (L [scatterBlue "x" "y" (-10,10) (-10,10)]) (Cols 
        [("x", VL.Numbers nums),
        ("y", VL.Numbers nums2)
        ])

-- | Plain numbers are drawn along the x axis, all at height 0.
instance Plottable [(Double)] where
    plotVega nums = vlShow $ plot (200, 100) (L [scatterBlue "x" "y" (-10,10) (-10,10)]) (Cols 
        [("x", VL.Numbers nums),
        ("y", VL.Numbers (map (const 0) nums))
        ])

-- instance Plottable [Bool] where
--     plotVega nums = plot (200, 100) (L [scatterBlue "x" "y" (-10,10) (-10,10)]) (Cols 
--         [("x", VL.Numbers nums),
--         ("y", VL.Numbers (take (length nums) $ Prelude.repeat 0))
--         ])

-- | A function is drawn as a line plot evaluated at every point of the
-- notebook-level @range@ (defined in another cell).
instance Plottable (Double -> Double) where
    plotVega f = vlShow $ plot (600, 300)
              (L [linePlot "x" "y"])
              (Cols [("x", VL.Numbers $ fromIntegral <$> range), ("y", VL.Numbers (f <$> range))])

In [80]:
-- Posterior samples of (slope, intercept) for the toy data, via `mh 10000`.
samples <- sampleIO (prior (mh 10000 (logisticRegression (zip xs labels))))



In [81]:
-- Split the posterior samples into their first components (the model's
-- slope m) and second components (the intercept b).
(means, biases) = unzip samples

-- | Arithmetic mean of a collection of numbers.
av ls = total / count
  where
    total = Prelude.sum ls
    count = fromIntegral (Prelude.length ls)

av biases

-0.6291807497397649

In [82]:
-- Plot the logistic curve at the averaged slope and intercept.
plotVega (forward (av means) (av biases) :: Double -> Double)

In [17]:

-- Sample points for the x axis of the line plots.
-- NOTE(review): left without a type signature; it is consumed both through
-- 'fromIntegral' and directly as input to the plotted function, so it
-- appears to rely on staying polymorphic - confirm before annotating.
range = [-100..100]

-- | Line plot of @f@ evaluated at every point of @range@.
graphOf f =
    vlShow . plot (600, 300) (L [linePlot "x" "y"]) $
        Cols [("x", VL.Numbers (map fromIntegral range)), ("y", VL.Numbers (map f range))]

-- Density of a normal with mean 0 and standard deviation 10.
graphOf (exp . ln . normalPdf 0 10)

In [86]:
import Control.Monad.Trans.Cont
import Control.Monad.Bayes.Class (MonadSample (random, normal, bernoulli), MonadCond (score), MonadInfer, condition)
import qualified Statistics.Distribution.Normal as Statistics
import Statistics.Distribution (density)
import Numeric.Integration.TanhSinh
import Control.Monad.Bayes.Weighted (runWeighted)
import qualified Statistics.Distribution.Uniform as Statistics
import Numeric.Log (Log(ln, Exp))


-- | A measure represented as a continuation: given a function from values
-- to 'Double', it produces a single 'Double'.
newtype Measure a = Measure (Cont Double a) deriving (Functor, Applicative, Monad)

-- | Feed the function @integrand@ to the measure's continuation and return
-- the resulting number.
runMeasure :: (a -> Double) -> Measure a -> Double
runMeasure integrand (Measure k) = runCont k integrand

-- | Sampling interpreted as integration: only 'random' is defined here,
-- as the measure built from the uniform(0,1) density.
instance MonadSample Measure where
    -- normal m s = fromDensityFunction $ density $ Statistics.normalDistr m s
    random = fromDensityFunction (density (Statistics.uniformDistr 0 1))


-- | Build a 'Measure' from a density function @d@: integrating a function
-- @f@ against the measure computes the integral of @f x * d x@ numerically
-- over the interval [0, 1].
fromDensityFunction :: (Double -> Double) -> Measure Double
fromDensityFunction d = Measure $ cont $ \f ->
    quadratureTanhSinh (\x -> f x * d x)
  where
    -- 'trap' takes the integrand first and the bounds second; the previous
    -- @trap 0 1 z@ passed them in the wrong order and did not typecheck.
    -- 'trap' yields a list of successive estimates; the last one is used
    -- (assumes the list is finite and non-empty - 'last' is partial).
    quadratureTanhSinh integrand = result (last (trap integrand 0 1))


-- | Mass that a weighted model assigns to the open interval
-- (lower, upper): each value inside the interval contributes its weight,
-- everything else contributes 0.
probability (lower, upper) = runMeasure mass . runWeighted
  where
    mass (x, d)
      | lower < x && x < upper = exp (ln d)
      | otherwise = 0


: 

In [87]:
:t probability

In [91]:
-- | Fair coin flip choosing between two shifted copies of a scaled
-- uniform draw: @10 * u + 10@ on heads, @10 * u - 10@ on tails.
model = do
    heads <- bernoulli 0.5
    scaled <- (* 10) <$> random
    return (if heads then scaled + 10 else scaled - 10)
    -- x <- random
    -- factor (log $ Exp $ x)
    -- return (x*100)

-- Plot, for each x in range, the mass the model puts on (x-1, x+1).
graphOf (\x -> probability (x - 1, x + 1) model)

-- probability (0,10) (random)