Initial Hasktorch 0.2 interface (hasktorch#42)
* Starting point for Hasktorch 0.2 interface

* Flesh out basic functionality in the new interface

* Working XOR MLP

* Add the independent function

* Aten -> ATen

* Add hasktorch and examples to CI

* Add side-effect for ones

* Make autograd safer
apaszke authored and junjihashimoto committed Jun 4, 2019
1 parent cf969c3 commit 0af2a97
Showing 21 changed files with 737 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .circleci/config.yml
@@ -26,6 +26,7 @@ jobs:
- run: stack exec codegen-exe
- run: stack test --jobs 2
- run: stack exec ffi-test
- run: stack exec xor_mlp
cabal-build:
docker:
- image: "ubuntu:18.04"
@@ -57,6 +58,8 @@ jobs:
- run: cabal new-exec codegen-exe
- run: cabal new-test ffi --jobs=2
- run: cabal new-exec ffi-test
- run: cabal new-test hasktorch
- run: cabal exec xor_mlp
osx-stack-build:
macos:
xcode: "10.2.1"
@@ -89,6 +92,7 @@ jobs:
- run: stack exec codegen-exe
- run: stack test --jobs 2
- run: stack exec ffi-test
- run: stack exec xor_mlp

workflows:
version: 2
2 changes: 2 additions & 0 deletions cabal.project
@@ -1,5 +1,7 @@
packages:
codegen/*.cabal
ffi/*.cabal
hasktorch/*.cabal
examples/*.cabal
inline-c/inline-c/*.cabal
inline-c/inline-c-cpp/*.cabal
20 changes: 20 additions & 0 deletions examples/examples.cabal
@@ -0,0 +1,20 @@
name: examples
version: 0.2.0.0
synopsis: examples for the new version of hasktorch
-- description:
homepage: https://github.com/githubuser/ffi-experimental#readme
license: BSD3
author: Austin Huang
maintainer: hasktorch@gmail.com
copyright: 2019 Austin Huang
category: Codegen
build-type: Simple
cabal-version: >=1.10

executable xor_mlp
hs-source-dirs: xor_mlp
main-is: Main.hs
default-language: Haskell2010
build-depends: base >= 4.7 && < 5
, hasktorch
, mtl
138 changes: 138 additions & 0 deletions examples/xor_mlp/Main.hs
@@ -0,0 +1,138 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE FunctionalDependencies #-}

module Main where

import Torch.Tensor
import Torch.DType
import Torch.TensorFactories
import Torch.Functions
import Torch.TensorOptions
import Torch.Autograd

import Control.Monad.State.Strict
import Data.List (foldl', scanl', intersperse)

type Parameter = IndependentTensor
type ParamStream a = State [Parameter] a

nextParameter :: ParamStream Parameter
nextParameter = do
params <- get
case params of
[] -> error "Not enough parameters supplied to replaceParameters"
(p : t) -> do put t; return p

class Parametrized f where
flattenParameters :: f -> [Parameter]
replaceOwnParameters :: f -> ParamStream f

replaceParameters :: Parametrized f => f -> [Parameter] -> f
replaceParameters f params =
let (f', remaining) = runState (replaceOwnParameters f) params in
if null remaining
then f'
else error "Some parameters in a call to replaceParameters haven't been consumed!"
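
-- A minimal usage sketch (editor's illustration, not part of this diff):
-- flattenParameters and replaceOwnParameters walk a module in the same
-- order, so a generic update step can map over the flattened list and
-- rebuild the model.  It uses only makeIndependent/toDependent from
-- Torch.Autograd, already imported above.
updateParameters :: Parametrized f => (Tensor -> Tensor) -> f -> IO f
updateParameters step model = do
  new <- mapM (makeIndependent . step . toDependent) (flattenParameters model)
  return $ replaceParameters model new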

class Randomizable spec f | spec -> f where
sample :: spec -> IO f

class (Randomizable spec f, Parametrized f) => Module spec f

--------------------------------------------------------------------------------
-- Linear function
--------------------------------------------------------------------------------

data LinearSpec = LinearSpec { in_features :: Int, out_features :: Int }
deriving (Show, Eq)

data Linear = Linear { weight :: Parameter, bias :: Parameter }
deriving (Show)

instance Randomizable LinearSpec Linear where
sample LinearSpec{..} = do
w <- makeIndependent =<< randn' [in_features, out_features]
b <- makeIndependent =<< randn' [out_features]
return $ Linear w b

instance Parametrized Linear where
flattenParameters Linear{..} = [weight, bias]
replaceOwnParameters _ = do
weight <- nextParameter
bias <- nextParameter
return $ Linear{..}


linear :: Linear -> Tensor -> Tensor
linear Linear{..} input = (matmul input (toDependent weight)) + (toDependent bias)
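
-- Shape sketch (editor's illustration, not part of this diff): with
-- in_features = 2 and out_features = 20, `linear` maps a [batch, 2] input
-- to [batch, 20]; the [20]-shaped bias is broadcast over the batch
-- dimension by the addition.
demoLinear :: IO Tensor
demoLinear = do
  layer <- sample $ LinearSpec { in_features = 2, out_features = 20 }
  input <- randn' [32, 2]        -- shape [32, 2]
  return $ linear layer input    -- shape [32, 20]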

--------------------------------------------------------------------------------
-- MLP
--------------------------------------------------------------------------------

data MLPSpec = MLPSpec { feature_counts :: [Int], nonlinearitySpec :: Tensor -> Tensor }

data MLP = MLP { layers :: [Linear], nonlinearity :: Tensor -> Tensor }

instance Randomizable MLPSpec MLP where
sample MLPSpec{..} = do
let layer_sizes = mkLayerSizes feature_counts
linears <- mapM sample $ map (uncurry LinearSpec) layer_sizes
return $ MLP { layers = linears, nonlinearity = nonlinearitySpec }
where
mkLayerSizes (a : (b : t)) =
scanl shift (a, b) t
where
shift (a, b) c = (b, c)
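
-- Worked example (editor's note): mkLayerSizes pairs consecutive feature
-- counts, e.g. mkLayerSizes [2, 20, 20, 1] == [(2,20), (20,20), (20,1)],
-- yielding one LinearSpec per layer of the sampled network.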

instance Parametrized MLP where
flattenParameters MLP{..} = concat $ map flattenParameters layers
replaceOwnParameters mlp = do
new_layers <- mapM replaceOwnParameters (layers mlp)
return $ mlp { layers = new_layers }

mlp :: MLP -> Tensor -> Tensor
mlp MLP{..} input = foldl' revApply input $ intersperse nonlinearity $ map linear layers
where revApply x f = f x
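
-- Dataflow sketch (editor's note): intersperse places the nonlinearity
-- between layers but not after the last one, so for three layers the
-- pipeline is linear1 -> nonlinearity -> linear2 -> nonlinearity -> linear3;
-- the final sigmoid is applied separately by `model` below.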

--------------------------------------------------------------------------------
-- Training code
--------------------------------------------------------------------------------

batch_size = 32
num_iters = 10000

model :: MLP -> Tensor -> Tensor
model params t = sigmoid (mlp params t)

sgd :: Tensor -> [Parameter] -> [Tensor] -> [Tensor]
sgd lr parameters gradients = zipWith (\p dp -> p - (lr * dp)) (map toDependent parameters) gradients
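
-- One manual training step, as a sketch (editor's note): `sgd` returns
-- plain Tensors, so main re-wraps them with makeIndependent before calling
-- replaceParameters.  The learning rate is a Tensor argument; the literal
-- 5e-4 used in main presumably goes through Tensor's numeric-literal
-- instances.  Given a model and a scalar loss:
--   let ps = flattenParameters model
--       gs = grad loss ps
--   ps' <- mapM makeIndependent (sgd 5e-4 ps gs)
--   let model' = replaceParameters model ps'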

main :: IO ()
main = do
init <- sample $ MLPSpec { feature_counts = [2, 20, 20, 1], nonlinearitySpec = Torch.Functions.tanh }
trained <- foldLoop init num_iters $ \state i -> do
input <- rand' [batch_size, 2] >>= return . (toDType Float) . (gt 0.5)
let expected_output = tensorXOR input

let output = squeezeAll $ model state input
let loss = mse_loss output expected_output

let flat_parameters = flattenParameters state
let gradients = grad loss flat_parameters

if i `mod` 100 == 0
then do putStrLn $ show loss
else return ()

new_flat_parameters <- mapM makeIndependent $ sgd 5e-4 flat_parameters gradients
return $ replaceParameters state $ new_flat_parameters
return ()
where
foldLoop x count block = foldM block x [1..count]
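
-- Editor's note: foldLoop is foldM specialised to a step counter, so the
-- loop above threads the model through num_iters IO iterations, printing
-- the loss every 100 steps and returning the trained parameters.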

tensorXOR :: Tensor -> Tensor
tensorXOR t = (1 - (1 - a) * (1 - b)) * (1 - (a * b))
where
a = select t 1 0
b = select t 1 1
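
-- Derivation (editor's note): with entries a, b in {0, 1} taken from
-- columns 0 and 1 of the batch,
--   1 - (1 - a) * (1 - b)   is logical OR, and
--   1 - a * b               is logical NAND,
-- so their product is (a OR b) AND NOT (a AND b), i.e. elementwise XOR.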
4 changes: 4 additions & 0 deletions ffi/ffi.cabal
@@ -34,6 +34,7 @@ library
, ATen.Unmanaged.Type.Extra
, ATen.Managed.NN
, ATen.Managed.TH
, ATen.Managed.Cast
, ATen.Managed.Native
, ATen.Managed.Type.Tuple
, ATen.Managed.Type.Generator
@@ -48,9 +49,11 @@ library
, ATen.Managed.Type.StdArray
, ATen.Managed.Type.Context
, ATen.Managed.Type.Extra
, Torch.Unmanaged.Autograd
, Torch.Unmanaged.NN
, Torch.Unmanaged.TH
, Torch.Unmanaged.Native
, Torch.Managed.Autograd
, Torch.Managed.NN
, Torch.Managed.TH
, Torch.Managed.Native
@@ -69,6 +72,7 @@ library
, mklml
, caffe2
, torch
extra-ghci-libraries: stdc++
if os(darwin)
ld-options: -Wl,-keep_dwarf_unwind
ghc-options: -optc-D_GLIBCXX_USE_CXX11_ABI=0 -optc-std=c++11 -optc-xc++
10 changes: 9 additions & 1 deletion ffi/src/ATen/Cast.hs
@@ -36,7 +36,6 @@ import Foreign.Ptr
import Foreign.Storable
import ATen.Class


instance Castable () () where
cast x f = f x
uncast x f = f x
@@ -189,10 +188,19 @@ instance Castable Word64 Word64 where
cast x f = f x
uncast x f = f x

instance Castable Bool CBool where
cast x f = f (if x then 1 else 0)
uncast x f = f (x /= 0)

instance Castable Int CInt where
cast x f = f (fromIntegral x)
uncast x f = f (fromIntegral x)

instance Castable Int Int64 where
cast x f = f (fromIntegral x)
-- TODO: Int64 might have a wider range than Int
uncast x f = f (fromIntegral x)
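
-- Usage sketch (editor's note, assuming the CPS-style class from
-- ATen.Class, roughly  cast :: a -> (b -> IO r) -> IO r  and the symmetric
-- uncast):
--   cast True $ \(cb :: CBool) -> ...    -- the continuation receives 1
--   uncast (0 :: CBool) $ \b -> ...      -- the continuation receives False
-- The Int/Int64 instance narrows with fromIntegral on uncast, which is only
-- lossy on platforms where Int is narrower than 64 bits (see the TODO above).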

instance Castable Int16 CShort where
cast x f = f (fromIntegral x)
uncast x f = f (fromIntegral x)
6 changes: 5 additions & 1 deletion ffi/src/ATen/Const.hs
@@ -25,7 +25,6 @@ C.context $ C.cppCtx <> mempty { C.ctxTypesTable = typeTable }

C.include "<ATen/ATen.h>"


kByte :: ScalarType
kByte = [C.pure| int8_t { (int8_t) at::ScalarType::Byte } |]

@@ -101,6 +100,11 @@ kCOMPILE_TIME_MAX_DEVICE_TYPES = [C.pure| int16_t { (int16_t) at::DeviceType::CO
kONLY_FOR_TEST :: DeviceType
kONLY_FOR_TEST = [C.pure| int16_t { (int16_t) at::DeviceType::ONLY_FOR_TEST } |]

-- TODO: add all values for at::Reduction

kMean :: Int64
kMean = [C.pure| int64_t { (int64_t) Reduction::Mean } |]
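
-- Editor's note: kMean mirrors Reduction::Mean, the reduction mode that
-- losses such as mse_loss (used by the xor_mlp example) presumably default
-- to; the TODO above records that the remaining Reduction values are still
-- missing.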

bCPU :: Backend
bCPU = [C.pure| int { (int) at::Backend::CPU } |]

36 changes: 36 additions & 0 deletions ffi/src/ATen/Managed/Cast.hs
@@ -0,0 +1,36 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module ATen.Managed.Cast where

import Foreign.ForeignPtr
import Control.Monad

import ATen.Class
import ATen.Cast
import ATen.Type
import ATen.Managed.Type.IntArray
import ATen.Managed.Type.TensorList

instance Castable [Int] (ForeignPtr IntArray) where
cast xs f = do
arr <- newIntArray
forM_ xs $ (intArray_push_back_l arr) . fromIntegral
f arr
uncast xs f = do
len <- intArray_size xs
f =<< mapM (\i -> intArray_at_s xs i >>= return . fromIntegral) [0..(len - 1)]

instance Castable [ForeignPtr Tensor] (ForeignPtr TensorList) where
cast xs f = do
l <- newTensorList
forM_ xs $ (tensorList_push_back_t l)
f l
uncast xs f = do
len <- tensorList_size xs
f =<< mapM (tensorList_at_s xs) [0..(len - 1)]
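
-- Editor's note: these instances let the generic cast helpers marshal
-- Haskell lists across the FFI boundary: a [Int] shape argument is copied
-- element by element into a freshly allocated IntArray, and a list of
-- managed Tensors is pushed into a new TensorList; uncast walks the
-- container back into a Haskell list.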


instance Castable (ForeignPtr Scalar) (ForeignPtr Scalar) where
cast x f = f x
uncast x f = f x
19 changes: 19 additions & 0 deletions ffi/src/Torch/Managed/Autograd.hs
@@ -0,0 +1,19 @@

module Torch.Managed.Autograd where

import Foreign.ForeignPtr

import qualified Torch.Unmanaged.Autograd as Unmanaged
import qualified ATen.Unmanaged.Type.Tensor
import qualified ATen.Unmanaged.Type.TensorList
import ATen.Type
import ATen.Class
import ATen.Cast


grad :: ForeignPtr Tensor -> ForeignPtr TensorList -> IO (ForeignPtr TensorList)
grad = cast2 Unmanaged.grad


makeIndependent :: ForeignPtr Tensor -> IO (ForeignPtr Tensor)
makeIndependent = cast1 Unmanaged.makeIndependent
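
-- Editor's note: cast1/cast2 lift the raw Ptr-based functions from
-- Torch.Unmanaged.Autograd to ForeignPtr arguments and results, presumably
-- via Castable instances that unwrap each ForeignPtr for the duration of
-- the call, so callers never handle raw pointers directly.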
71 changes: 71 additions & 0 deletions ffi/src/Torch/Unmanaged/Autograd.hs
@@ -0,0 +1,71 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}

module Torch.Unmanaged.Autograd where

import Foreign.Ptr
import qualified Language.C.Inline.Cpp as C
import qualified Language.C.Inline.Cpp.Exceptions as C
import qualified Language.C.Inline.Context as C
import qualified Language.C.Types as C

import ATen.Type

C.context $ C.cppCtx <> mempty { C.ctxTypesTable = typeTable }

C.include "<vector>"
C.include "<torch/torch.h>"
C.include "<torch/csrc/autograd/variable.h>"
C.include "<torch/csrc/autograd/engine.h>"
C.include "<ATen/core/functional.h>"

grad :: Ptr Tensor -> Ptr TensorList -> IO (Ptr TensorList)
grad y inputs = [C.throwBlock| std::vector<at::Tensor>* {
torch::autograd::Variable y = *$(at::Tensor* y);
const auto & inputs = *$(std::vector<at::Tensor>* inputs);

torch::autograd::edge_list roots { y.gradient_edge() };
if (!roots[0].function) {
throw std::runtime_error("Differentiated tensor not require grad");
}

if (y.numel() != 1) {
throw std::runtime_error("Differentiated tensor has more than a single element");
}
torch::autograd::variable_list grads { torch::ones_like(y) };

torch::autograd::edge_list output_edges;
output_edges.reserve(inputs.size());
for (torch::autograd::Variable input : inputs) {
const auto output_nr = input.output_nr();
auto grad_fn = input.grad_fn();
if (!grad_fn) {
grad_fn = input.try_get_grad_accumulator();
}
if (!input.requires_grad()) {
throw std::runtime_error("One of the differentiated Tensors does not require grad");
}
if (!grad_fn) {
output_edges.emplace_back();
} else {
output_edges.emplace_back(grad_fn, output_nr);
}
}

auto & engine = torch::autograd::Engine::get_default_engine();
auto outputs = engine.execute(roots, grads,
/*keep_graph=*/true,
/*create_graph=*/false,
output_edges);

return new std::vector<at::Tensor>(at::fmap<at::Tensor>(outputs));
}|]
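
-- Editor's summary (not part of the diff): the inline C++ above is a
-- minimal re-implementation of torch::autograd::grad.  It seeds the
-- backward pass with ones_like(y) (hence the scalar-output check), builds
-- one output edge per requested input (grad_fn for interior nodes, the
-- grad accumulator for leaves), and runs the default engine with
-- keep_graph=true / create_graph=false, so the graph stays reusable but no
-- higher-order graph is recorded.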

makeIndependent :: Ptr Tensor -> IO (Ptr Tensor)
makeIndependent t = [C.throwBlock| at::Tensor* {
return new at::Tensor($(at::Tensor* t)->detach().set_requires_grad(true));
}|]