# Chapter 1

Code contained in this notebook is from Chapter 1

### Reference NuGet packages

The main Python package used throughout this book is NumPy. I'm using the .NET equivalent [NumSharp](https://github.com/SciSharp/NumSharp), a high performance computation for N-D Tensors in .NET, similar API to NumPy.

In [1]:
#r "nuget:NumSharp"

In [3]:
open NumSharp

### Page 4

In [5]:
let a = np.array([|1;2;3|])
let b = np.array([|4;5;6|])

In [7]:
printfn "%O" (a + b)

[5, 7, 9]


In [8]:
printfn "%O" (a * b)

[4, 10, 18]


### Page 5

In [18]:
let a = np.array(
    [|
        [|1;2|]
        [|3;4|]
    |])

In [19]:
// Sum all of the rows in each column
printfn "%O" (a.sum(0))

[4, 6]


In [20]:
// Sum all of the columns in each row
printfn "%O" (a.sum(1))

[3, 7]


In [21]:
let a = np.array(
    [|
        [|1;2;3|]
        [|4;5;6|]
    |])

let b = np.array([|10;20;30|])

In [22]:
printfn "%O" (a + b)

[[11, 22, 33], 
[14, 25, 36]]


### Basic functions in NumPy (Page 6)

In [23]:
let square (x:NDArray) = 
    np.square(&x)

In [24]:
let leakyRelu (x:NDArray) = 
    let boundRef = 0.2 * x
    np.maximum(&boundRef,&x)

In [33]:
let a = np.arange(-2,4)
a |> square

index,value
0,4
1,1
2,0
3,1
4,4
5,9


In [34]:
a |> leakyRelu

index,value
0,-0.4
1,-0.2
2,0.0
3,1.0
4,2.0
5,3.0


## Derivatives (Page 8)

In [138]:
let deriv (delta:float) (f:NDArray -> NDArray) (input:NDArray) = 
    (((input + delta) |> square) - ((input - delta) |> square)) / (2. * delta)

In [139]:
// Partially apply the default delta value 0.001 to deriv function
let (derivDefault:(NDArray -> NDArray) -> NDArray -> NDArray) = deriv 0.001

In [140]:
derivDefault square a

index,value
0,-3.99999999999956
1,-1.9999999999998352
2,0.0
3,1.9999999999998352
4,3.99999999999956
5,5.999999999999339


### Nested Functions (Page 10)

In [141]:
let chainLength2 f input = input |> f

In [142]:
// In this case, when functions are nested, composition can be used.
a |> chainLength2 (leakyRelu >> square)

index,value
0,0.16
1,0.04
2,0.0
3,1.0
4,4.0
5,9.0


### The Chain Rule (Page 12 & 13)

In [143]:
let sigmoid (x:NDArray) = 
    let negXRef = -1 * x
    1 / (1 + np.exp(&negXRef))

In [144]:
// Uses chain rule to compute derivative of two nested functions
// (f2(f1(x)))' = f2'(f1(x)) * f1'(x)

let chainDeriv2 f1 f2 (input:NDArray) = 
    let f1OfX = input |> f1 // df1 / dx
    let df1dx = input |> derivDefault f1 // df1/du
    let df2du = f1OfX |> derivDefault f2 // df2/du(f1(x))
    df1dx * df2du

In [145]:
a |> chainDeriv2 square sigmoid

index,value
0,-32.00000000000361
1,-3.999999999999341
2,0.0
3,3.999999999999341
4,32.00000000000361
5,107.99999999990692


In [146]:
a |> chainDeriv2 sigmoid square

index,value
0,-0.953623376176836
1,-1.075765685479896
2,0.0
3,2.924234314519806
4,7.046376623822349
5,11.430889521867844


### A Slightly Longer Example (Page 15 & 16)

In [147]:
// Uses chain rule to compute derivative of two nested functions
// (f3(f2(f1(x))))' = f3'(f2(f1(x))) * f2'(f1(x)) * f1'(x)

let chainDeriv3 f1 f2 f3 (input:NDArray) = 
    let f1OfX = input |> f1 // f1(x)
    let f2OfX = f1OfX |> f2 // (f2(f1(x)))
    let df3du = f2OfX |> derivDefault f3
    let df2du = f1OfX |> derivDefault f2
    let df1dx = input |> derivDefault f1
    df1dx * df2du * df3du

In [148]:
a |> chainDeriv3 leakyRelu sigmoid square

index,value
0,2.568398975280063
1,0.7202656042999724
2,0.0
3,5.84846862903913
4,28.18550649528629
5,68.58533713119951


### Functions with Multiple Inputs (Page 18)

In [149]:
let multipleInputsAdd (sigma) (x:NDArray) (y:NDArray) = 
    sigma (x + y)

In [150]:
a |> multipleInputsAdd square a

index,value
0,16
1,4
2,0
3,4
4,16
5,36


### Derivatives of Functions with Multiple Inputs (Page 20)

In [151]:
let multipleInputsAddBackwards (sigma) (x:NDArray) (y:NDArray) = 
    let a = x + y // Compute forward pass
    let dsda = a |> derivDefault sigma
    let dadx,dady = 1,1
    dsda * dadx, dsda * dady

In [152]:
a |> multipleInputsAddBackwards square a

Item1,Item2
"[ -8.000000000001783, -3.9999999999995595, 0, 3.9999999999995595, 8.000000000001783, 12.000000000004007 ]","[ -8.000000000001783, -3.9999999999995595, 0, 3.9999999999995595, 8.000000000001783, 12.000000000004007 ]"


### Creating New Features from Existing Features (Page 23)

In [153]:
let matmulForward (x:NDArray) (w:NDArray) = 
    np.dot(&x,&w)

### Derivatives of Functions with Multiple Vector Inputs (Page 25)

In [154]:
let matmulBackwardFirst (x:NDArray) (w:NDArray) = 
    np.transpose(&w,[|1;0|])

### Vector Functions and Their Derivatives: One Step Further (Page 26 & 27)

In [155]:
let matrixForwardExtra sigma (x: NDArray) (w:NDArray) = 
    sigma (np.dot(&x,&w))

### Vector Functions and Their Derivatives: The Backward Pass (Page 28)

In [156]:
let matrixFunctionBackward1 sigma (x:NDArray) (w:NDArray) = 
    let dSdN = 
        np.dot(&x,&w) 
        |> derivDefault sigma 
    let dNdX = np.transpose(&w,[|1;0|])
    np.dot(&dSdN,&dNdX)

In [157]:
np.random.seed(190203)

let mutable X = np.random.randn(1,3)
let mutable W = np.random.randn(3,1)

In [158]:
printfn "%O" X
printfn "%O" W

[[0.6517049161665682, -0.2665717649668461, 0.15257241764676238]]
[[-1.2805654524680707], 
[0.6660628214506024], 
[1.3664369275290302]]


In [159]:
W |> matmulBackwardFirst X

index,value
0,-1.2805654524680707
1,0.6660628214506024
2,1.3664369275290302


In [160]:
W |> matrixFunctionBackward1 sigmoid X

index,value
0,2.058185640363648
1,-1.0705278141367844
2,-2.196202355203703


### Computational Graph with Two 2D Matrix Inputs (Page 32 & 33)

In [182]:
let matrixFunctionForwardSum sigma (x:NDArray) (w:NDArray) = 
    np.dot(&x,&w)
    |> sigma
    |> (fun s -> np.sum(&s))

### The Fun Part: The Backward Pass (Page 36 & 37)

In [274]:
let matrixFunctionBackwardSum1 sigma (x:NDArray) (w:NDArray) = 
    
    let n = np.dot(&x,&w)
    let s = n |> sigma
    
    let l = np.sum(&s)
    
    let dLdS = np.ones_like(s)
    
    let dSdN = n |> derivDefault sigma
    
    let dLdN = dLdS * dSdN
    
    let dNdX = np.transpose(&w,[|1;0|])
    
    let dLdX = np.dot(&dSdN,&dNdX)
    
    dLdX

In [264]:
np.random.seed(190204)
let mutable X = np.random.randn(3, 3)
let mutable W = np.random.randn(3, 2)

In [267]:
printfn "%O" X

[[-0.4707108518250129, -0.4073042029369584, -0.6718479169964316], 
[0.37199226326492474, -0.9571555864958323, -0.138096246939663], 
[-0.7858376421341436, -0.6082089767864743, -1.5645461870890807]]


In [268]:
printfn "%O" W

[[-1.2961954688807813, 0.6487867969024643], 
[-0.1065787241222262, 0.08297097027790185], 
[0.8741833058411873, 0.8977530234810445]]


In [269]:
let n = np.dot(&X,&W)
n.shape

index,value
0,3
1,2


In [270]:
let s = n |> sigmoid
s.shape

index,value
0,3
1,2


In [271]:
W |> matrixFunctionForwardSum sigmoid X

index,value
0,2.236142898649634


In [276]:
W |> matrixFunctionBackwardSum1 sigmoid X

index,value
0,-1.394435181679182
1,-0.1704898998459658
2,-1.5761896272017917
3,1.347730859425737
4,0.1130647898209416
5,-0.8075866934981928
6,-1.8126156368421331
7,-0.2654599904240112
8,-4.024979054804085
