In [None]:
using Plots, LaTeXStrings
using LinearAlgebra
using Optim

In [None]:
# Problem sizes: m is the input dimension and N the number of samples.
# (k is defined here but not referenced anywhere else in the visible code.)
m = 2
k = 1
N = 1000

# Sample N points uniformly from the square [-6, 6)^m; each column of X is one point.
X = 12 .* rand(m, N) .- 6

# Target function (Himmelblau's function) evaluated at a point given as an indexable
# pair `x`; only x[1] and x[2] are read.
function f(x)
    u, v = x[1], x[2]
    return (u^2 + v - 11)^2 + (u + v^2 - 7)^2
end

# Two-scalar convenience form, e.g. for contour plotting over a grid.
f(x, y) = f([x, y])

# Target value for every sample (one per column of X).
y = [f(col) for col in eachcol(X)];

In [None]:
# Train/test split: the first N_train samples (columns of X) are used for fitting,
# the remaining ones are held out for testing.
N_train, N_test = 800, 200
X_train, X_test = X[:, 1:N_train], X[:, N_train+1:N]
y_train, y_test = y[1:N_train], y[N_train+1:N]
# Display the shapes of the two splits. The original referenced the undefined
# lowercase names `x_train`/`x_test`, which raised an UndefVarError.
size(X_train), size(X_test)

In [None]:
# Plotting window and the evaluation grid used by the contour plots.
ax, bx = -6, 6
ay, by = -6, 6

xx = range(ax, bx; length=200)
yy = range(ay, by; length=200)
# Contour levels, reused by the later contour plots of f and of the fitted network.
flevels = [0, 5, 20, 40, 60, 80, 100, 120, 150, 180, 300, 400, 600]

# Contours of the target function with the held-out test points drawn on top.
plt1 = plot(;
    xlabel=L"x", ylabel=L"y",
    xlims=(ax, bx), ylims=(ay, by),
    aspect_ratio=:equal, size=(600, 600),
    colorbar=:none, legend=:none,
)
contour!(plt1, xx, yy, f; levels=flevels, color=1, contour_labels=true)
#scatter!(X_train[1,:], X_train[2,:], c=2, label="train")
scatter!(plt1, X_test[1, :], X_test[2, :]; c=3, label="test")

In [None]:
# Rectified linear unit, applied elementwise: each entry of `z` is replaced by
# max(0, entry). Works on scalars and arrays alike.
ReLU(z) = broadcast(max, 0, z)

In [None]:
# Layer widths: N0 inputs, N1 hidden units, N2 outputs.
N0, N1, N2 = m, 32, 1

# Neural network parameters, drawn from a standard normal distribution
# (same draw order as before: A1, b1, A2, b2).
A1, b1 = randn(N1, N0), randn(N1)   # first layer: weight matrix and bias
A2, b2 = randn(N2, N1), randn(N2)   # second layer: weight matrix and bias

# Forward pass of the network whose parameters live in the globals A1, b1, A2, b2.

# Hidden layer: affine map followed by ReLU.
function F1(v0)
    return ReLU(A1 * v0 + b1)
end

# Output layer: affine map; the first entry of the result is returned as a scalar.
function F2(v1)
    out = A2 * v1 + b2
    return out[1]
end

# Full network on an input vector, and a two-scalar form for plotting over a grid.
F(v) = v |> F1 |> F2
F(x, y) = F([x, y])

In [None]:
# Contours of the network F with its initial (random) parameters over the plot window.
plot(;
    xlims=(ax, bx), ylims=(ay, by),
    xlabel=L"x", ylabel=L"y",
    size=(600, 600), aspect_ratio=:equal, colorbar=:none,
)
contour!(xx, yy, F; levels=-100:100, color=:black, contour_labels=true)

In [None]:
# Total number of trainable parameters: weights plus biases of both layers.
n = N1 * (N0 + 1) + N2 * (N1 + 1)

# Flatten all parameters into a single vector, in the order
# A1 (column-major), b1, A2 (column-major), b2.
x0 = vcat(vec(A1), b1, vec(A2), b2)

# Consistency check: the flattened vector has exactly n entries.
length(x0) == n

In [None]:
# Parameterised forward pass: the two-layer network with weights and biases read from a
# flat parameter vector `x` laid out as [A1[:]; b1; A2[:]; b2]. Layer sizes come from
# the globals N0, N1, N2.

# Hidden layer: the first N1*N0 entries of `x` form the N1×N0 weight matrix, the next N1
# entries the bias; ReLU is applied to the affine map of `v0`.
function F1(x, v0)
    nw = N1 * N0
    W = reshape(x[1:nw], N1, N0)
    b = x[nw+1:nw+N1]
    return ReLU(W * v0 + b)
end

# Output layer: the next N2*N1 entries of `x` form the N2×N1 weight matrix, followed by
# N2 bias entries; the first entry of the result is returned as a scalar.
# NOTE(review): ReLU is applied to the output here, whereas the one-argument F2(v1) is
# purely affine, so F(x0, v) and F(v) differ wherever the affine output is negative.
# Behaviour is kept as in the original; confirm which variant is intended.
function F2(x, v1)
    off = N1 * N0 + N1          # entries consumed by the hidden layer
    nw = N2 * N1
    W = reshape(x[off+1:off+nw], N2, N1)
    b = x[off+nw+1:off+nw+N2]
    return ReLU(W * v1 + b)[1]
end

# Network output for parameter vector `x` at input point `vi`.
# Both arguments are annotated as AbstractVector so that this method does not overwrite
# the untyped two-scalar method F(x, y) = F([x, y]): an untyped F(x, vi) has the same
# signature and silently replaces it, which breaks re-running the earlier contour cell.
F(x::AbstractVector, vi::AbstractVector) = F2(x, F1(x, vi))

In [None]:
# Network predictions under parameter vector `x`, one per column (sample) of `X`.
ŷ(x, X) = map(j -> F(x, X[:, j]), axes(X, 2))

# Mean squared error between the predictions for `X` and the targets `y`.
function loss(x, X, y)
    residual = ŷ(x, X) - y
    scale = 1 / length(y)
    return scale * norm(residual)^2
end

# Training and test loss at the initial parameters.
loss(x0, X_train, y_train), loss(x0, X_test, y_test)

In [None]:
# Training loss along a random direction d starting from the initial parameters,
# for step sizes t in [0, 1].
d = randn(size(x0))
plot(t -> loss(x0 .+ t .* d, X_train, y_train), 0, 1; label=:none)

In [None]:
# Fit the network: minimise the training loss with L-BFGS, starting from x0 and using
# forward-mode automatic differentiation for the gradient.
@time res = optimize(
    x -> loss(x, X_train, y_train),
    x0,
    LBFGS();
    autodiff=:forward,
)

# Parameters found by the optimiser.
xmin = Optim.minimizer(res)

# Training and test loss at the fitted parameters.
loss(xmin, X_train, y_train), loss(xmin, X_test, y_test)

In [None]:
# Overlay: contours of the target f (series colour 1) and of the fitted network (black),
# both drawn at the same levels.
plot(;
    xlims=(ax, bx), ylims=(ay, by),
    xlabel=L"x", ylabel=L"y",
    size=(600, 600), aspect_ratio=:equal, colorbar=:none,
)
contour!(xx, yy, f; levels=flevels, color=1, contour_labels=true)
contour!(xx, yy, (s, t) -> F(xmin, [s, t]);
    levels=flevels, color=:black, contour_labels=true)

In [None]:
# Continue the optimisation: run L-BFGS again, warm-started from the previous xmin.
@time res = optimize(
    x -> loss(x, X_train, y_train),
    xmin,
    LBFGS();
    autodiff=:forward,
)

# Updated parameters after the second run.
xmin = Optim.minimizer(res)

# Training and test loss at the updated parameters.
loss(xmin, X_train, y_train), loss(xmin, X_test, y_test)

In [None]:
# Contours of the fitted network alone, kept in plt2 for the side-by-side figure.
plt2 = plot(;
    xlims=(ax, bx), ylims=(ay, by),
    xlabel=L"x", ylabel=L"y",
    size=(600, 600), aspect_ratio=:equal, colorbar=:none,
)
#contour!(xx, yy, f, levels=flevels, color=:red, contour_labels=true)
contour!(plt2, xx, yy, (s, t) -> F(xmin, [s, t]);
    levels=flevels, color=:black, contour_labels=true)

In [None]:
# Rebuild plt1 with only the contours of the target function (no scatter points).
plt1 = plot(;
    xlims=(ax, bx), ylims=(ay, by),
    xlabel=L"x", ylabel=L"y",
    size=(600, 600), aspect_ratio=:equal,
    colorbar=:none, legend=:none,
)
contour!(plt1, xx, yy, f; levels=flevels, color=1, contour_labels=true)

# Target (left) and fitted network (right) side by side.
plot(plt1, plt2; layout=(1, 2), size=(900, 500))

In [None]:
# Write the figure to "neural-net.png". No plot object is passed, so per the Plots
# documentation this saves the current (most recently created) plot.
savefig("neural-net.png")

In [None]:
# Test loss along a fresh random direction d around the fitted parameters,
# for step sizes t in [0, 1].
d = randn(size(xmin))
plot(t -> loss(xmin .+ t .* d, X_test, y_test), 0, 1; label=:none)