In [12]:
using MLDatasets: MNIST
using Flux
include("AD/model.jl")

# Load both MNIST splits.
train_data = MNIST(:train)
test_data = MNIST(:test)

# Reshape the features to 28 * 28 rows (one column per remaining index), convert every
# value to Float32, and one-hot encode the targets over the labels 0:9.
x_train = map(Float32, reshape(train_data.features, 28 * 28, :))
y_train = Flux.onehotbatch(train_data.targets, 0:9)
x_test = map(Float32, reshape(test_data.features, 28 * 28, :))
y_test = Flux.onehotbatch(test_data.targets, 0:9)

# Sizes handed to the parameter constructors and to `train` / `test`.
rnn_settings = (input = 14 * 14, hidden = 64, output = 10)

# `wxh` and `dense1` come from `xavier_init`; `whh` and `b` start out as Float32 zeros.
wxh = Variable(xavier_init(rnn_settings.hidden, rnn_settings.input))
dense1 = Variable(xavier_init(rnn_settings.output, rnn_settings.hidden))
whh = Variable(zeros(Float32, rnn_settings.hidden, rnn_settings.hidden))
b = Variable(zeros(Float32, rnn_settings.hidden))
net = RNN_(wxh, whh, b, dense1)

# Uncomment to restrict all four arrays to their first 5000 columns.
#x_train = x_train[:,  1:5000]
#y_train = y_train[:, 1:5000]
#x_test = x_test[:, 1:5000]
#y_test = y_test[:, 1:5000]

# Hyper-parameters passed to `train`.
settings = (eta = 15e-3, epochs = 5, batch_size = 100)

train(net, x_train, y_train, settings, rnn_settings)
test(net, x_test, y_test, rnn_settings)

Epoch: 1
   Average loss: 1.2776
   Train accuracy: 0.6183
Epoch: 2
   Average loss: 0.6046
   Train accuracy: 0.8319
Epoch: 3
   Average loss: 0.4470
   Train accuracy: 0.8767
Epoch: 4
   Average loss: 0.3801
   Train accuracy: 0.8942
Epoch: 5
   Average loss: 0.3405
   Train accuracy: 0.9043
150.525330 seconds (178.00 M allocations: 84.898 GiB, 33.16% gc time, 0.81% compilation time)
Test accuracy: 0.9055



In [21]:

# Check which type the Float64 literal 15e-3 has after conversion with `Float32`.
Float32(15e-3) |> typeof

Float32

In [15]:
# Evaluate sqrt(2 / 12) in Float64 and convert the result to Float32.
sqrt(2.0 / 12) |> Float32

0.4082483f0

In [25]:
"""
    train_plot(rnn::RNN_, x, y, settings, rnn_settings)

Train `rnn` on the columns of `x` with targets taken from the columns of `y`, display a
plot of the average loss of every epoch, and return those averages.

Each column of `x` is cut into `size(x, 1) ÷ rnn_settings.input` consecutive chunks of
`rnn_settings.input` rows. The chunks are fed one after another through `recurrent` and
`tanh`; the last hidden state goes through `dense` and `cross_entropy_loss`.
`update_weights!` is called after every `settings.batch_size`-th sample.

# Arguments
- `rnn`: model whose `wxh`, `whh` and `dense1` fields are used.
- `x`: inputs, one sample per column.
- `y`: targets, one column per sample with `rnn_settings.output` rows.
- `settings`: must provide `epochs`, `batch_size` and `eta`.
- `rnn_settings`: must provide `input`, `hidden` and `output`.

# Returns
A `Vector{Float64}` holding one average loss per epoch.

# Throws
- `ArgumentError` if `size(x, 1)` is not a multiple of `rnn_settings.input`.
"""
function train_plot(rnn::RNN_, x::Any, y::Any, settings, rnn_settings)
    samples = size(x, 2)
    rows = size(x, 1)

    # Reject inputs that cannot be split evenly instead of printing a message and then
    # failing later with an unrelated conversion error.
    if rows % rnn_settings.input != 0
        throw(ArgumentError(
            "size(x, 1) = $rows is not a multiple of " *
            "rnn_settings.input = $(rnn_settings.input)",
        ))
    end
    rnn_cells = rows ÷ rnn_settings.input

    # Input and target buffers, allocated once and overwritten in place for every sample.
    x_train = [Constant(zeros(rnn_settings.input)) for _ in 1:rnn_cells]
    y_train = Constant(zeros(rnn_settings.output))

    # One average loss per epoch.
    losses = Float64[]

    @time for i in 1:settings.epochs
        epoch_loss = 0.0

        # Reset the global accuracy counters at the start of each epoch.
        # NOTE(review): nothing in this function increments them; presumably code run
        # by `forward!` does. If not, the accuracy printed below is 0 / 0 = NaN.
        global correct_prediction = 0
        global cumulative = 0

        println("Epoch: ", i)

        for j in 1:samples
            # Copy sample j into the buffers, one chunk of rows per recurrent step.
            for k in 1:rnn_cells
                chunk = (rnn_settings.input * (k - 1) + 1):(rnn_settings.input * k)
                @views x_train[k].output .= x[chunk, j]
            end
            @views y_train.output .= y[:, j]

            # Build the graph for this sample, starting from a zero hidden state.
            h = Variable(zeros(rnn_settings.hidden), name="h0")
            for k in 1:rnn_cells
                h = recurrent(x_train[k], rnn.wxh, h, rnn.whh) |> tanh
            end

            d1 = dense(h, rnn.dense1) |> identity
            e = cross_entropy_loss(d1, y_train)
            graph = topological_sort(e)

            epoch_loss += forward!(graph)
            backward!(graph)

            # NOTE(review): when `samples` is not a multiple of `settings.batch_size`,
            # the trailing samples of an epoch never reach `update_weights!` here.
            # Confirm how `update_weights!` treats gradients left over from them.
            if j % settings.batch_size == 0
                update_weights!(graph, settings.eta, settings.batch_size)
            end
        end

        # Average loss per sample for this epoch.
        avg_loss = epoch_loss / samples
        push!(losses, avg_loss)

        @printf("   Average loss: %.4f\n", avg_loss)
        @printf("   Train accuracy: %.4f\n", correct_prediction / cumulative)
    end

    # `losses` holds one value per epoch, so the x axis is labelled accordingly. The plot
    # is passed to `display` because the function returns `losses`, not the plot object,
    # which would otherwise be discarded without being shown.
    display(plot(losses, xlabel="Epoch", ylabel="Loss", title="Loss Over Epochs", legend=false))
    return losses
end

train_plot (generic function with 1 method)

In [2]:
# Echo the global `b` so the notebook displays its current value.
b

Variable([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], nothing, nothing)

In [26]:
using MLDatasets: MNIST
using Flux
include("AD/model.jl")

# Load both MNIST splits.
train_data = MNIST(:train)
test_data = MNIST(:test)



# Reshape the features to 28 * 28 rows, convert them to Float64, and one-hot encode the
# targets over the labels 0:9. `@time` reports the cost of each of the four steps.
@time x_train = Float64.(reshape(train_data.features, 28 * 28, :)) 
@time y_train = Flux.onehotbatch(train_data.targets, 0:9)
@time x_test = Float64.(reshape(test_data.features, 28 * 28, :))  
@time y_test = Flux.onehotbatch(test_data.targets, 0:9)

  0.110869 seconds (16 allocations: 358.887 MiB)
  0.000111 seconds (3 allocations: 234.453 KiB)
  0.062011 seconds (5 allocations: 59.815 MiB, 68.59% gc time)
  0.000035 seconds (3 allocations: 39.141 KiB)


10×10000 OneHotMatrix(::Vector{UInt32}) with eltype Bool:
 ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  …  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  1  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  1  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  …  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅     ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1
 1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅     ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅
 ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  1  ⋅  1  ⋅  ⋅  1     ⋅  ⋅  ⋅  ⋅  1  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅  ⋅

In [27]:
# Time the conversion of `x_train` with `sparse`; the result is displayed but not stored.
@time sparse(x_train)

784×60000 SparseMatrixCSC{Float64, Int64} with 8994156 stored entries:
⎡⣿⣿⣷⣿⣷⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣾⣿⣿⣷⣿⣿⣾⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⎤
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦

In [29]:
# Time the conversion of `x_train` with `sparse` again; the result is displayed but not stored.
@time sparse(x_train)

  0.157371 seconds (7 allocations: 137.698 MiB, 22.73% gc time)


784×60000 SparseMatrixCSC{Float64, Int64} with 8994156 stored entries:
⎡⣿⣿⣷⣿⣷⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣾⣿⣿⣷⣿⣿⣾⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⎤
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦

In [6]:
# Keep only the first 5000 columns of `x_train` (the slice is a copy).
x_train = let keep = 1:5000
    x_train[:, keep]
end

784×5000 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.

In [13]:
using SparseArrays

# Rebind `x_train` to the result of converting it with `sparse`.
x_train = x_train |> sparse

784×60000 SparseMatrixCSC{Float64, Int64} with 8994156 stored entries:
⎡⣿⣿⣷⣿⣷⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣾⣿⣿⣷⣿⣿⣾⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⎤
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦

In [14]:
# 784×784 matrix of standard-normal Float64 draws, scaled by sqrt(2 / (784 + 784)).
xxx = let n = 784
    randn(Float64, n, n) * sqrt(2.0 / (n + n))
end

784×784 Matrix{Float64}:
  0.000973476   0.0497486     0.0102124    …  -0.0420359     0.0205646
 -0.0129198    -0.0137566    -0.0268735        0.0173371     0.0432287
 -0.0352902    -0.0712763     0.0341737       -0.0405989    -0.0349429
  0.0495393     0.0575855    -0.00336064      -0.00686282    0.0646963
 -0.0170074    -0.015863      0.0290565       -0.0245232    -0.0554046
  0.0619421    -0.00287976   -0.0767534    …   0.0314432     0.0319532
  0.0367079     0.0220681     0.0174309       -0.0621032     0.00394242
 -0.0906319    -0.00582968    0.00694899       0.0152242     0.0350668
  0.0170347     0.0227409     0.00247228       0.0196739    -0.0114039
 -0.00765178   -0.0167262    -0.0527098        0.0565713     0.0263154
  0.00202239   -0.00723195   -0.000264667  …   0.0479189    -0.154034
 -0.0066549    -0.03035      -0.0244575        0.0308314    -0.0331183
 -0.0076624    -0.00782464   -0.0430073       -0.000881689  -0.00335293
  ⋮                                        ⋱       

In [10]:
using sparsearrays

LoadError: ArgumentError: Package sparsearrays not found in current path.
- Run `import Pkg; Pkg.add("sparsearrays")` to install the sparsearrays package.

In [15]:
# Store the `sparse` conversion of `x_train` under its own name, leaving `x_train` as is.
spar_x = x_train |> sparse

784×60000 SparseMatrixCSC{Float64, Int64} with 8994156 stored entries:
⎡⣿⣿⣷⣿⣷⣾⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⣿⣿⣿⣿⣾⣿⣿⣷⣿⣿⣾⣷⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣷⎤
⎣⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⎦

In [19]:
# Compare the cost of multiplying `xxx` by the sparse copy `spar_x` and by `x_train`.
# Each product is timed once, so the reported time can include compilation.
@time xxx*spar_x
@time xxx*x_train

  1.560540 seconds (137.00 k allocations: 368.591 MiB, 1.10% gc time, 11.97% compilation time)
  1.294785 seconds (2 allocations: 358.887 MiB)


784×60000 Matrix{Float64}:
 -0.445108   -0.107857    0.108578    …  -0.188393   -0.038572    0.049831
 -0.599415   -0.229959   -0.320074       -0.50759    -0.0101395  -0.762124
 -0.119565    0.0799112  -0.188346        0.292048   -0.340691   -0.171143
  0.0405539  -0.525029   -0.690538       -0.321562    0.0514111  -0.27028
 -0.25839    -0.20474    -0.44793        -0.44824     0.164876    0.333853
  0.127321    0.536699    0.180477    …   0.145287    0.294997    0.312422
 -0.0682627   0.149572   -0.278846       -0.154568   -0.0252376   0.0322067
 -0.343287   -0.159764    0.279803       -0.168627   -0.306666   -0.460444
 -0.720333   -0.423422   -0.229187       -0.262963   -0.32499     0.152529
 -0.300196   -0.60811    -0.336995       -0.162115   -0.290892   -0.0545371
  0.203197    0.254024   -0.295336    …   0.0217827  -0.0120911   0.00289611
  0.173536    0.761648    0.327596       -0.0194582   0.255458    0.0862209
  0.126001   -0.184103    0.0923756      -0.126765    0.0325569  -0.1