# In [None]:
#imports
using Distributions;
using Random;
using DataFrames;
using CSV;
using Statistics;
using LinearAlgebra;
using Flux;
using Flux: params, 
            Dense, 
            Chain, 
            glorot_normal, 
            normalise, 
            Optimiser,
            train!;


"""
    nn_estimation(file_name)

Train a feed-forward network to approximate the expectation `E[phi(X_T) | X_0 = x]`
for a `d`-dimensional geometric Brownian motion with correlated noise, and
periodically compare the network against a Monte-Carlo reference.

Every `err_step` training steps (and once before training) one row is appended to
the CSV file `file_name` with the absolute and relative L1 / L2 / L-infinity errors
on the current batch, the wall-clock training time since the previous report
(`t_nn`) and the Monte-Carlo time (`t_mc`). A row of column names is appended first.

# Arguments
- `file_name`: path of the CSV file that rows are appended to.

# Returns
`nothing`.
"""
function nn_estimation(file_name)

    # ---- model parameters -------------------------------------------------
    # Plain Float64 literals: Float16 storage would perturb these by ~4e-4 (relative).
    r = 0.05     # interest rate. NOTE(review): unused; `x_phi` scales by exp(-mu*T) -- confirm intended
    T = 1        # time to maturity
    N = 1        # number of sub-steps used by `x_sde`
    K = 100      # strike
    mu = -0.05   # drift
    a = 90       # lower bound of the initial-value interval
    b = 110      # upper bound of the initial-value interval

    # Centre inputs on the midpoint of [a, b] and scale by the interval width.
    function norm_ab(y)
        mid_point = (a + b) / 2
        return (y .- mid_point) ./ (b - a)
    end

    d = 100                  # dimension (number of GBM components)
    batch_samples = 800      # columns (samples) per batch
    initial_sampler = Uniform(a, b)

    time_grid = LinRange(1 / d, 1, d)
    beta = 0.1 .+ 0.5 .* time_grid    # per-component volatility
    # Correlation matrix: 1 on the diagonal, 0.5 elsewhere; sigma is its Cholesky factor.
    Q = ones((d, d)) * 0.5
    Q[diagind(Q)] .= 1
    sigma = Array(cholesky(Q).L)
    sigma_norms = sqrt.(sum(sigma .^ 2; dims=2))    # Euclidean norm of each row of sigma

    mc_samples = 500000      # Monte-Carlo draws per reference estimate
    mc_exp_rounds = 1        # number of MC repetitions averaged into the error metrics

    learning_rate = 0.001          # initial ADAM step size
    learn_rate_decrease = 250000   # decay the step size every this many steps

    train_steps = 750000     # total number of training steps
    err_step = 25000         # report errors every this many steps

    # Simulate the SDE from X over horizon T using N exact log-normal sub-steps.
    # The noise vector is (d, 1) and is broadcast over the columns of X, so all
    # columns of one call share the same Brownian increment.
    function x_sde(X::Array)
        dt = T / N
        # Drift per sub-step; scaling by dt (not T) keeps the result correct for N > 1.
        drift = (mu .- 0.5 .* (beta .* sigma_norms) .^ 2) .* dt
        for _ in 1:N
            noise = rand(Normal(0, 1), (d, 1))
            # Brownian increments have standard deviation sqrt(dt).
            X = X .* exp.(drift .+ sqrt(dt) .* (beta .* (sigma * noise)))
        end
        return X
    end

    # Payoff per column: exp(-mu*T) * max(K - min_i x_i, 0). Returns a 1 x n matrix.
    function x_phi(x::Array)
        return exp(-mu * T) .* max.(K .- minimum(x; dims=1), 0)
    end

    # Build one (input, target) training pair from a batch of initial values.
    function generate_training_data(X_init, x_sde, x_phi)
        X_0 = norm_ab(X_init)
        y_train = x_phi(x_sde(X_init))
        return [(X_0, y_train)]
    end

    # ---- network ----------------------------------------------------------
    input = Dense(d, d + d, tanh;
                  bias = false,
                  init = glorot_normal)

    hidden = Dense(d + d, d + d, tanh;
                   bias = false,
                   init = glorot_normal)

    # no activation on the last layer
    output = Dense(d + d, 1, identity)

    # Constructed but deliberately not part of the chain below.
    batch_norm_layer = BatchNorm(d + d, identity;
                                 initβ = zeros,
                                 initγ = ones,
                                 ϵ = 1e-6,
                                 momentum = 0.9)

    m = Chain(input, hidden, output)

    # mean squared error between the network output and the sampled payoff
    loss(u, v) = mean((m(u) - v) .^ 2)

    ps = Flux.params(m)

    # ADAM rescales gradients, so a decay applied *before* it is cancelled out.
    # ExpDecay therefore comes last and acts as a multiplier on ADAM's step:
    # it starts at 1, is multiplied by 0.01 every `learn_rate_decrease` steps,
    # and is floored so that the effective step size never drops below 1e-8.
    opt = Optimiser(ADAM(learning_rate),
                    ExpDecay(1.0, 0.01, learn_rate_decrease, 1e-8 / learning_rate))

    # ---- error reporting --------------------------------------------------
    # Append a row of column names to the output file.
    header_row = DataFrame(step = "step",
                        l1_errs="l1_errs",l2_errs="l2_errs",li_errs="li_errs",
                        rel_l1_errs="rel_l1_errs",rel_l2_errs="rel_l2_errs",rel_li_errs="rel_li_errs",
                        t_nn="t_nn",t_mc ="t_mc")

    CSV.write(file_name, header_row, append = true);

    # Compare the network with a Monte-Carlo estimate on X_init and append the
    # error metrics for training step k to the output file. The model is put in
    # test mode for the forward pass only and is back in train mode on return.
    function k_iter_output(X_init, t_nn, k)

        # Monte-Carlo estimate of the expected payoff for every column of X_init.
        function mc_sampler(X_init)
            acc = zeros(1, size(X_init, 2))
            for _ in 1:mc_samples
                acc .+= x_phi(x_sde(X_init))
            end
            return acc ./ mc_samples
        end

        # accumulators for averaging over the MC repetitions
        t_mc = 0.0
        l1_errs, l2_errs, li_errs = 0., 0., 0.
        rel_l1_errs, rel_l2_errs, rel_li_errs = 0., 0., 0.

        # network prediction in test mode; restore train mode straight away so
        # that training never continues in test mode
        X_0 = norm_ab(X_init)
        testmode!(m)
        u_i = m(X_0)
        trainmode!(m)

        for _ in 1:mc_exp_rounds

            # timed Monte-Carlo reference
            t_start = time()
            mc_i = mc_sampler(X_init)
            t_mc += time() - t_start
            # denominator for relative errors, bounded away from zero
            u_ref = abs.(max.(mc_i, 1e-8))

            errs = vec(abs.(u_i - mc_i))
            l1_errs += mean(errs)
            l2_errs += mean(errs .^ 2)
            li_errs = max(li_errs, maximum(errs))
            rel_errs = errs ./ u_ref
            rel_l1_errs += mean(rel_errs)
            rel_l2_errs += mean(rel_errs .^ 2)
            rel_li_errs = max(rel_li_errs, maximum(rel_errs))

        end

        # averages over the repetitions (L2 errors are root-mean-square)
        t_mc = t_mc / mc_exp_rounds
        l1_errs, l2_errs = l1_errs / mc_exp_rounds, sqrt(l2_errs / mc_exp_rounds)
        rel_l1_errs, rel_l2_errs = rel_l1_errs / mc_exp_rounds, sqrt(rel_l2_errs / mc_exp_rounds)

        row = DataFrame(step = k,
                        l1_errs=l1_errs,l2_errs=l2_errs,li_errs=li_errs,
                        rel_l1_errs=rel_l1_errs,rel_l2_errs=rel_l2_errs,rel_li_errs=rel_li_errs,
                        t_nn=t_nn,t_mc =t_mc)

        CSV.write(file_name, row, append = true)
    end

    # ---- training ---------------------------------------------------------
    trainmode!(m)

    # baseline comparison with MC before any training
    X_init = rand(initial_sampler, (d, batch_samples))
    k_iter_output(X_init, 0, 0)

    # start training time counter
    t_nn_start = time()

    for k in 1:train_steps

        # fresh batch of initial values and sampled payoffs for every step
        X_init = rand(initial_sampler, (d, batch_samples))
        data = generate_training_data(X_init, x_sde, x_phi)

        # learning step
        train!(loss, ps, data, opt)

        # output the errors and timings at these steps
        if mod(k, err_step) == 0
            t_nn = time() - t_nn_start       # training time since the last report
            k_iter_output(X_init, t_nn, k)   # compare with MC at this stage
            t_nn_start = time()              # MC time is excluded from the next t_nn
        end
    end

    print("Output ready")
    return nothing

end

# Run the experiment; the error metrics are appended to this CSV file.
nn_estimation("corr_gbm_d100_validation_errs.csv");

