In [1]:
#----------------__CHECKING__------------------- #
using Flux, CuArrays
using OpenAIGym
import Reinforce.action
import Reinforce:run_episode
import Flux.params
using Flux.Tracker: grad, update!
using Flux: onehot
using Statistics
using Distributed
using Distributions
using LinearAlgebra
using Base.Iterators
using BSON:@save,@load
using JLD

include("policy.jl")

│ Try running `] pin CuArrays@0.9`.
└ @ Flux.CUDA /home/shreyas/.julia/packages/Flux/WSB7k/src/cuda/cuda.jl:12


value_fn (generic function with 2 methods)

In [2]:
"""
HYPERPARAMETERS
"""
# Environment Creation #
env_name = "CartPole-v0"
MODE = "CAT" # Can be either "CON" (Continuous) or "CON" (Categorical)

# Environment Variables #
STATE_SIZE = 4
ACTION_SIZE = 2
MIN_RANGE = -2.0f0
MAX_RANGE = 2.0f0
EPISODE_LENGTH = 100
TEST_STEPS = 10000
# Policy parameters #
η = 3e-4 # Learning rate
STD = 0.0 # Standard deviation
HIDDEN_SIZE = 256
# GAE parameters
γ = 0.99
λ = 0.95
# Optimization parameters
PPO_EPOCHS = 10
NUM_EPISODES = 15000
BATCH_SIZE = 5
c₀ = 1.0
c₁ = 0.5
c₂ = 0.001
# PPO parameters
ϵ = 0.2
# FREQUENCIES
SAVE_FREQUENCY = 50
VERBOSE_FREQUENCY = 5
global_step = 0

# Global variable to monitor losses
reward_hist = []
policy_l = 0.0
entropy_l = 0.0
value_l = 0.0

0.0

In [3]:
function scale_rewards(rewards)
    return rewards # ./ 16.2736044
end

scale_rewards (generic function with 1 method)

In [4]:
"""
Define the networks
"""

if MODE == "CON"
	policy_μ,policy_Σ = gaussian_policy(STATE_SIZE,HIDDEN_SIZE,ACTION_SIZE)
	value = value_fn(STATE_SIZE,HIDDEN_SIZE,ACTION_SIZE,tanh)
elseif MODE == "CAT"
	policy = categorical_policy(STATE_SIZE,HIDDEN_SIZE,ACTION_SIZE)
	value = value_fn(STATE_SIZE,HIDDEN_SIZE,ACTION_SIZE,relu)
else 
	error("MODE can only be (CON) or (CAT)...")
end

opt = ADAM(η)

ADAM(0.0003, (0.9, 0.999), IdDict{Any,Any}())

In [5]:
"""
Functions to get rollouts
"""

function action(state)
    # Acccounting for the element type
    state = reshape(Array(state),length(state),1) 

    a = nothing
    if MODE == "CON"
	    # Our policy outputs the parameters of a Normal distribution
	    μ = policy_μ(state)
	    μ = reshape(μ,ACTION_SIZE)
	    log_std = policy_Σ
	    
	    σ² = (exp.(log_std)).^2
	    Σ = diagm(0=>σ².data)
	    
	    dis = MvNormal(μ.data,Σ)
	    
	    a = rand(dis,ACTION_SIZE)
	else
		action_probs = policy(state).data
        action_probs = reshape(action_probs,ACTION_SIZE)
    	a = sample(1:ACTION_SIZE,Weights(action_probs)) - 1
    end
    a
end

function run_episode(env)
    experience = []
    
    s = reset!(env)
    for i in 1:EPISODE_LENGTH
        a = action(s)
        # a = convert.(Float64,a)
        
        if MODE == "CON"
            a = reshape(a,ACTION_SIZE)
        end

        r,s_ = step!(env,a)
        push!(experience,(s,a,r,s_))
        s = s_
        if env.done
           break 
        end
    end
    experience
end

run_episode (generic function with 2 methods)

In [6]:
"""
Multi-threaded parallel rollout collection
"""

num_processes = 9
addprocs(num_processes) 

@everywhere function collect(env)
    run_episode(env)
end

@everywhere function rollout()
  env = GymEnv(env_name)
  return collect(env)
end

function get_rollouts()
    g = []
    for  w in workers()
      push!(g, rollout())
    end

    fetch.(g)
end

get_rollouts (generic function with 1 method)

In [131]:
"""
Generalized Adavantage Estimation
"""

function gae(states,actions,rewards,next_states)
    """
    Returns a Generalized Advantage Estimate for an episode
    """
    Â = []
    A = 0.0
    for i in reverse(1:length(states))
        δ = rewards[i] + γ*cpu.(value(next_states[i]).data[1]) - cpu.(value(states[i]).data[1])
        A = δ + (γ*λ*A)
        push!(Â,A)
    end
    
    Â = reverse(Â)
    return Â
end

function disconunted_returns(rewards)
    r = 0.0
    returns = []
    for i in reverse(1:length(rewards))
        r = rewards[i] + γ*r
        push!(returns,r)
    end
    returns = reverse(returns)
    returns
end

"""
Calculate Log Probabilities
"""
function log_prob_from_actions(states,actions)
    """
    Returns log probabilities of the actions taken
    
    states,actions : episode vairbles in the form of a list
    """
    log_probs = []
    
    for i in 1:length(states)
    	if MODE == "CON"
	        μ = reshape(policy_μ(states[i]),ACTION_SIZE).data
	        logΣ = policy_Σ.data |> cpu
        	push!(log_probs,normal_log_prob(μ,logΣ,actions[i]))
        else
        	action_probs = policy(states[i])
        	prob = action_probs[actions[i],:].data
        	push!(log_probs,log.(prob))
        end
    end
    
    log_probs
end


log_prob_from_actions

In [132]:
"""
Process and extraction information from rollouts
"""

function process_rollouts(rollouts)
    """
    rollouts : variable returned by calling `get_rollouts`
    
    Returns : 
    states, actions, rewards for minibatch processing
    """
    # Process the variables
    states = []
    actions = []
    rewards = []
    next_states = []
    advantages = []
    returns = []
    log_probs = []
    
    # Logging statistics
    episode_mean_returns = []
    
    for ro in rollouts
        episode_states = []
        episode_actions = []
        episode_rewards = []
        episode_next_states = []
        
        for i in 1:length(ro)
             push!(episode_states,Array(ro[i][1]))
             
             if MODE == "CON"
                 push!(episode_actions,ro[i][2])
             else
                 push!(episode_actions,ro[i][2] + 1)
             end
             
             push!(episode_rewards,ro[i][3])
             push!(episode_next_states,ro[i][4])
        end
        
        episode_rewards = scale_rewards(episode_rewards)
        episode_advantages = gae(episode_states,episode_actions,episode_rewards,episode_next_states)
        # episode_rewards = normalise(episode_rewards)
        
        episode_returns = disconunted_returns(episode_rewards)
        
        push!(episode_mean_returns,mean(episode_returns))
        
        push!(states,episode_states)
        push!(actions,episode_actions)
        push!(rewards,episode_rewards)
        push!(advantages,episode_advantages)
        push!(returns,episode_returns)
        push!(log_probs,log_prob_from_actions(episode_states,episode_actions))
    end
    
    states = cat(states...,dims=1)
    actions = cat(actions...,dims=1)
    rewards = cat(rewards...,dims=1)
    advantages = cat(advantages...,dims=1)
    returns = cat(returns...,dims=1)
    log_probs = cat(log_probs...,dims=1)
    
    push!(reward_hist,mean(episode_mean_returns))
    
    if length(reward_hist) <= 100
        println("RETURNS : $(mean(episode_mean_returns))")
    else
        println("MEAN RETURNS : $(mean(reward_hist))")
        println("LAST 100 RETURNS : $(mean(reward_hist[end-100:end]))")
    end
    
    return hcat(states...),hcat(actions...),hcat(rewards...),hcat(advantages...),hcat(returns...),hcat(log_probs...)
end

"""
Loss function definition
"""
function loss(states,actions,advantages,returns,old_log_probs)
    global global_step,policy_l,entropy_l,value_l
    global_step += 1
    
    if MODE == "CON"
	    μ = policy_μ(states)
	    logΣ = policy_Σ 
        
	    new_log_probs = normal_log_prob(μ,logΣ,actions)
	else
		action_probs = policy(states) # ACTION_SIZE x BATCH_SIZE
		actions_one_hot = zeros(ACTION_SIZE,size(action_probs)[end])
        
		for i in 1:size(action_probs)[end]
			actions_one_hot[actions[:,i][1],i] = 1.0				
		end
        
		new_log_probs = log.(sum((action_probs .+ 1f-5) .* actions_one_hot,dims=1))
    end
    
    # Surrogate loss computations
    ratio = exp.(new_log_probs .- old_log_probs)
    surr1 = ratio .* advantages
    surr2 = clamp.(ratio,(1.0 - ϵ),(1.0 + ϵ)) .* advantages
    policy_loss = mean(min.(surr1,surr2))
    
    value_predicted = value(states)
    value_loss = mean((value_predicted .- returns).^2)
    
    if MODE == "CON"
        entropy_loss = mean(normal_entropy(logΣ))
    else
        entropy_loss = mean(categorical_entropy(action_probs))
    end
    
    policy_l = policy_loss.data
    entropy_l = entropy_loss.data
    value_l = value_loss.data
    
    -c₀*policy_loss + c₁*value_loss - c₂*entropy_loss
end

"""
Optimization Function
"""
function ppo_update(states,actions,advantages,returns,old_log_probs)
    # Define model parameters
    if MODE == "CON"
        model_params = params(params(policy_μ)...,params(policy_Σ)...,params(value)...)
    else
        model_params = params(params(policy)...,params(value)...)
    end

    # Calculate gradients
    gs = Tracker.gradient(() -> loss(states,actions,advantages,returns,old_log_probs),model_params)

    # Take a step of optimisation
    update!(opt,model_params,gs)
end

ppo_update

In [133]:
"""
Train
"""

function train_step()    
    routs = get_rollouts()
    states,actions,rewards,advantages,returns,log_probs = process_rollouts(routs)
    
    idxs = partition(1:size(states)[end],BATCH_SIZE)
    
    for epoch in 1:PPO_EPOCHS
        for i in idxs
            mb_states = states[:,i] 
            mb_actions = actions[:,i] 
            mb_advantages = advantages[:,i] 
            mb_returns = returns[:,i] 
            mb_log_probs = log_probs[:,i]
            
            ppo_update(mb_states,mb_actions,mb_advantages,mb_returns,mb_log_probs)
        end
    end
end

function train()
    for i in 1:NUM_EPISODES
        println("EP : $i")
        train_step()
        println("Ep done")
        
        # Anneal learning rate
        if i%300 == 0
            if opt.eta > 1e-6
                opt.eta = opt.eta / 3.0
            end
        end
        
        if i % VERBOSE_FREQUENCY == 0
            # Show important statistics
            println("-----___Stats___-----")
            
            if MODE == "CON"
                println("Entropy : $(normal_entropy(policy_Σ))")
            end
            
            println("Policy Loss : $(policy_l)")
            println("Entropy Loss : $(entropy_l)")
            println("Value Loss : $(value_l)")
        end
        
        if i%SAVE_FREQUENCY == 0
        	if MODE == "CON"
	            @save "weights/policy_mu.bson" policy_μ
	            @save "weights/policy_sigma.bson" policy_Σ
	            @save "weights/value.bson" value
	        else
	        	@save "weights/policy_cat.bson" policy
	        	@save "weights/value.bson" value
            end
            
            save("stats.jld","rewards",reward_hist)
            println("\n\n\n----MAX REWRD SO FAR : $(maximum(reward_hist))---\n\n\n")
        end
    end
end

train (generic function with 1 method)

In [134]:
train()

EP : 1
Float32[2.19058e-5, 0.999978] (tracked)
Float32[0.999978]
Float32[6.79824e-6, 0.999993] (tracked)
Float32[0.999993]
Float32[2.00836e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[5.08198e-7, 1.0] (tracked)
Float32[1.0]
Float32[1.14919e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.5061e-8, 1.0] (tracked)
Float32[1.0]
Float32[5.27061e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.06539e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.05703e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.75426e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.70566e-5, 0.999983] (tracked)
Float32[0.999983]
Float32[4.96306e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.36759e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.12165e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.56257e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.33271e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.60118e-9, 1.0] (tracked)
Float32[1.0]
Float32[4.86987e-10, 1.0] (tracked)
Float32[1.0]
Float32[8.66897e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.35475e-5

Float32[1.0]
Float32[2.29307e-9, 1.0] (tracked)
Float32[1.0]
Float32[4.17564e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.86874e-5, 0.999981] (tracked)
Float32[0.999981]
Float32[5.47828e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.53977e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[3.70538e-7, 1.0] (tracked)
Float32[1.0]
Float32[7.9064e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.62838e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.2194e-9, 1.0] (tracked)
Float32[1.0]
Float32[6.10749e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.10608e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.63289e-5, 0.999984] (tracked)
Float32[0.999984]
Float32[4.74841e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.31829e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.08707e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.48601e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.32229e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.58857e-9, 1.0] (tracked)
Float32[1.0]
Float32[4.86017e-10, 1.0] (tracked)
Float32[1.0]
Float32[8.714

Float32[1.0]
Float32[1.68133e-5, 0.999983] (tracked)
Float32[0.999983]
Float32[4.67822e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.25293e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.88331e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.83633e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.13728e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.12607e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.81415e-10, 1.0] (tracked)
Float32[1.0]
Float32[6.53024e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.38641e-5, 0.999986] (tracked)
Float32[0.999986]
Float32[4.12365e-6, 0.999996] (tracked)
Float32[0.999996]
Float32[1.17813e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.90499e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.29132e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.31514e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.64047e-9, 1.0] (tracked)
Float32[1.0]
Float32[5.0875e-10, 1.0] (tracked)
Float32[1.0]
Float32[9.35161e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.62747e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.3

Float32[1.0]
Float32[1.67609e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.0334e-10, 1.0] (tracked)
Float32[1.0]
Float32[5.23608e-11, 1.0] (tracked)
Float32[1.0]
MEAN RETURNS : 6.34937268339823
LAST 100 RETURNS : 5.637444534917115
Ep done
EP : 8
Float32[1.25769e-5, 0.999987] (tracked)
Float32[0.999987]
Float32[3.4682e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[9.13093e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[2.04631e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.09641e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.88393e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.45518e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.57116e-10, 1.0] (tracked)
Float32[1.0]
Float32[4.333e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.03725e-5, 0.99999] (tracked)
Float32[0.99999]
Float32[2.95787e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[8.01325e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.84465e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.78992e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.54553e-9, 1.0] (tra

Float32[7.27269e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.68522e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.40649e-8, 1.0] (tracked)
Float32[1.0]
Float32[6.62367e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.23173e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.19497e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.72654e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.99724e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.18125e-5, 0.999988] (tracked)
Float32[0.999988]
Float32[3.26522e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[8.71053e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[2.02001e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.07092e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.86626e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.46864e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.62515e-10, 1.0] (tracked)
Float32[1.0]
Float32[4.47555e-11, 1.0] (tracked)
Float32[1.0]
Float32[7.24853e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.24129e-5, 0.999988] (tracked)
Float32[0.999988]
Float32[3.43702e-6, 0.999997] (t

Float32[1.49308e-11, 1.0] (tracked)
Float32[1.0]
Float32[9.16542e-6, 0.999991] (tracked)
Float32[0.999991]
Float32[2.47707e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[6.4412e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.44216e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.82834e-8, 1.0] (tracked)
Float32[1.0]
Float32[5.32802e-9, 1.0] (tracked)
Float32[1.0]
Float32[9.69077e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.68665e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.79804e-11, 1.0] (tracked)
Float32[1.0]
Float32[4.40764e-12, 1.0] (tracked)
Float32[1.0]
Float32[6.79893e-6, 0.999993] (tracked)
Float32[0.999993]
Float32[1.81618e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[4.59137e-7, 1.0] (tracked)
Float32[1.0]
Float32[9.75573e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.87209e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.4757e-9, 1.0] (tracked)
Float32[1.0]
Float32[6.17004e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.0479e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.69545e-11, 1.0] (tracked

Float32[1.0]
Float32[1.43943e-11, 1.0] (tracked)
Float32[1.0]
Float32[9.84629e-6, 0.99999] (tracked)
Float32[0.99999]
Float32[2.7062e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[7.20975e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.73523e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.53972e-8, 1.0] (tracked)
Float32[1.0]
Float32[6.82049e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.27119e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.26133e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.83625e-11, 1.0] (tracked)
Float32[1.0]
Float32[6.17168e-12, 1.0] (tracked)
Float32[1.0]
Float32[9.37578e-13, 1.0] (tracked)
Float32[1.0]
MEAN RETURNS : 6.282698746701842
LAST 100 RETURNS : 5.440145201361335
Ep done
EP : 15
Float32[6.44736e-6, 0.999994] (tracked)
Float32[0.999994]
Float32[1.6097e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[3.85611e-7, 1.0] (tracked)
Float32[1.0]
Float32[7.87464e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.423e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.46778e-9, 1.0] (tracked)
F

Float32[1.27429e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.12048e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.56575e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.20608e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.13266e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.61796e-10, 1.0] (tracked)
Float32[1.0]
Float32[5.86309e-11, 1.0] (tracked)
Float32[1.0]
Float32[9.04647e-12, 1.0] (tracked)
Float32[1.0]
Float32[5.66127e-6, 0.999994] (tracked)
Float32[0.999994]
Float32[1.40297e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.37488e-7, 1.0] (tracked)
Float32[1.0]
Float32[7.10754e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.2845e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.21901e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.66214e-10, 1.0] (tracked)
Float32[1.0]
Float32[5.78672e-11, 1.0] (tracked)
Float32[1.0]
Float32[8.71176e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.49745e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.17019e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.89711e-7, 1.0] (tracked

Float32[1.0]
Float32[3.34594e-11, 1.0] (tracked)
Float32[1.0]
Float32[4.85789e-6, 0.999995] (tracked)
Float32[0.999995]
Float32[1.16424e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.69265e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.34953e-8, 1.0] (tracked)
Float32[1.0]
Float32[9.26515e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.53983e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.45232e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.7315e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.4129e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.03129e-6, 0.999996] (tracked)
Float32[0.999996]
Float32[1.0452e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.61685e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.69057e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.07246e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.93708e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.34946e-10, 1.0] (tracked)
Float32[1.0]
Float32[5.54335e-11, 1.0] (tracked)
Float32[1.0]
Float32[8.72287e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.29909e-12, 1.0] (trac

Float32[1.0]
Float32[8.15883e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.2447e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.81595e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.81841e-6, 0.999996] (tracked)
Float32[0.999996]
Float32[9.44326e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[2.2557e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.7163e-8, 1.0] (tracked)
Float32[1.0]
Float32[8.49583e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.45712e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.40699e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.78293e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.65244e-12, 1.0] (tracked)
Float32[1.0]
Float32[7.99264e-13, 1.0] (tracked)
Float32[1.0]
Float32[3.79943e-6, 0.999996] (tracked)
Float32[0.999996]
Float32[9.26527e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[2.18782e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.50234e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.98481e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.34996e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.199e-10, 1.0] (tracke

Float32[1.0]
Float32[1.63164e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.5183e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.68729e-12, 1.0] (tracked)
Float32[1.0]
Float32[5.10431e-13, 1.0] (tracked)
Float32[1.0]
Float32[2.76428e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[6.45699e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.45191e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.81667e-8, 1.0] (tracked)
Float32[1.0]
Float32[4.77313e-9, 1.0] (tracked)
Float32[1.0]
Float32[7.69011e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.18047e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.72926e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.4069e-12, 1.0] (tracked)
Float32[1.0]
Float32[2.88954e-6, 0.999997] (tracked)
Float32[0.999997]
Float32[6.85637e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.57061e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.1051e-8, 1.0] (tracked)
Float32[1.0]
Float32[5.32487e-9, 1.0] (tracked)
Float32[1.0]
Float32[8.74614e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.37524e-10, 1.0] (tr

Float32[1.0]
Float32[5.26155e-9, 1.0] (tracked)
Float32[1.0]
Float32[8.95281e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.46608e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.28333e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.37574e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.71365e-13, 1.0] (tracked)
Float32[1.0]
Float32[2.37499e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[5.48987e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.22936e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.38076e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.98346e-9, 1.0] (tracked)
Float32[1.0]
Float32[6.36398e-10, 1.0] (tracked)
Float32[1.0]
Float32[9.7413e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.42091e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.97002e-12, 1.0] (tracked)
Float32[1.0]
Float32[2.15634e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[4.97088e-7, 1.0] (tracked)
Float32[1.0]
Float32[1.10546e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.13135e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.56664e-9, 1.0] (tracked)
Fl

Float32[3.90153e-7, 1.0] (tracked)
Float32[1.0]
Float32[8.15877e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.46328e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.28985e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.41059e-10, 1.0] (tracked)
Float32[1.0]
Float32[4.86016e-11, 1.0] (tracked)
Float32[1.0]
Float32[6.58733e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.7364e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[4.10097e-7, 1.0] (tracked)
Float32[1.0]
Float32[9.38792e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.90788e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.29819e-9, 1.0] (tracked)
Float32[1.0]
Float32[5.38743e-10, 1.0] (tracked)
Float32[1.0]
Float32[8.4083e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.24853e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.75785e-12, 1.0] (tracked)
Float32[1.0]
Float32[2.33513e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.69987e-6, 0.999998] (tracked)
Float32[0.999998]
Float32[3.79143e-7, 1.0] (tracked)
Float32[1.0]
Float32[8.09198e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.4825

Float32[1.0]
Float32[3.521e-10, 1.0] (tracked)
Float32[1.0]
Float32[5.39309e-11, 1.0] (tracked)
Float32[1.0]
Float32[7.84852e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.08185e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.40583e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.41039e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.03354e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.24489e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.09833e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.69789e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.49456e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.50356e-11, 1.0] (tracked)
Float32[1.0]
Float32[4.67154e-12, 1.0] (tracked)
Float32[1.0]
Float32[5.91352e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.48889e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[3.50769e-7, 1.0] (tracked)
Float32[1.0]
Float32[8.02735e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.62577e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.82031e-9, 1.0] (tracked)
Float32[1.0]
Float32[4.58268e-10, 1.0] (tracked)
Float32[1.0]

Float32[5.39744e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.282e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.6652e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.32266e-8, 1.0] (tracked)
Float32[1.0]
Float32[9.209e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.38154e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.9586e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.64968e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.40437e-12, 1.0] (tracked)
Float32[1.0]
MEAN RETURNS : 6.123031483980054
LAST 100 RETURNS : 5.027950354254776
Ep done
EP : 34
Float32[1.02442e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.11098e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.19874e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.23263e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.0758e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.51185e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.01999e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.57224e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.14393e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.54716e-7, 

Float32[1.0]
Float32[1.87082e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.71091e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.71985e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.83211e-13, 1.0] (tracked)
Float32[1.0]
Float32[5.91433e-14, 1.0] (tracked)
Float32[1.0]
Float32[1.04919e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.33399e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.056e-8, 1.0] (tracked)
Float32[1.0]
Float32[9.82856e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.60954e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.46752e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.64022e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.08595e-12, 1.0] (tracked)
Float32[1.0]
Float32[6.73175e-13, 1.0] (tracked)
Float32[1.0]
Float32[8.39801e-14, 1.0] (tracked)
Float32[1.0]
Float32[1.02294e-6, 0.999999] (tracked)
Float32[0.999999]
Float32[2.25373e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.82124e-8, 1.0] (tracked)
Float32[1.0]
Float32[9.14171e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.4769e-9, 1.0] (tracked)
Float32[1.0]


Float32[1.73578e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.27708e-12, 1.0] (tracked)
Float32[1.0]
Float32[2.82911e-13, 1.0] (tracked)
Float32[1.0]
Float32[3.31492e-14, 1.0] (tracked)
Float32[1.0]
Float32[9.159e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.92529e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.93703e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.18474e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.10358e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.59618e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.20654e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.88988e-12, 1.0] (tracked)
Float32[1.0]
Float32[3.58705e-13, 1.0] (tracked)
Float32[1.0]
Float32[4.20037e-14, 1.0] (tracked)
Float32[1.0]
Float32[8.67518e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.91644e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.11095e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.91187e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.28394e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.95086e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.84

Float32[1.0]
Float32[9.93655e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.14789e-13, 1.0] (tracked)
Float32[1.0]
Float32[6.08097e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.25664e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.51081e-8, 1.0] (tracked)
Float32[1.0]
Float32[4.42324e-9, 1.0] (tracked)
Float32[1.0]
Float32[6.63452e-10, 1.0] (tracked)
Float32[1.0]
Float32[9.37148e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.26484e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.61348e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.94743e-13, 1.0] (tracked)
Float32[1.0]
Float32[2.21384e-14, 1.0] (tracked)
Float32[1.0]
Float32[7.63935e-7, 0.999999] (tracked)
Float32[0.999999]
Float32[1.58933e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.22216e-8, 1.0] (tracked)
Float32[1.0]
Float32[5.93122e-9, 1.0] (tracked)
Float32[1.0]
Float32[9.08893e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.30096e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.77927e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.30136e-12, 1.0] (tracked)
Float32[

Float32[1.0]
Float32[1.38539e-13, 1.0] (tracked)
Float32[1.0]
Float32[4.23128e-7, 1.0] (tracked)
Float32[1.0]
Float32[8.15485e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.50922e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.39433e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.30653e-10, 1.0] (tracked)
Float32[1.0]
Float32[4.31138e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.34724e-12, 1.0] (tracked)
Float32[1.0]
Float32[6.28388e-13, 1.0] (tracked)
Float32[1.0]
Float32[5.28398e-7, 1.0] (tracked)
Float32[1.0]
Float32[1.07903e-7, 1.0] (tracked)
Float32[1.0]
Float32[2.13796e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.77747e-9, 1.0] (tracked)
Float32[1.0]
Float32[5.62747e-10, 1.0] (tracked)
Float32[1.0]
Float32[7.86275e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.04699e-11, 1.0] (tracked)
Float32[1.0]
Float32[1.31824e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.56979e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.75965e-14, 1.0] (tracked)
Float32[1.0]
Float32[6.35058e-7, 0.999999] (tracked)
Float32[0.999999]
Float3

Float32[3.2849e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.51386e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.24828e-8, 1.0] (tracked)
Float32[1.0]
Float32[2.10862e-9, 1.0] (tracked)
Float32[1.0]
Float32[3.03122e-10, 1.0] (tracked)
Float32[1.0]
Float32[4.08099e-11, 1.0] (tracked)
Float32[1.0]
Float32[5.22428e-12, 1.0] (tracked)
Float32[1.0]
Float32[6.32687e-13, 1.0] (tracked)
Float32[1.0]
Float32[7.2381e-14, 1.0] (tracked)
Float32[1.0]
Float32[7.78807e-15, 1.0] (tracked)
Float32[1.0]
Float32[4.44073e-7, 1.0] (tracked)
Float32[1.0]
Float32[9.22033e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.85046e-8, 1.0] (tracked)
Float32[1.0]
Float32[3.33531e-9, 1.0] (tracked)
Float32[1.0]
Float32[5.08262e-10, 1.0] (tracked)
Float32[1.0]
Float32[7.20492e-11, 1.0] (tracked)
Float32[1.0]
Float32[9.79875e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.26157e-12, 1.0] (tracked)
Float32[1.0]
Float32[1.52939e-13, 1.0] (tracked)
Float32[1.0]
Float32[1.74227e-14, 1.0] (tracked)
Float32[1.0]
Float32[2.94312e-7, 1.0] (trac

Float32[1.0]
Float32[3.02233e-7, 1.0] (tracked)
Float32[1.0]
Float32[5.60637e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.00807e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.58023e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.11696e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.6657e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.19226e-12, 1.0] (tracked)
Float32[1.0]
Float32[3.61782e-13, 1.0] (tracked)
Float32[1.0]
Float32[3.87936e-14, 1.0] (tracked)
Float32[1.0]
Float32[3.33815e-7, 1.0] (tracked)
Float32[1.0]
Float32[6.26214e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.14174e-8, 1.0] (tracked)
Float32[1.0]
Float32[1.86094e-9, 1.0] (tracked)
Float32[1.0]
Float32[2.54405e-10, 1.0] (tracked)
Float32[1.0]
Float32[3.23917e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.91827e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.48475e-13, 1.0] (tracked)
Float32[1.0]
Float32[4.85446e-14, 1.0] (tracked)
Float32[1.0]
Float32[2.62304e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.87508e-8, 1.0] (tracked)
Float32[1.0]
Float32[8.74864e-9

Float32[1.0]
Float32[5.23769e-8, 1.0] (tracked)
Float32[1.0]
Float32[9.36407e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.47125e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.96512e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.46372e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.94357e-12, 1.0] (tracked)
Float32[1.0]
Float32[3.31553e-13, 1.0] (tracked)
Float32[1.0]
Float32[3.5305e-14, 1.0] (tracked)
Float32[1.0]
MEAN RETURNS : 6.01159333077341
LAST 100 RETURNS : 5.024842476021182
Ep done
-----___Stats___-----
Policy Loss : 0.5995976945521699
Entropy Loss : -1.4192417e-10
Value Loss : 0.010737863995463134



----MAX REWRD SO FAR : 12.506037054906901---



EP : 51
Float32[2.48446e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.50629e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.89422e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.20842e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.57742e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.93395e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.25572e-12, 1.0] (tracked)
Float32[1.0]
Float

Float32[2.11239e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.83051e-11, 1.0] (tracked)
Float32[1.0]
Float32[3.61163e-12, 1.0] (tracked)
Float32[1.0]
Float32[4.33951e-13, 1.0] (tracked)
Float32[1.0]
Float32[4.91057e-14, 1.0] (tracked)
Float32[1.0]
Float32[5.21122e-15, 1.0] (tracked)
Float32[1.0]
Float32[2.21524e-7, 1.0] (tracked)
Float32[1.0]
Float32[4.25929e-8, 1.0] (tracked)
Float32[1.0]
Float32[7.88892e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.30373e-9, 1.0] (tracked)
Float32[1.0]
Float32[1.81611e-10, 1.0] (tracked)
Float32[1.0]
Float32[2.3688e-11, 1.0] (tracked)
Float32[1.0]
Float32[2.94175e-12, 1.0] (tracked)
Float32[1.0]
Float32[3.43763e-13, 1.0] (tracked)
Float32[1.0]
Float32[3.78456e-14, 1.0] (tracked)
Float32[1.0]
Float32[3.90985e-15, 1.0] (tracked)
Float32[1.0]
Float32[1.74792e-7, 1.0] (tracked)
Float32[1.0]
Float32[3.15223e-8, 1.0] (tracked)
Float32[1.0]
Float32[5.47664e-9, 1.0] (tracked)
Float32[1.0]
Float32[8.37238e-10, 1.0] (tracked)
Float32[1.0]
Float32[1.08415e-10, 1.0] (t

InterruptException: InterruptException:

In [None]:
policy(ones(4,1))

In [71]:
env = GymEnv(env_name)

GymEnv CartPole-v0
  TimeLimit
  r  = 0.0
  ∑r = 0.0

│   caller = show(::IOContext{Base.GenericIOBuffer{Array{UInt8,1}}}, ::GymEnv{PyCall.PyArray{Float64,1}}) at OpenAIGym.jl:64
└ @ OpenAIGym /home/shreyas/.julia/packages/OpenAIGym/wZkkM/src/OpenAIGym.jl:64
│   caller = show(::IOContext{Base.GenericIOBuffer{Array{UInt8,1}}}, ::GymEnv{PyCall.PyArray{Float64,1}}) at OpenAIGym.jl:65
└ @ OpenAIGym /home/shreyas/.julia/packages/OpenAIGym/wZkkM/src/OpenAIGym.jl:65


In [75]:
s = reset!(env)

4-element PyCall.PyArray{Float64,1}:
 -0.04849816918922241 
  0.013628561471056858
  0.006237321324826253
  0.04166733079767969 

In [81]:
function loss(x)
    action_probs = policy(x)
    mean(action_probs)
end

gs = Tracker.gradient(() -> loss(s),params(policy))

Grads(...)


In [82]:
policy.layers[1].W

Tracked 256×4 Array{Float32,2}:
  0.0463625    -0.0288894    0.0691652    -0.0266886 
  0.124687      0.0818793   -0.129135      0.204254  
 -0.106862      0.193595    -0.0882367    -0.0241373 
 -0.0685683     0.0427704    0.0284292     0.0874056 
 -0.176015      0.104862     0.0349711    -0.124782  
  0.0458958    -0.0517615    0.0304131     0.00226418
  0.0698005     0.0420365   -0.162521     -0.0820992 
 -0.0710014    -0.0629289    0.0741035     0.173773  
  0.036564      0.0159403    0.0240178    -0.0337437 
 -0.0883487    -0.074553     0.121145     -0.183758  
 -0.0945337    -0.00958645  -0.00795938   -0.0662095 
  0.0517599    -0.0620799   -0.000125911  -0.0471872 
 -0.0486801     0.006256     0.178224      0.092455  
  ⋮                                                  
  0.0060984     0.0074       0.149015      0.0837609 
 -0.12016      -0.0510779    0.182024      0.0255303 
 -0.142491     -0.138318    -0.078126     -0.124178  
  0.179972      0.124144    -0.0903058    -0.07220