# Functions DL

This notebook contains the functions used to solve the DSGE model using a Deep Learning (DL) approach. See the main notebook `DL_DSGE_QuantEcon.ipynb`.

In [None]:
def getSystemInfo():
    """Collect basic information about the host and return it as a JSON string.

    The report contains the keys ``platform``, ``platform-release``,
    ``platform-version``, ``architecture``, ``hostname``, ``ip-address``,
    ``mac-address``, ``processor`` and ``ram`` (total memory rounded to GB).

    Returns:
        The JSON-encoded report. If collecting any field raises, the exception
        is logged and the function returns ``None`` implicitly.
    """
    try:
        report = {
            'platform': platform.system(),
            'platform-release': platform.release(),
            'platform-version': platform.version(),
            'architecture': platform.machine(),
            'hostname': socket.gethostname(),
            'ip-address': socket.gethostbyname(socket.gethostname()),
            # 48-bit node id rendered as 12 hex digits, grouped in pairs.
            'mac-address': ':'.join(re.findall('..', f"{uuid.getnode():012x}")),
            'processor': platform.processor(),
            'ram': f"{round(psutil.virtual_memory().total / (1024.0 ** 3))} GB",
        }
        return json.dumps(report)
    except Exception as err:
        logging.exception(err)

# Decode the JSON report so the notebook displays it as a dict.
# NOTE(review): getSystemInfo() returns None when it fails, in which case
# json.loads raises a TypeError here.
json.loads(getSystemInfo())

In [None]:
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a float32 torch sparse COO tensor.

    Args:
        sparse_mx: any scipy sparse matrix (it is converted to COO format).

    Returns:
        A torch sparse COO tensor with the same shape and non-zero entries,
        stored as float32 values with int64 indices.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Row 0 holds the row indices, row 1 the column indices.
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is the supported way to build the same tensor.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a scipy sparse matrix to a tf sparse tensor.

    Args:
        X: any scipy sparse matrix (it is converted to COO format).

    Returns:
        A ``tf.SparseTensor`` built from the (row, col) index pairs, the stored
        values and the shape of ``X``.
    """
    coo = X.tocoo()
    # One (row, col) pair per stored entry. np.mat was removed in NumPy 2.0,
    # so build a plain (nnz, 2) ndarray; tf.SparseTensor indices are int64.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.SparseTensor(indices, coo.data, coo.shape)

# create a nn class
class RMSELoss(nn.Module):
    """Root-mean-square-error loss: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        # Mean squared error with nn.MSELoss defaults; the root is taken in forward().
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        """Return sqrt(MSE(yhat, y))."""
        mean_squared_error = self.mse(yhat, y)
        return mean_squared_error.sqrt()
    
# Gaussian quadrature rule
# See: https://chaospy.readthedocs.io/en/master/api/chaospy.generate_quadrature.html
def dist(order, distribution, rule = "gaussian", sp=True):
    """Generate quadrature nodes and weights with ``chaospy.generate_quadrature``.

    Args:
        order: quadrature order forwarded to chaospy.
        distribution: distribution object forwarded to chaospy.
        rule: name of the quadrature rule (default ``"gaussian"``).
        sp: forwarded as chaospy's ``sparse`` argument.

    Returns:
        The pair ``(nodes, weights)`` returned by chaospy.
    """
    nodes, weights = chaospy.generate_quadrature(
        order, distribution, rule=rule, sparse=sp
    )
    return nodes, weights
    
# quantile function for normal
def quantile_norm(mu, sigma, p):
    """Quantile (inverse CDF) at probability p of a normal with mean mu and std sigma."""
    scale = np.sqrt(2*(sigma**2))
    return mu + scale*erfinv(2*p - 1)

# quantile function for lognormal
def quantile_lognorm(mu, sigma, p):
    """Quantile at probability p of a lognormal whose log has mean mu and std sigma."""
    log_quantile = mu + np.sqrt(2*(sigma**2))*erfinv(2*p - 1)
    return np.exp(log_quantile)


In [None]:
def generate_state_vec(nb_draws, params):
    """Draw `nb_draws` random states and return them as columns (k_{t-1}, a_t).

    Capital is drawn uniformly between `params.x_low` and `params.x_high`
    (from `torch.rand`, or from `params.soboleng` when `params.use_Sobol` is
    set) if `params.x_distribution == "Uniform"`; otherwise it is drawn from a
    normal with mean `params.mean_K_linear` and std `params.stdev_K_linear`.
    Productivity is sampled from `params.distrib_a_torch`.
    """
    # A. Values for beginning-of-period capital (kt-1).
    if params.x_distribution != "Uniform":
        capital = torch.normal(
            mean=params.mean_K_linear,
            std=params.stdev_K_linear,
            size=(nb_draws,),
        ).unsqueeze(1)
    elif params.use_Sobol == False:
        uniform_draws = torch.rand(nb_draws)
        capital = ((params.x_low - params.x_high) * uniform_draws + params.x_high).unsqueeze(1)
    else:
        # Very slow if T is large. The Sobol draw is used as returned (no unsqueeze).
        sobol_draws = params.soboleng.draw(nb_draws)
        capital = ((params.x_low - params.x_high) * sobol_draws + params.x_high)

    # B. Values for a_t. Draw in ergodic set.
    productivity = params.distrib_a_torch.sample((nb_draws,))

    # Concatenate A and B column-wise.
    return torch.column_stack([capital, productivity])

def generate_innovation_vec(nb_draws, params):
    """Draw `nb_draws` zero-mean normal innovations with std `params.σ_e` as an (nb_draws, 1) column."""
    draws = torch.normal(mean=0, std=params.σ_e, size=(nb_draws,))
    return draws[:, None]

In [None]:
def Ξ_torch(neural_net, params): # objective function for training
    """Training loss: mean of the product of two residuals at the same random states.

    `params.T` states are drawn once, then two separate innovation vectors are
    drawn. The residuals under each innovation vector are multiplied
    element-wise (the "all-in-one expectation operator") and averaged.

    neural_net: a pytorch neural network
    params: a params object
    """
    # I. Random current states, shared by both residual evaluations.
    states = generate_state_vec(params.T, params)

    # II. Two separate draws of innovations.
    shock_a = generate_innovation_vec(params.T, params)
    shock_b = generate_innovation_vec(params.T, params)

    # III. Residuals at the same grid points under each realization of shocks.
    resid_a, resid_b = (
        Residuals_torch(neural_net, params, states, shock)
        for shock in (shock_a, shock_b)
    )

    # Mean of the cross product of the two residual vectors.
    return torch.mean(resid_a * resid_b)

# Residual function
# v2 seems to result in slightly better performance
# v1 has the advantage of giving you directly the model's error (take the square root)
def Residuals_torch(neural_net, params, state_vec, e_r, tol=torch.tensor([1e-6]), penalty_factor = torch.tensor([100.0]), debug_n = False, version_resid = 1):
    """Euler-equation residuals at the given states for one realization of shocks.

    Args:
        neural_net: network forwarded to `model_normalized` (defined elsewhere),
            which returns consumption and hours worked.
        params: parameter object; reads `f`, `delta`, `rho`, `α`, `β` and,
            when `debug_n` is set, `n_cst`.
        state_vec: tensor whose column 0 is k_{t-1} and column 1 is a_t.
        e_r: innovations added to `rho*log(a_t)` to obtain log a_{t+1}.
        tol: lower bound applied to the capital stock k_t.
        penalty_factor: weight of the quadratic penalty on negative capital.
        debug_n: if true, hours worked are overwritten by `params.n_cst`.
        version_resid: residual formula.
            1: beta*(c_t/c_{t+1})*(alpha*y_{t+1}/k_t + 1 - delta) - 1
            2: relative difference between 1/c_t and the discounted expected
               term, plus the negative-capital penalty
            3: 1/(beta*(c_t/c_{t+1})*(alpha*y_{t+1}/k_t + 1 - delta)) - 1

    Returns:
        Tensor of residuals, one per state. The penalty is only added to the
        residual in version 2; in the other versions it is only reported.

    Raises:
        ValueError: if `version_resid` is not 1, 2 or 3.
    """
    # Fail fast, before any network evaluation. The original `raise("...")`
    # raised a string, which itself produces a TypeError.
    if version_resid not in (1, 2, 3):
        raise ValueError(f"Error. version_resid unknown: {version_resid!r}")

    # --- Current period ---
    k_min_1 = state_vec[:,0].unsqueeze(1)
    a_t = state_vec[:,1].unsqueeze(1)
    # consumption and hours worked today
    c_t, n_t = model_normalized(state_vec, neural_net, params)
    if debug_n:
        # overwrite hours worked with a constant value
        n_t = params.n_cst * torch.ones(c_t.shape)
    y_t = params.f(a_t, k_min_1, n_t) # production
    # Budget constraint implies the investment decision
    x_t = y_t - c_t
    # capital stock next period
    k_t = (1 - params.delta)*k_min_1 + x_t
    # Quadratic penalty on negative capital, computed before clamping.
    penalty_t = penalty_factor*torch.maximum(torch.tensor([0.0]), -k_t)**2
    k_t = torch.maximum(tol, k_t)

    # --- Next period ---
    # transition for a_t (AR(1) in logs)
    a_tomorrow = torch.exp(params.rho*torch.log(a_t) + e_r)
    input_tomorrow = torch.column_stack([k_t, a_tomorrow])
    # consumption and hours worked tomorrow
    c_tomorrow, n_tomorrow = model_normalized(input_tomorrow, neural_net, params)
    if debug_n:
        n_tomorrow = params.n_cst * torch.ones(c_t.shape)
    y_tomorrow = params.f(a_tomorrow, k_t, n_tomorrow) # production
    x_tomorrow = y_tomorrow - c_tomorrow
    k_tomorrow = (1 - params.delta)*k_t + x_tomorrow
    penalty_tomorrow = penalty_factor*torch.maximum(torch.tensor([0.0]), -k_tomorrow)**2

    sum_penalty = 0.5*penalty_t + 0.5*penalty_tomorrow
    total_penalty = torch.sum(sum_penalty)
    if total_penalty > 0.0:
        print(f"Sum penalty: {total_penalty}.")

    # --- Euler error ---
    # Gross return term shared by all three residual definitions.
    gross_return = params.α*(y_tomorrow/k_t) + 1.0 - params.delta
    if version_resid == 1:
        return params.β*((c_t/c_tomorrow)*gross_return) - 1.0
    if version_resid == 2:
        # % difference between the two sides of the Euler equation
        RHS = 1/c_t
        LHS = params.β*((1/c_tomorrow)*gross_return)
        return (RHS - LHS)/(0.5*RHS + 0.5*LHS) + sum_penalty
    # version_resid == 3: unit-free Euler error
    return 1/(params.β*((c_t/c_tomorrow)*gross_return)) - 1.0

def simul_model(neural_net, params, N, tol=torch.tensor([1e-6])):
    """
    Simulate the model for N periods, starting at the non-stochastic steady state.

    neural_net: network forwarded to `model_normalized`
    params: parameter object (steady-state tensors, `rho`, `σ_e`, `delta`, `f`)
    N: length simulation
    tol: accepted but not used in the body

    Returns the pair (series, names): `series` stacks the columns
    Z, K, Y, C, N in that order and `names` lists those labels. Investment is
    computed along the way but is not part of the returned series.
    """
    with torch.no_grad():
        # One (N, 1) zero column per simulated variable.
        a_t, y_t, k_t, c_t, n_t, x_t = (torch.zeros(N).unsqueeze(1) for _ in range(6))

        # Period 0: start at the non-stochastic steady state.
        a_t[0] = params.z_ss_tensor
        k_t[0] = params.k_ss_tensor
        y_t[0] = params.y_ss_tensor
        c_t[0] = params.c_ss_tensor
        n_t[0] = params.n_ss_tensor
        x_t[0] = y_t[0] - c_t[0]

        # Innovations for the whole path, drawn in one call.
        e_t = torch.normal(mean=0, std=params.σ_e, size=(N,)).unsqueeze(1)

        for i in range(1, N):
            prev = i - 1
            # Productivity follows an AR(1) in logs.
            a_t[i] = torch.exp(params.rho*torch.log(a_t[prev]) + e_t[i])
            # Decisions given last period's capital and today's productivity.
            state_vec = torch.column_stack([k_t[prev], a_t[i]])
            c_t[i], n_t[i] = model_normalized(state_vec, neural_net, params)
            y_t[i] = params.f(a_t[i], k_t[prev], n_t[i]) # production
            # Budget constraint implies the investment decision.
            x_t[i] = y_t[i] - c_t[i]
            # Capital stock carried into the next period.
            k_t[i] = (1 - params.delta)*k_t[prev] + x_t[i]

    names = ['Z', 'K', 'Y', 'C', 'N']
    series = torch.column_stack([a_t, k_t, y_t, c_t, n_t])
    return series, names


In [None]:
def evaluate_accuracy_pytorch_MC_frozen(neural_net, params, tol=torch.tensor([1e-6]), use_linear = True, debug = False, distance_f = torch.abs):
    """
    Function to evaluate the accuracy using Monte Carlo for the expectation
    Use a pre-determined series of shocks to approximate the expectations

    Parameters:
        neural_net: network forwarded to `model_normalized`; only used when
            `use_linear` is False.
        params: parameter object. Reads the stored states
            (`k_min_1_and_a_t_accuracy`, `k_min1_accuracy`, `a_t_accuracy`),
            the stored next-period productivity `a_tomorrow_accuracy`,
            `repeat_vector_accuracy`, the sparse matrix `W_accuracy`, and
            `f`, `delta`, `α`, `β`.
        tol: lower bound applied to the capital stocks.
        use_linear: if True (the default), policies come from
            `params.c_linear` / `params.n_linear` instead of the network.
        debug: if True, hours are replaced by `params.n_cst` and consumption by
            `params.c_delta_one(y)`.
        distance_f: function applied to `1 - 1/expectation` (default `torch.abs`).

    Returns:
        NumPy array holding `distance_f(1 - 1/expect_t)`.
    """
    with torch.no_grad(): 
        #-------------------
        # Evaluate the model
        #-------------------
        ## Period t
        if use_linear == False:
            c_t, n_t = model_normalized(params.k_min_1_and_a_t_accuracy, neural_net, params)
            # squeeze dimension 1 of the network outputs
            c_t = c_t.squeeze(1)
            n_t = n_t.squeeze(1)
        else:
            c_t = params.c_linear(params.k_min1_accuracy, params.a_t_accuracy)
            n_t = params.n_linear(params.k_min1_accuracy, params.a_t_accuracy)
        y_t = params.f(params.a_t_accuracy, params.k_min1_accuracy, n_t) #production
        if debug == True:
            # debug mode: constant hours and the c_delta_one consumption rule
            n_t = params.n_cst*torch.ones(c_t.shape)
            y_t = params.f(params.a_t_accuracy, params.k_min1_accuracy, n_t) #production
            c_t = params.c_delta_one(y_t)
        # BC implies investment decision
        x_t = y_t - c_t
        # capital stock next period
        k_t = (1 - params.delta)*params.k_min1_accuracy + x_t
        k_t = torch.maximum(tol, k_t)

        # Repeat the period-t vectors via a Kronecker product with
        # params.repeat_vector_accuracy.
        # NOTE(review): presumably one copy per stored shock — confirm shapes against params.
        c_t_repeated = torch.kron(c_t, params.repeat_vector_accuracy)
        # NOTE(review): n_t_repeated is not used below.
        n_t_repeated = torch.kron(n_t, params.repeat_vector_accuracy)
        k_t_repeated = torch.kron(k_t, params.repeat_vector_accuracy)

        ## Period t+1
        input_tomorrow = torch.column_stack([k_t_repeated, params.a_tomorrow_accuracy])
        # consumption and hours worked tomorrow
        if use_linear == False:
            c_tomorrow, n_tomorrow = model_normalized(input_tomorrow, neural_net, params)
            # squeeze dimension 1 of the network outputs
            c_tomorrow = c_tomorrow.squeeze(1)
            n_tomorrow = n_tomorrow.squeeze(1)
        else:
            c_tomorrow = params.c_linear(k_t_repeated, params.a_tomorrow_accuracy)
            n_tomorrow = params.n_linear(k_t_repeated, params.a_tomorrow_accuracy)
        y_tomorrow = params.f(params.a_tomorrow_accuracy, k_t_repeated, n_tomorrow) #production
        if debug == True:
            n_tomorrow = params.n_cst*torch.ones(c_tomorrow.shape)
            y_tomorrow = params.f(params.a_tomorrow_accuracy, k_t_repeated, n_tomorrow) #production
            c_tomorrow = params.c_delta_one(y_tomorrow)
        # BC implies investment decision
        x_tomorrow = y_tomorrow - c_tomorrow
        # NOTE(review): k_tomorrow is computed and clamped but not used below.
        k_tomorrow = (1 - params.delta)*k_t_repeated + x_tomorrow
        k_tomorrow = torch.maximum(tol, k_tomorrow)

        # Term inside the expectation, as a column for the sparse product
        vals = ((c_t_repeated/c_tomorrow)*(params.α*(y_tomorrow/k_t_repeated) + 1.0 - params.delta)).unsqueeze(1)
        # Calculate expectation for each value of the state space (kt-1, at)
        expect_t = params.β*torch.sparse.mm(params.W_accuracy, vals).squeeze(1)
        # Euler error: distance between 1 and the inverse of the expectation
        euler_resid = distance_f(1.0 - (1.0/expect_t)) 
    return euler_resid.numpy()

def evaluate_accuracy_pytorch_Gaussian_frozen(neural_net, params, tol=torch.tensor([1e-6]), use_linear = True, debug = False, distance_f = torch.abs):
    """
    Function to evaluate the accuracy using Gaussian quadrature for the expectation
    Uses the states stored on `params` and the quadrature nodes `params.nodes_torch`

    Parameters:
        neural_net: network forwarded to `model_normalized`; only used when
            `use_linear` is False.
        params: parameter object. Reads the stored states
            (`k_min_1_and_a_t_accuracy`, `k_min1_accuracy`, `a_t_accuracy`),
            the quadrature data (`n_nodes`, `nodes_torch`, `W_gaussian`), and
            `rho`, `f`, `delta`, `α`, `β`.
        tol: lower bound applied to the capital stocks.
        use_linear: if True (the default), policies come from
            `params.c_linear` / `params.n_linear` instead of the network.
        debug: if True, hours are replaced by `params.n_cst` and consumption by
            `params.c_delta_one(y)`.
        distance_f: function applied to `1 - 1/expectation` (default `torch.abs`).

    Returns:
        NumPy array holding `distance_f(1 - 1/expect_t)`.
    """
    with torch.no_grad(): 
        #-------------------
        # Evaluate the model
        #-------------------
        ## Period t
        if use_linear == False:
            c_t, n_t = model_normalized(params.k_min_1_and_a_t_accuracy, neural_net, params)
            # squeeze dimension 1 of the network outputs
            c_t = c_t.squeeze(1)
            n_t = n_t.squeeze(1)
        else:
            c_t = params.c_linear(params.k_min1_accuracy, params.a_t_accuracy)
            n_t = params.n_linear(params.k_min1_accuracy, params.a_t_accuracy)
        y_t = params.f(params.a_t_accuracy, params.k_min1_accuracy, n_t) #production
        if debug == True:
            # debug mode: constant hours and the c_delta_one consumption rule
            n_t = params.n_cst*torch.ones(c_t.shape)
            y_t = params.f(params.a_t_accuracy, params.k_min1_accuracy, n_t) #production
            c_t = params.c_delta_one(y_t)
        # BC implies investment decision
        x_t = y_t - c_t
        # capital stock next period
        k_t = (1 - params.delta)*params.k_min1_accuracy + x_t
        k_t = torch.maximum(tol, k_t)

        # Repeat each state value n_nodes times in a row: (f1, f1, ..., f1, f2, f2, ..., fM, ... fM),
        # then make it a column. N is the number of nodes for the gaussian quadrature.
        c_t_repeated = c_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        # NOTE(review): n_t_repeated is not used below.
        n_t_repeated = n_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        k_t_repeated = k_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        a_t_repeated = params.a_t_accuracy.repeat_interleave(params.n_nodes).unsqueeze(1)
        # Tile the whole node vector once per state: (n1, n2, ..., nN, n1, n2, ..., nN, ...),
        # so every state is paired with every node.
        # assumes params.nodes_torch is 1-D — TODO confirm
        nodes_torch_repeated = params.nodes_torch.repeat(len(c_t)).unsqueeze(1)
        # a(t) next period, one value per (state, node) pair
        a_tomorrow = torch.exp(torch.log(a_t_repeated)*params.rho + nodes_torch_repeated)

        ## Period t+1
        input_tomorrow = torch.column_stack([k_t_repeated, a_tomorrow])
        # consumption and hours worked tomorrow
        if use_linear == False:
            # No squeeze here: the repeated period-t vectors are columns (unsqueeze(1) above).
            c_tomorrow, n_tomorrow = model_normalized(input_tomorrow.float(), neural_net, params)
        else:
            c_tomorrow = params.c_linear(k_t_repeated, a_tomorrow)
            n_tomorrow = params.n_linear(k_t_repeated, a_tomorrow)
        y_tomorrow = params.f(a_tomorrow, k_t_repeated, n_tomorrow) #production

        if debug == True:
            n_tomorrow = params.n_cst*torch.ones(c_tomorrow.shape)
            y_tomorrow = params.f(a_tomorrow, k_t_repeated, n_tomorrow) #production
            c_tomorrow = params.c_delta_one(y_tomorrow)
        # BC implies investment decision
        x_tomorrow = y_tomorrow - c_tomorrow
        # NOTE(review): k_tomorrow is computed and clamped but not used below.
        k_tomorrow = (1 - params.delta)*k_t_repeated + x_tomorrow
        k_tomorrow = torch.maximum(tol, k_tomorrow)
        # Term inside the expectation, cast to float32 for the sparse product
        vals = ((c_t_repeated/c_tomorrow)*(params.α*(y_tomorrow/k_t_repeated) + 1.0 - params.delta)).float()
        # Calculate expectation for each value of the state space (kt-1, at)
        expect_t = params.β*torch.sparse.mm(params.W_gaussian, vals).squeeze(1)
        # Euler error: distance between 1 and the inverse of the expectation
        euler_resid = distance_f(1.0 - (1.0/expect_t))
    return euler_resid.numpy()
    

In [1]:
def evaluate_accuracy_pytorch_MC(neural_net, params, k_min1_accuracy, a_t_accuracy, nb_innovations, tol=torch.tensor([1e-6]), use_linear = True, debug = False, distance_f = torch.abs):
    """
    Function to evaluate the accuracy using Monte Carlo for the expectation
    Use new draws

    Parameters:
        neural_net: network forwarded to `model_normalized`; only used when
            `use_linear` is False.
        params: parameter object. Reads `M_accuracy`, `N_accuracy`,
            `W_accuracy`, `rho`, `f`, `delta`, `α`, `β`, and `σ_e` (through
            `generate_innovation_vec`).
        k_min1_accuracy: beginning-of-period capital at which to evaluate.
        a_t_accuracy: productivity at which to evaluate.
        nb_innovations: number of fresh shocks drawn per state.
        tol: lower bound applied to the capital stocks.
        use_linear: if True (the default), policies come from
            `params.c_linear` / `params.n_linear` instead of the network.
        debug: if True, hours are replaced by `params.n_cst` and consumption by
            `params.c_delta_one(y)`.
        distance_f: function applied to `1 - 1/expectation` (default `torch.abs`).

    Returns:
        NumPy array holding `distance_f(1 - 1/expect_t)`.
    """
    # Generate nb_innovations shocks for each state
    nb_states = len(k_min1_accuracy)
    nb_innovations_tot = int(nb_states*nb_innovations)
    e_r = generate_innovation_vec(nb_innovations_tot, params)
    k_min_1_and_a_t_accuracy = torch.column_stack([k_min1_accuracy, a_t_accuracy])
    
    # Create integration matrix if necessary: reuse params.W_accuracy only when
    # both the number of states and the number of shocks match the stored sizes.
    if (nb_states != params.M_accuracy) | (nb_innovations != params.N_accuracy):
        A = sparse.eye(nb_states)
        # Row of equal weights 1/nb_innovations
        B = sparse.csr_matrix(np.ones(nb_innovations)/nb_innovations)
        # Sparse kronecker product. Then convert to pytorch sparse
        W_accuracy = sparse_mx_to_torch_sparse_tensor(sparse.kron(A, B))
    else:
        W_accuracy = params.W_accuracy
    
    with torch.no_grad(): 
        #-------------------
        # Evaluate the model
        #-------------------
        ## Period t
        if use_linear == False:
            c_t, n_t = model_normalized(k_min_1_and_a_t_accuracy, neural_net, params)
            # squeeze dimension 1 of the network outputs
            c_t = c_t.squeeze(1)
            n_t = n_t.squeeze(1)
        else:
            c_t = params.c_linear(k_min1_accuracy, a_t_accuracy)
            n_t = params.n_linear(k_min1_accuracy, a_t_accuracy)
        y_t = params.f(a_t_accuracy, k_min1_accuracy, n_t) #production
        if debug == True:
            # debug mode: constant hours and the c_delta_one consumption rule
            n_t = params.n_cst*torch.ones(c_t.shape)
            y_t = params.f(a_t_accuracy, k_min1_accuracy, n_t) #production
            c_t = params.c_delta_one(y_t)
        # BC implies investment decision
        x_t = y_t - c_t
        # capital stock next period
        k_t = (1 - params.delta)*k_min1_accuracy + x_t
        k_t = torch.maximum(tol, k_t)

        # Repeat each state value nb_innovations times in a row, then make it a column
        c_t_repeated = c_t.repeat_interleave(nb_innovations).unsqueeze(1)
        # NOTE(review): n_t_repeated is not used below.
        n_t_repeated = n_t.repeat_interleave(nb_innovations).unsqueeze(1)
        k_t_repeated = k_t.repeat_interleave(nb_innovations).unsqueeze(1)
        a_t_repeated = a_t_accuracy.repeat_interleave(nb_innovations).unsqueeze(1)
        
        ## Period t+1
        # one next-period productivity per (state, shock) pair
        a_tomorrow_accuracy = torch.exp(params.rho*torch.log(a_t_repeated) + e_r)
        input_tomorrow = torch.column_stack([k_t_repeated, a_tomorrow_accuracy])
        
        # consumption and hours worked tomorrow
        if use_linear == False:
            c_tomorrow, n_tomorrow = model_normalized(input_tomorrow, neural_net, params)
            # NOTE(review): the two assignments below are no-ops (no squeeze is applied).
            c_tomorrow = c_tomorrow
            n_tomorrow = n_tomorrow
        else:
            c_tomorrow = params.c_linear(k_t_repeated, a_tomorrow_accuracy)
            n_tomorrow = params.n_linear(k_t_repeated, a_tomorrow_accuracy)
        y_tomorrow = params.f(a_tomorrow_accuracy, k_t_repeated, n_tomorrow) #production
        if debug == True:
            n_tomorrow = params.n_cst*torch.ones(c_tomorrow.shape)
            y_tomorrow = params.f(a_tomorrow_accuracy, k_t_repeated, n_tomorrow) #production
            c_tomorrow = params.c_delta_one(y_tomorrow)
        # BC implies investment decision
        x_tomorrow = y_tomorrow - c_tomorrow
        # NOTE(review): k_tomorrow is computed and clamped but not used below.
        k_tomorrow = (1 - params.delta)*k_t_repeated + x_tomorrow
        k_tomorrow = torch.maximum(tol, k_tomorrow)

        # Term inside the expectation
        vals = (c_t_repeated/c_tomorrow)*(params.α*(y_tomorrow/k_t_repeated) + 1.0 - params.delta)

        # Calculate expectation for each value of the state space (kt-1, at)
        expect_t = params.β*torch.sparse.mm(W_accuracy, vals).squeeze(1)
        # Euler error: distance between 1 and the inverse of the expectation
        euler_resid = distance_f(1.0 - (1.0/expect_t)) 
        
    return euler_resid.numpy()

def evaluate_accuracy_pytorch_Gaussian(neural_net, params, k_min1_accuracy, a_t_accuracy, tol=torch.tensor([1e-6]), use_linear = True, debug = False, distance_f = torch.abs):
    """
    Function to evaluate the accuracy using Gaussian quadrature for the expectation.
    Have an input for kt-1 and at

    Parameters:
        neural_net: network forwarded to `model_normalized`; only used when
            `use_linear` is False.
        params: parameter object. Reads `M_accuracy`, `B_gaussian`,
            `W_gaussian`, `n_nodes`, `nodes_torch`, `rho`, `f`, `delta`,
            `α`, `β`.
        k_min1_accuracy: beginning-of-period capital at which to evaluate.
        a_t_accuracy: productivity at which to evaluate.
        tol: lower bound applied to the capital stocks.
        use_linear: if True (the default), policies come from
            `params.c_linear` / `params.n_linear` instead of the network.
        debug: if True, hours are replaced by `params.n_cst` and consumption by
            `params.c_delta_one(y)`.
        distance_f: function applied to `1 - 1/expectation` (default `torch.abs`).

    Returns:
        NumPy array holding `distance_f(1 - 1/expect_t)`.
    """
    k_min_1_and_a_t_accuracy = torch.column_stack([k_min1_accuracy, a_t_accuracy])

    # Rebuild the integration matrix when the number of states differs from the
    # stored one; otherwise reuse params.W_gaussian.
    if len(k_min1_accuracy) != params.M_accuracy:
        # To calculate expectations
        M_accuracy = len(k_min1_accuracy)
        A_gaussian = sparse.eye(M_accuracy)
        W_gaussian_local = sparse_mx_to_torch_sparse_tensor(sparse.kron(A_gaussian, params.B_gaussian))
    else:
        W_gaussian_local = params.W_gaussian

    with torch.no_grad(): 
        #-------------------
        # Evaluate the model
        #-------------------
        ## Period t
        if use_linear == False:
            c_t, n_t = model_normalized(k_min_1_and_a_t_accuracy, neural_net, params)
            # squeeze dimension 1 of the network outputs
            c_t = c_t.squeeze(1)
            n_t = n_t.squeeze(1)
        else:
            c_t = params.c_linear(k_min1_accuracy, a_t_accuracy)
            n_t = params.n_linear(k_min1_accuracy, a_t_accuracy)
        y_t = params.f(a_t_accuracy, k_min1_accuracy, n_t) #production
        if debug == True:
            # debug mode: constant hours and the c_delta_one consumption rule
            n_t = params.n_cst*torch.ones(c_t.shape)
            y_t = params.f(a_t_accuracy, k_min1_accuracy, n_t) #production
            c_t = params.c_delta_one(y_t)
        # BC implies investment decision
        x_t = y_t - c_t
        # capital stock next period
        k_t = (1 - params.delta)*k_min1_accuracy + x_t
        k_t = torch.maximum(tol, k_t)

        # Repeat each state value n_nodes times in a row: (f1, f1, ..., f1, f2, f2, ..., fM, ... fM),
        # then make it a column. N is the number of nodes for the gaussian quadrature.
        c_t_repeated = c_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        # NOTE(review): n_t_repeated is not used below.
        n_t_repeated = n_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        k_t_repeated = k_t.repeat_interleave(params.n_nodes).unsqueeze(1)
        a_t_repeated = a_t_accuracy.repeat_interleave(params.n_nodes).unsqueeze(1)
        # Tile the whole node vector once per state: (n1, n2, ..., nN, n1, n2, ..., nN, ...),
        # so every state is paired with every node.
        # assumes params.nodes_torch is 1-D — TODO confirm
        nodes_torch_repeated = params.nodes_torch.repeat(len(c_t)).unsqueeze(1)
        # a(t) next period, one value per (state, node) pair
        a_tomorrow = torch.exp(torch.log(a_t_repeated)*params.rho + nodes_torch_repeated)

        ## Period t+1
        input_tomorrow = torch.column_stack([k_t_repeated, a_tomorrow])
        # consumption and hours worked tomorrow
        if use_linear == False:
            # No squeeze here: the repeated period-t vectors are columns (unsqueeze(1) above).
            c_tomorrow, n_tomorrow = model_normalized(input_tomorrow.float(), neural_net, params)
        else:
            c_tomorrow = params.c_linear(k_t_repeated, a_tomorrow)
            n_tomorrow = params.n_linear(k_t_repeated, a_tomorrow)
        y_tomorrow = params.f(a_tomorrow, k_t_repeated, n_tomorrow) #production

        if debug == True:
            n_tomorrow = params.n_cst*torch.ones(c_tomorrow.shape)
            y_tomorrow = params.f(a_tomorrow, k_t_repeated, n_tomorrow) #production
            c_tomorrow = params.c_delta_one(y_tomorrow)
        # BC implies investment decision
        x_tomorrow = y_tomorrow - c_tomorrow
        # NOTE(review): k_tomorrow is computed and clamped but not used below.
        k_tomorrow = (1 - params.delta)*k_t_repeated + x_tomorrow
        k_tomorrow = torch.maximum(tol, k_tomorrow)
        # Term inside the expectation, cast to float32 for the sparse product
        vals = ((c_t_repeated/c_tomorrow)*(params.α*(y_tomorrow/k_t_repeated) + 1.0 - params.delta)).float()
        # Calculate expectation for each value of the state space (kt-1, at)
        expect_t = params.β*torch.sparse.mm(W_gaussian_local, vals).squeeze(1)
        # Euler error: distance between 1 and the inverse of the expectation
        euler_resid = distance_f(1.0 - (1.0/expect_t))
    return euler_resid.numpy()

NameError: name 'torch' is not defined

In [None]:
# Objective function to initialize the model
# Fit the model when delta = 1
def Ξ_torch_init(neural_net, params, init_type = 1): # objective function for training
    """Loss used to initialize the network: fit consumption to a simple target rule.

    `params.T` states (k_{t-1}, a_t) are drawn at random and the network's
    consumption is compared with a target chosen by `init_type`:

        1: `params.c_delta_one(y_t)`, with y_t = f(a_t, k_{t-1}, n_cst)
        2: the steady-state value `params.c_ss_tensor`
        3: the linearized policy `params.c_linear(k_{t-1}, a_t)`
        4: sqrt(y_t), with y_t = f(a_t, k_{t-1}, n_cst)

    Only consumption enters the loss; hours worked are ignored.

    Returns:
        Scalar tensor, the mean squared difference between the network's
        consumption and the target.

    Raises:
        ValueError: if `init_type` is not 1, 2, 3 or 4.
    """
    # The original `raise("...")` raised a string, which itself produces a
    # TypeError; validate up front and raise a proper exception instead.
    if init_type not in (1, 2, 3, 4):
        raise ValueError(f"Unknown init_type: {init_type!r}")

    # I. Randomly drawing current states
    # A. values for beginning of period capital (kt-1)
    if params.x_distribution == "Uniform":
        k_min_1 = ((params.x_low - params.x_high) * torch.rand(params.T) + params.x_high).unsqueeze(1)
    else:
        k_min_1 = torch.normal(mean=params.mean_K_linear, std=params.stdev_K_linear, size=(params.T,)).unsqueeze(1)
    # B. Values for a_t. Draw in ergodic set.
    a_t = params.distrib_a_torch.sample((params.T,))
    # Concat A and B
    k_min_1_and_a_t = torch.column_stack([k_min_1, a_t])

    # Hours worked are not part of the initialization loss.
    c_t, _ = model_normalized(k_min_1_and_a_t, neural_net, params)

    # II. Target consumption
    if init_type == 1:
        # V1. Solution when delta = 1.0
        c_target = params.c_delta_one(params.f(a_t, k_min_1, params.n_cst))
    elif init_type == 2:
        # V2. Steady-state value
        c_target = params.c_ss_tensor
    elif init_type == 3:
        # V3. Linearized model
        c_target = params.c_linear(k_min_1, a_t)
    else:
        # V4. sqrt(income)
        c_target = torch.sqrt(params.f(a_t, k_min_1, params.n_cst))

    return torch.mean((c_t - c_target)**2)

def create_optimizer(neural_net, params):
    """Build the optimizer named by `params.optimizer` for the network's parameters.

    Supported names are "Adam", "SGD" and "SWA" (an Adam optimizer wrapped in
    `SWA` using `params.swa_start` and `params.swa_freq`). All of them use
    `params.lr` as learning rate.

    Raises:
        ValueError: if the name is not one of the supported optimizers.
    """
    choice = params.optimizer
    if choice == "Adam":
        return torch.optim.Adam(neural_net.parameters(), lr=params.lr)
    if choice == "SGD":
        return torch.optim.SGD(neural_net.parameters(), lr=params.lr)
    if choice == "SWA":
        inner = torch.optim.Adam(neural_net.parameters(), lr=params.lr)
        return SWA(inner, swa_start=params.swa_start, swa_freq=params.swa_freq, swa_lr=params.lr)
    raise ValueError(f"optimizer {params.optimizer} unknown")

In [None]:
def numpy_flat(a):
    """
    Flatten a (possibly nested) list: convert it to a NumPy array and
    return its elements, in flat iteration order, as a Python list.
    """
    as_array = np.array(a)
    return [element for element in as_array.flat]

In [None]:
def calculate_effective_lr(optimizer, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Function to calculate the effective learning rate used in Adam update

    For a parameter with a gradient and a positive `step` in the optimizer
    state, the bias-corrected step size is divided element-wise by
    sqrt(exp_avg_sq) + epsilon, averaged, and scaled by (1 - beta1).
    See: https://arxiv.org/pdf/1412.6980.pdf (page 2)

    NOTE(review): the result is overwritten on every iteration, so the value
    returned is the one of the last eligible parameter (0 if there is none).
    """
    result = 0
    for group in optimizer.param_groups:
        for weight in group['params']:
            if weight.grad is None:
                continue
            state = optimizer.state[weight]
            if not ('step' in state and state['step'] > 0):
                continue
            step = state['step']
            # Exponential moving averages of the gradient and of its square.
            _first_moment, second_moment = state['exp_avg'], state['exp_avg_sq']
            # Bias-corrected step size.
            step_size = group['lr'] * np.sqrt(1 - beta2 ** step) / (1 - beta1 ** step)
            per_element = step_size / (torch.sqrt(second_moment) + epsilon)
            # learning rate is applied to (1-beta1)*gradient
            result = np.mean((1 - beta1) * per_element.mean().item())
    return result
