In [24]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

np.set_printoptions(suppress=True)

import warnings
#suppress warnings
warnings.filterwarnings('ignore')

theta_list = []
checkpoint_list = []



# Run configuration.  The collections get plural names so that the loop
# variables `year` and `period` never overwrite the lists being iterated.
years = [2014]
periods = ['may-jun'] # inputs: 'may-jun', 'jun-jul'

for year in years:

    # The period loop further down is spelled `for period in period:` and so
    # rebinds `period` to a plain string.  Hand it a fresh list for every year;
    # otherwise a second year would iterate over the characters of the last
    # period string instead of over the configured periods.
    period = list(periods)

    # Import data
    if year not in (2014, 2015, 2016, 2017):
        # An unsupported year used to fall through every branch, leaving `X`
        # undefined or still pointing at the previous year's archive.
        raise ValueError('no processed data file is configured for year {!r}'.format(year))

    X = np.load('../data/processed/data_{:d}.npz'.format(int(year)))
    N = X['N']

    def _column(key):
        """Return entry `key` of the loaded archive reshaped to an (N, 1) column."""
        return X[key].reshape(N, 1)

    # Arrays that are not split by month
    dist = X['distance']
    tI1 = _column('tI1')
    tI2 = _column('tI2')
    sI2 = _column('sI2')

    # Monthly series (April .. July), each stored as an (N, 1) column
    y_apr = _column('y_apr')
    y_may = _column('y_may')
    y_jun = _column('y_jun')
    y_jul = _column('y_jul')

    n_apr = _column('n_apr')
    n_may = _column('n_may')
    n_jun = _column('n_jun')
    n_jul = _column('n_jul')

    a_apr = _column('a_apr')
    a_may = _column('a_may')
    a_jun = _column('a_jun')
    a_jul = _column('a_jul')

    # The wind arrays keep the shape they were stored with (indexed as [:, i] later on)
    w_apr = X['wind_apr']
    w_may = X['wind_may']
    w_jun = X['wind_jun']
    w_jul = X['wind_jul']

    sI1_apr = _column('sI1_apr')
    sI1_may = _column('sI1_may')
    sI1_jun = _column('sI1_jun')
    sI1_jul = _column('sI1_jul')

    s_apr = _column('s_apr')
    s_may = _column('s_may')
    s_jun = _column('s_jun')
    s_jul = _column('s_jul')


    # Function to normalize the data
    def norm(x):
        """Min-max scale `x` to the interval [0, 1].

        The minimum and maximum are taken over the whole array (no axis), so a
        2-D input is scaled with one global range.

        Parameters
        ----------
        x : array_like
            Values to rescale.

        Returns
        -------
        numpy.ndarray
            `(x - min) / (max - min)`.  When every entry is identical the range
            is zero; an all-zero float array of the same shape is returned
            instead of the NaNs a 0/0 division would produce.
        """
        lowest = np.min(x)
        span = np.max(x) - lowest

        if span == 0:
            # Constant input: avoid 0/0 -> NaN, which would poison every later computation.
            return np.zeros_like(x, dtype=float)

        return (x - lowest) / span
    
    # Rescale the distance array and each monthly `a_*` column to [0, 1];
    # every array is normalised on its own range.
    dist = norm(dist)

    a_apr, a_may, a_jun, a_jul = [
        norm(monthly) for monthly in (a_apr, a_may, a_jun, a_jul)
    ]
    
    
    for period in period:

        # Pick the response month (y, n) and the covariates of the month before it (*_lag).
        if period == 'may-jun':
            y, n = y_jun, n_jun
            y_lag, n_lag = y_may, n_may
            a_lag, w_lag = a_may, w_may
            sI1_lag, s_lag = sI1_may, s_may

        elif period == 'jun-jul':
            y, n = y_jul, n_jul
            y_lag, n_lag = y_jun, n_jun
            a_lag, w_lag = a_jun, w_jun
            sI1_lag, s_lag = sI1_jun, s_jun

        else:
            # Fail fast: without this branch an unrecognised label silently
            # reused whatever y / n / *_lag arrays were left from an earlier pass.
            raise ValueError("unknown period {!r}; expected 'may-jun' or 'jun-jul'".format(period))
        

        # Define the function eta() which takes input parameters theta and returns the log-odds of disease for each yard i in current time period
        def eta(theta):
            """Return the linear predictor (log-odds) for every yard as an (N, 1) array.

            Parameters
            ----------
            theta : sequence of 12 values
                Ordered as beta1, beta2, delta1, delta2, gamma1, gamma2,
                alpha1, alpha2, eta11, eta12, eta21, eta22.  A (12, 1) array
                works as well as a flat one.

            Notes
            -----
            For each of the two groups selected by `tI1` / `tI2` the predictor is
            `beta + auto_infection + dispersal`, where the dispersal of yard i
            sums the contributions of all *other* yards j:

                gamma * sum_{j != i} a_lag[j] * (y_lag[j] / n_lag[j]) * w_lag[j, i]
                        * exp(-eta2 * s_lag[j]) * exp(-alpha * dist[j, i]) * sI[j]

            The whole (source j, target i) table is built in one shot and the
            diagonal is masked out before summing.  This replaces a per-yard
            Python loop that recomputed the same exponentials N times and
            removed the self term by subtracting it from the full sum.
            Assumes `w_lag` and `dist` are 2-D with N rows and at least N columns.
            """
            beta1, beta2, delta1, delta2, gamma1, gamma2, alpha1, alpha2, eta11, eta12, eta21, eta22 = theta

            prevalence = y_lag / n_lag

            auto_infection1 = delta1 * prevalence * np.exp(-eta11 * s_lag)
            auto_infection2 = delta2 * prevalence * np.exp(-eta12 * s_lag)

            # Rows index the source yard j, columns the receiving yard i.
            wind = w_lag[:, :N]
            separation = dist[:, :N]
            not_self = ~np.eye(N, dtype=bool)
            source = a_lag * prevalence

            pairwise1 = source * np.exp(-eta21 * s_lag) * sI1_lag * wind * np.exp(-alpha1 * separation)
            pairwise2 = source * np.exp(-eta22 * s_lag) * sI2 * wind * np.exp(-alpha2 * separation)

            # Column sums over j != i, returned as (N, 1) columns.
            dispersal1 = gamma1 * np.where(not_self, pairwise1, 0.0).sum(axis=0).reshape(N, 1)
            dispersal2 = gamma2 * np.where(not_self, pairwise2, 0.0).sum(axis=0).reshape(N, 1)

            # The scalar betas broadcast over the (N, 1) columns.
            return tI1 * (beta1 + auto_infection1 + dispersal1) + tI2 * (beta2 + auto_infection2 + dispersal2)


        def costFunction(theta):
            """Return the mean negative binomial log-likelihood at `theta`.

            Evaluates `-(1/N) * sum(y * eta - n * log(1 + exp(eta)))` with
            `eta = eta(theta)`.

            The linear predictor is computed once (each call to `eta` is
            expensive) and `log(1 + exp(eta))` is evaluated with
            `np.logaddexp(0, eta)`, which stays finite for large `eta` where the
            naive expression overflows to inf.
            """
            linear_predictor = eta(theta)

            log_likelihood = np.sum(y * linear_predictor - n * np.logaddexp(0, linear_predictor))

            return -(1/N) * log_likelihood


        def partial(theta):
            """Return the first derivatives of `eta` with respect to each parameter.

            Parameters
            ----------
            theta : sequence of 12 values
                Ordered as beta1, beta2, delta1, delta2, gamma1, gamma2,
                alpha1, alpha2, eta11, eta12, eta21, eta22.

            Returns
            -------
            numpy.ndarray, shape (12, N, 1)
                Entry k holds d eta_i / d theta_k for every yard i, in the
                parameter order given above.

            Notes
            -----
            The dispersal derivatives are sums over all source yards j != i.
            They are taken as masked column sums of one (N, N) table instead of
            a Python loop over yards.  Assumes `w_lag` and `dist` are 2-D with
            N rows and at least N columns.
            """
            beta1, beta2, delta1, delta2, gamma1, gamma2, alpha1, alpha2, eta11, eta12, eta21, eta22 = theta

            prevalence = y_lag / n_lag
            auto_decay1 = np.exp(-eta11 * s_lag)
            auto_decay2 = np.exp(-eta12 * s_lag)

            d_beta1 = tI1
            d_beta2 = tI2

            d_delta1 = tI1 * prevalence * auto_decay1
            d_delta2 = tI2 * prevalence * auto_decay2

            d_eta11 = -tI1 * delta1 * s_lag * prevalence * auto_decay1
            d_eta12 = -tI2 * delta2 * s_lag * prevalence * auto_decay2

            # Rows index the source yard j, columns the receiving yard i.
            wind = w_lag[:, :N]
            separation = dist[:, :N]
            not_self = ~np.eye(N, dtype=bool)

            def incoming(table):
                # Sum each column over the source yards j != i -> (N, 1) column.
                return np.where(not_self, table, 0.0).sum(axis=0).reshape(N, 1)

            source = a_lag * prevalence
            pairwise1 = source * np.exp(-eta21 * s_lag) * sI1_lag * wind * np.exp(-alpha1 * separation)
            pairwise2 = source * np.exp(-eta22 * s_lag) * sI2 * wind * np.exp(-alpha2 * separation)

            d_gamma1 = tI1 * incoming(pairwise1)
            d_gamma2 = tI2 * incoming(pairwise2)

            # Differentiating exp(-alpha * dist) pulls down a factor -dist[j, i].
            d_alpha1 = -gamma1 * tI1 * incoming(pairwise1 * separation)
            d_alpha2 = -gamma2 * tI2 * incoming(pairwise2 * separation)

            # Differentiating exp(-eta2 * s_lag) pulls down a factor -s_lag[j] (source yard).
            d_eta21 = -gamma1 * tI1 * incoming(pairwise1 * s_lag)
            d_eta22 = -gamma2 * tI2 * incoming(pairwise2 * s_lag)

            grad_entries = np.array([d_beta1, d_beta2, d_delta1, d_delta2, d_gamma1, d_gamma2, d_alpha1, d_alpha2, d_eta11, d_eta12, d_eta21, d_eta22])

            return grad_entries

        def partial_by_partial(theta):
            """Return all pairwise products of the first derivatives of `eta`.

            Returns
            -------
            numpy.ndarray, shape (12, 12, N, 1)
                Entry [r, c] is `partial(theta)[r] * partial(theta)[c]`
                (element-wise per yard), i.e. the outer product of the
                derivative vector with itself for every yard.

            Notes
            -----
            The derivatives come from `partial` rather than from a second copy
            of its body, and the 12 x 12 table is formed by broadcasting instead
            of being typed out entry by entry.
            """
            first_derivatives = partial(theta)

            # (12, 1, N, 1) * (1, 12, N, 1) -> (12, 12, N, 1)
            grad_entries = first_derivatives[:, np.newaxis] * first_derivatives[np.newaxis, :]

            return grad_entries

        def partial_sq(theta):
            """Return the second derivatives of `eta` with respect to the parameters.

            Parameters
            ----------
            theta : sequence of 12 values
                Ordered as beta1, beta2, delta1, delta2, gamma1, gamma2,
                alpha1, alpha2, eta11, eta12, eta21, eta22.

            Returns
            -------
            numpy.ndarray, shape (12, 12, N, 1)
                Entry [r, c] holds d^2 eta_i / (d theta_r d theta_c) for every
                yard i.  Only these blocks are non-zero, once per group
                (index offset 0 for the `tI1` group, 1 for the `tI2` group):

                * (delta, eta1x) and (eta1x, eta1x)            -- auto-infection term
                * (gamma, alpha), (gamma, eta2x), (alpha, alpha),
                  (alpha, eta2x), (eta2x, eta2x)               -- dispersal term

            Notes
            -----
            Mixed partials are written to both [r, c] and [c, r], so the result
            is exactly symmetric.  Dispersal sums run over source yards j != i
            and are taken as masked column sums of an (N, N) table.  Assumes
            `w_lag` and `dist` are 2-D with N rows and at least N columns.
            """
            beta1, beta2, delta1, delta2, gamma1, gamma2, alpha1, alpha2, eta11, eta12, eta21, eta22 = theta

            prevalence = y_lag / n_lag
            source = a_lag * prevalence

            # Rows index the source yard j, columns the receiving yard i.
            wind = w_lag[:, :N]
            separation = dist[:, :N]
            not_self = ~np.eye(N, dtype=bool)

            def incoming(table):
                # Sum each column over the source yards j != i -> (N, 1) column.
                return np.where(not_self, table, 0.0).sum(axis=0).reshape(N, 1)

            hess_entries = np.zeros((12, 12, N, 1))

            def put_symmetric(row, col, value):
                # Mixed partial derivatives commute: store the same column in both triangles.
                hess_entries[row, col] = value
                hess_entries[col, row] = value

            # (group selector, group factor in the dispersal sum, delta, gamma, alpha, eta1x, eta2x, index offset)
            groups = (
                (tI1, sI1_lag, delta1, gamma1, alpha1, eta11, eta21, 0),
                (tI2, sI2, delta2, gamma2, alpha2, eta12, eta22, 1),
            )

            for selector, factor, delta, gamma, alpha, eta_auto, eta_disp, offset in groups:

                i_delta = 2 + offset
                i_gamma = 4 + offset
                i_alpha = 6 + offset
                i_eta_auto = 8 + offset
                i_eta_disp = 10 + offset

                # Auto-infection term: delta * prevalence * exp(-eta_auto * s_lag)
                auto = selector * prevalence * np.exp(-eta_auto * s_lag)
                put_symmetric(i_delta, i_eta_auto, -auto * s_lag)
                hess_entries[i_eta_auto, i_eta_auto] = delta * auto * s_lag**2

                # Dispersal term: gamma * sum_{j != i} pairwise[j, i]
                pairwise = source * np.exp(-eta_disp * s_lag) * factor * wind * np.exp(-alpha * separation)
                put_symmetric(i_gamma, i_alpha, -selector * incoming(pairwise * separation))
                put_symmetric(i_gamma, i_eta_disp, -selector * incoming(pairwise * s_lag))
                put_symmetric(i_alpha, i_eta_disp, gamma * selector * incoming(pairwise * separation * s_lag))
                hess_entries[i_alpha, i_alpha] = gamma * selector * incoming(pairwise * separation**2)
                hess_entries[i_eta_disp, i_eta_disp] = gamma * selector * incoming(pairwise * s_lag**2)

            return hess_entries



        # Gradient entries
        def gradient(theta):
            """Return the gradient of `costFunction` at `theta` as a (12, 1) column.

            Each derivative of `eta` is weighted by the residual
            `y - n / (1 + exp(-eta))`, summed over the yards and scaled by
            `-(1 / N)`.
            """
            linear_predictor = eta(theta)
            fitted = n / (1 + np.exp(-linear_predictor))
            residual = y - fitted

            # partial(theta) is indexed (parameter, yard, 1); the yard axis is summed away.
            weighted = partial(theta) * residual

            return - (1 / N) * weighted.sum(axis=1)
        
        # Gradient entries
        def gradient1(theta):
            """Return the gradient of `costFunction` at `theta` as a flat 1-D array.

            Same values as `gradient`, flattened with `ravel()`; the only
            reference in this cell is as a commented-out `jac=` argument to
            `scipy.optimize.minimize`.  Delegates to `gradient` instead of
            repeating its body so the two can never drift apart.
            """
            return gradient(theta).ravel()


        # Hessian
        def hessian(theta):
            """Return the 12 x 12 Hessian of `costFunction` at `theta`.

            Computes

                -(1/N) * sum_i [ partial_sq * (y - n * p) - partial_by_partial * n * p * (1 - p) ]

            with `p = 1 / (1 + exp(-eta))`.

            Notes
            -----
            * `eta(theta)` is evaluated once instead of three times.
            * `p` and `1 - p` are formed through `np.logaddexp`, so the weight
              `p * (1 - p)` tends to 0 for extreme `eta`.  The previous
              `exp(-eta) / (1 + exp(-eta))**2` evaluated to inf / inf = NaN for
              very negative `eta`, which made the whole Hessian NaN.
            """
            linear_predictor = eta(theta)

            prob = np.exp(-np.logaddexp(0, -linear_predictor))          # p
            one_minus_prob = np.exp(-np.logaddexp(0, linear_predictor))  # 1 - p

            mu = y - n * prob
            curvature_weight = n * prob * one_minus_prob

            # Arrays are indexed (row, col, yard, 1); axis 2 sums over the yards.
            hess = - (1 / N) * np.sum(partial_sq(theta) * mu - partial_by_partial(theta) * curvature_weight, axis=2)

            return hess.reshape((12, 12))
        
        
        # Adam optimizer
        def adam(theta, alpha, num_iters, tolerance, b_1=0.9, b_2=0.999, eps=1e-8, clip_norm=100.0):
            """Minimise `costFunction` with the Adam update rule and a fixed step size.

            Parameters
            ----------
            theta : array_like
                Starting parameters.  Copied and reshaped to a column vector, so
                a flat array is accepted too (it used to broadcast against the
                (12, 1) gradient into a 12 x 12 array).
            alpha : float
                Step size.
            num_iters : int
                Maximum number of iterations.
            tolerance : float
                Threshold on the largest absolute gradient entry.
            b_1, b_2 : float
                Decay rates of the first / second moment estimates.
            eps : float
                Constant added to the denominator of the update.
            clip_norm : float
                Gradients with a larger Euclidean norm are rescaled to this norm.

            Returns
            -------
            theta : numpy.ndarray
                Final parameters as a column vector.
            J_history : list
                Cost after every update.
            g : numpy.ndarray or None
                Last (clipped) gradient; None when `num_iters` is 0.

            Notes
            -----
            Convergence is tested only every 100th iteration: the loop stops when
            the largest absolute (clipped) gradient entry is within `tolerance`
            and the smallest Hessian eigenvalue is positive.
            """
            theta = np.array(theta, dtype=float).reshape(-1, 1)
            J_history = []
            m = np.zeros(theta.shape)
            v = np.zeros(theta.shape)
            g = None  # stays None if the loop body never runs

            for i in range(num_iters):

                g = np.asarray(gradient(theta)).reshape(theta.shape)

                # Gradient clipping
                g_norm = np.linalg.norm(g)
                if g_norm > clip_norm:
                    g = (g / g_norm) * clip_norm

                # Bias-corrected moment estimates
                m = b_1 * m + (1 - b_1) * g
                v = b_2 * v + (1 - b_2) * g**2
                mhat = m / (1 - b_1**(i+1))
                vhat = v / (1 - b_2**(i+1))

                change = alpha * mhat / (np.sqrt(vhat) + eps)
                theta = theta - change

                # Evaluate the cost once and reuse it for the progress line.
                cost = costFunction(theta)
                J_history.append(cost)

                if i % 100 == 0:

                    max_abs_grad = np.max(np.abs(g))
                    min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
                    print('Year: ', year, 'Period: ', period, '#', i , 'cost: ', np.round(cost, 6), 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, 'MaxTheta: ', np.argmax(np.abs(theta)), 'MaxThetaValue: ', np.round(np.max(np.abs(theta)),4), end='\r')

                    if (max_abs_grad <= tolerance) and (min_eig_hess > 0):
                        break

            return theta, J_history, g
        
        
        # Adam optimizer with step decay
        def adam_with_decay(theta, initial_alpha, num_iters, decay_rate, decay_steps, tolerance, b_1=0.9, b_2=0.999, eps=1e-8, clip_norm=0.5):
            """Minimise `costFunction` with Adam and a step-wise decaying step size.

            Parameters
            ----------
            theta : array_like
                Starting parameters.  Copied and reshaped to a column vector, so
                a flat array is accepted too (it used to broadcast against the
                (12, 1) gradient into a 12 x 12 array).
            initial_alpha : float
                Initial step size.
            num_iters : int
                Maximum number of iterations.
            decay_rate : float
                Factor applied to the step size every `decay_steps` iterations.
            decay_steps : int
                Interval between decays; 0 disables the decay.
            tolerance : float
                The loop stops when the largest absolute (clipped) gradient entry
                is within this value; tested every 100th iteration only.
            b_1, b_2 : float
                Decay rates of the first / second moment estimates.
            eps : float
                Constant added to the denominator of the update.
            clip_norm : float
                Gradients with a larger Euclidean norm are rescaled to this norm.

            Returns
            -------
            theta : numpy.ndarray
                Final parameters as a column vector.
            J_history : list
                Cost after every update.
            g : numpy.ndarray or None
                Last (clipped) gradient; None when `num_iters` is 0.

            Notes
            -----
            Every 100th iteration a progress line is printed and, for the years
            2014-2017 and the periods 'may-jun' / 'jun-jul', the current
            parameters are saved to
            '../reports/parameters/theta_<period>_<year>_1.npy'.  The smallest
            Hessian eigenvalue is shown for information only; it does not take
            part in the stopping test.
            """
            theta = np.array(theta, dtype=float).reshape(-1, 1)
            J_history = []
            m = np.zeros(theta.shape)
            v = np.zeros(theta.shape)
            g = None  # stays None if the loop body never runs
            alpha = initial_alpha

            # One formatted path replaces eight hand-written year/period branches;
            # combinations outside the original set are still not checkpointed.
            checkpoint_path = None
            if year in (2014, 2015, 2016, 2017) and period in ('may-jun', 'jun-jul'):
                checkpoint_path = '../reports/parameters/theta_{}_{:d}_1.npy'.format(period, int(year))

            for i in range(num_iters):

                # decay the learning rate every few steps (decay_steps == 0 -> never)
                if i != 0 and decay_steps and i % decay_steps == 0:
                    alpha *= decay_rate

                g = np.asarray(gradient(theta)).reshape(theta.shape)

                # Gradient clipping
                g_norm = np.linalg.norm(g)
                if g_norm > clip_norm:
                    g = (g / g_norm) * clip_norm

                # Bias-corrected moment estimates
                m = b_1 * m + (1 - b_1) * g
                v = b_2 * v + (1 - b_2) * g**2
                mhat = m / (1 - b_1**(i+1))
                vhat = v / (1 - b_2**(i+1))

                change = alpha * mhat / (np.sqrt(vhat) + eps)
                theta = theta - change

                # Evaluate the cost once and reuse it for the progress line.
                cost = costFunction(theta)
                J_history.append(cost)

                if i % 100 == 0:
                    max_abs_grad = np.max(np.abs(g))

                    try:
                        min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
                    except np.linalg.LinAlgError:
                        # Display-only diagnostic: do not abort a long run over it.
                        min_eig_hess = np.nan

                    print('Year: ', year, 'Period: ', period, '#', i , 'cost: ', np.round(cost, 6), 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, 'MaxTheta: ', np.argmax(np.abs(theta)), 'MaxThetaValue: ', np.round(np.max(np.abs(theta)),4), 'alpha: ', alpha, end='\r')

                    if checkpoint_path is not None:
                        np.save(checkpoint_path, theta)

                    if (max_abs_grad <= tolerance):
                        break

            return theta, J_history, g


            
        
        # Starting point: the first two parameters (beta1, beta2) start at zero,
        # the remaining ten are drawn from N(0, 100 / sqrt(N)).
        theta = np.vstack([
            np.zeros((2, 1)),
            np.random.normal(0, 100 / np.sqrt(N), size=(10, 1)),
        ])

        
        #while True:
            # Initialize fitting parameters
        #    theta = np.random.normal(0, 2 / np.sqrt(N), size=(12, 1))
            
        #    try:
                # Minimize the cost function and get the optimized parameter values
                #res = minimize(costFunction, theta, method='BFGS', jac=gradient1, hess=hessian, options={'gtol': 1e-5, 'maxiter': 100000, 'disp': True})
        #        res = minimize(costFunction, theta, method='BFGS', options={'gtol': 1e-5, 'maxiter': 100000, 'disp': True})
        #        theta = res.x
                
        #        max_abs_grad = np.max(np.abs(gradient(theta)))
        #        min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
                
        #        print('Year: ', year, 'Period: ', period, 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, end='\r')
                
                # Check custom convergence criteria
                #if (max_abs_grad < 1e-5) and (min_eig_hess > 0) and np.max(np.abs(theta)) < 500000:
        #        if (max_abs_grad < 1e-5) and np.max(np.abs(theta)) < 500000:
        #            break
        #    except np.linalg.LinAlgError:
        #        print('LinAlgError: eigenvalues did not converge. Retrying...')
        
        # Per-year optimiser settings.  Reject unsupported years up front: there
        # was no fallback branch, so `alpha` / `tolerance` would otherwise be
        # undefined or silently carried over from a previous pass.
        if year not in (2014, 2015, 2016, 2017):
            raise ValueError('no optimiser settings are configured for year {!r}'.format(year))

        if year == 2014:

            alpha = 0.01
            tolerance = 1e-4
            # Warm start from a saved parameter vector (replaces the random initial theta).
            # NOTE(review): the file name is fixed to 'may-jun', so the same start is
            # used when `period` is 'jun-jul' -- confirm this is intended.
            # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary
            # objects; only point this at files you trust.
            theta = np.load('../reports/parameters/theta_may-jun_2014_SLSQP.npy', allow_pickle=True)

        elif year == 2015:

            alpha = 1
            tolerance = 1e-5
            #theta = np.random.normal(0, 2 / np.sqrt(N), size=(12, 1))

        elif year == 2016:

            alpha = 1
            tolerance = 1e-5
            #theta = np.random.normal(0, 2 / np.sqrt(N), size=(12, 1))

        elif year == 2017:

            alpha = 0.05
            tolerance = 1e-5
            
            # Initialize fitting parameters
            #theta = np.random.normal(-0.5, 0.5, size=(12, 1))
            
            # Minimize the cost function and get the optimized initial parameter values
            #res = minimize(costFunction, theta, method='BFGS', jac=gradient1, hess=hessian, options={'gtol': 1e-8, 'maxiter': 100000, 'disp': True})
            #theta = res.x
            #theta = theta.reshape(12, 1)
            
            #if period == 'may-jun':
                
            #    theta = np.load('../reports/parameters/theta_may-jun_2017_num_1.npy', allow_pickle=True)
            
            #elif period == 'jun-jul':
                
            #    theta = np.load('../reports/parameters/theta_jun-jul_2017_num_1.npy', allow_pickle=True)
                
            #theta = theta.reshape(12, 1)
            
        #theta = np.load('../reports/theta2.npy')

        # Gradient descent settings
        iterations = 5000000

        theta, J_history, g = adam_with_decay(theta, initial_alpha=alpha, num_iters=iterations, decay_rate=1, decay_steps=5000, tolerance=tolerance)

        #print('iteration start:\t{:.3f}'.format(np.int32(checkpoint[1])))
        #print('previous final cost:\t{:.3f}'.format(checkpoint[2]))
        #print('updated final cost:\t{:.3f}'.format(J_history[-1]))
        #print('theta: \n', theta)

        #plt.plot(list(range(1, len(J_history) + 1)), J_history)
        #plt.xlabel('iterations')
        #plt.ylabel('cost')
        
        
        #plt.show()

        # Save trained parameters

        #iterations += checkpoint[1]
        checkpoint = np.array([theta, iterations, J_history, g], dtype=object)
        
        theta_list.append(theta)
        checkpoint_list.append(checkpoint)

        if (year == 2014) & (period == 'may-jun'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_may-jun_2014_1.npy', checkpoint)
        
        elif (year == 2014) & (period == 'jun-jul'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_jun-jul_2014_1.npy', checkpoint)
            
        elif (year == 2015) & (period == 'may-jun'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_may-jun_2015_1.npy', checkpoint)
            
        elif (year == 2015) & (period == 'jun-jul'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_jun-jul_2015_1.npy', checkpoint)
            
        elif (year == 2016) & (period == 'may-jun'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_may-jun_2016_1.npy', checkpoint)
            
        elif (year == 2016) & (period == 'jun-jul'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_jun-jul_2016_1.npy', checkpoint)
            
        elif (year == 2017) & (period == 'may-jun'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_may-jun_2017_1.npy', checkpoint)
            
        elif (year == 2017) & (period == 'jun-jul'):
            
            np.save('../reports/parameters/checkpoints/mle_checkpoint_jun-jul_2017_1.npy', checkpoint)

Year:  2014 Period:  may-jun # 24700 cost:  22.134388 MaxGrad:  9.916512962961783e-05 MinEig:  1.3931268879695012e-10 MaxTheta:  5 MaxThetaValue:  4190.6836 alpha:  0.011

In [25]:
# Display the parameter vector currently bound to `theta` in the kernel.
theta

array([[  -1.90199427],
       [  -5.89298669],
       [1084.87231619],
       [  98.22151184],
       [ 826.58571766],
       [4190.68361118],
       [  12.36124891],
       [  19.77812373],
       [   3.90317286],
       [   1.55499324],
       [  -0.226496  ],
       [   0.7442895 ]])

In [14]:
# Display the first column of `w_lag` (defined outside the cells shown here).
w_lag[:, 0]

array([0.        , 0.02248995, 0.00662248, 0.0326708 , 0.0218138 ,
       0.00938308, 0.03003079, 0.01667218, 0.01701338, 0.01689585,
       0.01862485, 0.01852546, 0.01873603, 0.01681696, 0.00678086,
       0.02194052, 0.02566796, 0.0224682 , 0.02164904, 0.01959695,
       0.02119097, 0.03095749, 0.02063743, 0.01938343, 0.01891767,
       0.0191342 , 0.02087727, 0.01663278, 0.01026286, 0.01645465,
       0.01015198, 0.01649842, 0.00765829, 0.00690899, 0.00762512,
       0.00889952, 0.00725284, 0.00536449, 0.00467987, 0.00748682,
       0.00642059, 0.00658914, 0.01009279, 0.0114314 , 0.01122291,
       0.01033175, 0.01098708, 0.00901219, 0.00894552, 0.01099142,
       0.00901247, 0.00498172, 0.00494121, 0.00945305, 0.00926055,
       0.01047521, 0.01091334, 0.01056212, 0.00587598, 0.00558788,
       0.00856882, 0.00437667, 0.01978611, 0.01980495, 0.01931544,
       0.02248399, 0.0329257 , 0.02273099, 0.02492186, 0.02560626,
       0.01736334, 0.0203856 , 0.02167503, 0.02301612, 0.03359

In [None]:
# Minimize the cost function and get the optimized initial parameter values
            #res = minimize(costFunction, theta, method='BFGS', jac=gradient1, hess=hessian, options={'gtol': 1e-8, 'maxiter': 100000, 'disp': True})
            #theta = res.x
            #theta = theta.reshape(12, 1)

In [86]:
# Randomly sample parameter vectors until the Hessian of the cost at the
# sample has a strictly positive smallest eigenvalue.
max_attempts = 10000  # upper bound on samples; raise it for a longer search
min_eig_hess = -1

for attempt in range(max_attempts):
    # Initialize fitting parameters
    theta = np.random.uniform(-10, 10000, size=(12, 1))

    try:
        # Compute Hessian
        hess = hessian(theta)

        # Compute minimum eigenvalue of the Hessian
        min_eig_hess = np.min(np.linalg.eigvalsh(hess))
    except np.linalg.LinAlgError:
        print("Failed to compute eigenvalues, retrying...", end='\r')
        continue

    print('MinEig: ', min_eig_hess, end='\r')

    # Accept only a strictly positive value. The previous condition
    # (`while min_eig_hess <= 0`) also stopped on NaN, because every
    # comparison with NaN is False.
    if min_eig_hess > 0:
        break
else:
    raise RuntimeError(
        'No sample with a positive-definite Hessian found in {} attempts'.format(max_attempts)
    )

MinEig:  -0.048339770792663e-1370

KeyboardInterrupt: 

In [14]:
# Random-restart BFGS: draw a starting point, minimize the cost, and accept the
# solution only if the gradient is numerically zero and the Hessian is
# positive definite at the solution.
max_attempts = 100  # upper bound on restarts; raise it for a longer search

for attempt in range(max_attempts):
    # Initialize fitting parameters. minimize() expects a 1-D starting point,
    # so the (12, 1) draw is flattened (as the SLSQP / trust-constr cells do).
    theta = np.random.normal(10, 10 / np.sqrt(N), size=(12, 1)).ravel()

    try:
        # Minimize the cost function and get the optimized parameter values
        res = minimize(costFunction, theta, method='BFGS', options={'gtol': 1e-5, 'maxiter': 100000, 'disp': True})
        theta = res.x

        max_abs_grad = np.max(np.abs(gradient(theta)))
        min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
    except np.linalg.LinAlgError:
        print('LinAlgError: eigenvalues did not converge. Retrying...')
        continue

    print('Year: ', year, 'Period: ', period, 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, end='\r')
    print(theta.ravel())

    # Check custom convergence criteria (NaN values fail both comparisons and
    # therefore trigger another restart).
    if (max_abs_grad < 1e-5) and (min_eig_hess > 1e-5):
        break
else:
    raise RuntimeError('BFGS did not meet the convergence criteria in {} restarts'.format(max_attempts))

theta = theta.reshape(12, 1)
# Name the file after the data actually being fitted instead of a hard-coded
# may-jun / 2014 label.
np.save('../reports/parameters/theta_{}_{}_BFGS.npy'.format(period, year), theta)

         Current function value: nan
         Iterations: 25
         Function evaluations: 1925
         Gradient evaluations: 148
LinAlgError: eigenvalues did not converge. Retrying...
         Current function value: 30.875477
         Iterations: 25
         Function evaluations: 599
         Gradient evaluations: 45
[ -0.78334465  -2.62405152  10.78212196  51.07813128  14.90650768 -0.00100571509771798
 -11.83671087   7.21130785  10.01841518   7.58676192   0.71983888
  10.2293322   10.66165511]


KeyboardInterrupt: 

In [13]:
# Evaluate gradient() at the current `theta` to see how far it is from zero.
gradient(theta)

array([[18.02944166],
       [68.53351592],
       [ 0.00000282],
       [ 0.04434991],
       [ 0.00039621],
       [ 0.01117746],
       [-0.00022887],
       [-0.0070653 ],
       [-0.0000257 ],
       [-0.00006961],
       [-0.00000374],
       [-0.00001694]])

In [36]:
from scipy.optimize import Bounds, minimize

# Random-restart SLSQP with box constraints on the parameters.
n_params = 12       # length of theta; the bounds no longer depend on a stale kernel `theta`
max_attempts = 100  # upper bound on restarts; raise it for a longer search

# Define the bounds for theta parameters
# Indices for beta1 and beta2 are [0, 1], they should be between -100 and 100

# Lower bounds (-inf for no bound)
lower_bounds = np.full(n_params, -np.inf)
lower_bounds[2:] = 0  # All parameters from delta to eta should be non-negative
lower_bounds[:2] = -100  # lower bounds for beta1 and beta2

# Upper bounds (inf for no bound)
upper_bounds = np.full(n_params, np.inf)
upper_bounds[:2] = 100  # upper bounds for beta1 and beta2

bounds = Bounds(lower_bounds, upper_bounds)  # apply bounds

for attempt in range(max_attempts):
    # Initialize fitting parameters: normal draw around a random mean in [0, 100)
    theta = np.random.normal(np.random.uniform(0, 100), 100 / np.sqrt(N), size=(n_params, 1))
    theta = np.ravel(theta)  # ensure theta is a 1-D array

    try:
        # Minimize the cost function and get the optimized parameter values
        res = minimize(costFunction, theta, method='SLSQP', bounds=bounds, options={'ftol': 1e-10, 'maxiter': 100000, 'disp': True})
        theta = res.x

        max_abs_grad = np.max(np.abs(gradient(theta)))
        min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
    except np.linalg.LinAlgError:
        print('LinAlgError: eigenvalues did not converge. Retrying...')
        continue

    print('Year: ', year, 'Period: ', period, 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, end='\r')

    # Check custom convergence criteria.
    # NOTE(review): with active bounds the gradient need not vanish at the
    # constrained optimum, so this test can reject valid solutions -- confirm.
    if (max_abs_grad < 1e-4) and (min_eig_hess > 1e-8):
        break
else:
    raise RuntimeError('SLSQP did not meet the convergence criteria in {} restarts'.format(max_attempts))

theta = theta.reshape(n_params, 1)
# Name the file after the data actually being fitted instead of a hard-coded
# may-jun / 2014 label.
np.save('../reports/parameters/theta_{}_{}_SLSQP.npy'.format(period, year), theta)


Optimization terminated successfully    (Exit mode 0)
            Current function value: 10.520207070601893
            Iterations: 41
            Function evaluations: 541
            Gradient evaluations: 41
Optimization terminated successfully    (Exit mode 0)607e-05 MinEig:  -3.1953376888106294e-09
            Current function value: 10.520197917038132
            Iterations: 49
            Function evaluations: 651
            Gradient evaluations: 49
Optimization terminated successfully    (Exit mode 0)207e-05 MinEig:  -4.018649863049439e-07
            Current function value: 10.520208031471855
            Iterations: 39
            Function evaluations: 513
            Gradient evaluations: 39
Optimization terminated successfully    (Exit mode 0)559e-05 MinEig:  -8.678448132843538e-10
            Current function value: 10.520235823647093
            Iterations: 43
            Function evaluations: 575
            Gradient evaluations: 43
Optimization terminated successfully  

KeyboardInterrupt: 

In [38]:
from scipy.optimize import Bounds, minimize

# Random-restart trust-constr with box constraints on the parameters.
n_params = 12       # length of theta; the bounds no longer depend on a stale kernel `theta`
max_attempts = 100  # upper bound on restarts; raise it for a longer search

# Define the bounds for theta parameters
# Indices for beta1 and beta2 are [0, 1], they should be between -100 and 100

# Lower bounds (-inf for no bound)
lower_bounds = np.full(n_params, -np.inf)
lower_bounds[2:] = 0  # All parameters from delta to eta should be non-negative
lower_bounds[:2] = -100  # lower bounds for beta1 and beta2

# Upper bounds (inf for no bound)
upper_bounds = np.full(n_params, np.inf)
upper_bounds[:2] = 100  # upper bounds for beta1 and beta2

bounds = Bounds(lower_bounds, upper_bounds)  # apply bounds

for attempt in range(max_attempts):
    # Initialize fitting parameters
    theta = np.random.normal(10, 100 / np.sqrt(N), size=(n_params, 1))
    theta = np.ravel(theta)  # ensure theta is a 1-D array

    try:
        # Minimize the cost function and get the optimized parameter values
        res = minimize(costFunction, theta, method='trust-constr', bounds=bounds, options={'xtol': 1e-6, 'maxiter': 100000, 'disp': True})
        theta = res.x

        max_abs_grad = np.max(np.abs(gradient(theta)))
        min_eig_hess = np.min(np.linalg.eigvalsh(hessian(theta)))
    except np.linalg.LinAlgError:
        print('LinAlgError: eigenvalues did not converge. Retrying...')
        continue

    print('Year: ', year, 'Period: ', period, 'MaxGrad: ', max_abs_grad, 'MinEig: ', min_eig_hess, end='\r')

    # Check custom convergence criteria.
    # NOTE(review): with active bounds the gradient need not vanish at the
    # constrained optimum, so this test can reject valid solutions -- confirm.
    if (max_abs_grad < 1e-5) and (min_eig_hess > 1e-8):
        break
else:
    raise RuntimeError('trust-constr did not meet the convergence criteria in {} restarts'.format(max_attempts))

theta = theta.reshape(n_params, 1)
# Name the file after the data actually being fitted; the recorded run printed
# "Year: 2017" while writing to a file labelled 2014.
np.save('../reports/parameters/theta_{}_{}_trust.npy'.format(period, year), theta)


`xtol` termination condition is satisfied.
Number of iterations: 1157, function evaluations: 15678, CG iterations: 3841, optimality: 2.08e-07, constraint violation: 0.00e+00, execution time: 1e+02 s.
`gtol` termination condition is satisfied.75151055327026e-05 MinEig:  -2.636271578424329e-09
Number of iterations: 502, function evaluations: 6929, CG iterations: 1776, optimality: 3.08e-09, constraint violation: 0.00e+00, execution time: 4.5e+01 s.
Year:  2017 Period:  may-jun MaxGrad:  8.275135712407697e-05 MinEig:  -3.601133273352982e-09

In [37]:
# Display the parameter vector currently bound to `theta` in the kernel.
theta

array([41.59256371, 51.72961433, 10.34049695, 44.71963792, 40.93659022,
       29.47460257, 38.31206175, 28.92169189, 37.22852699, 30.93316521,
       48.38964612, 35.82492162])

In [33]:
# Evaluate gradient() at the current `theta`; large entries mean it is not a stationary point.
gradient(theta)

array([[ 43.94539231],
       [151.80790582],
       [  0.00067848],
       [  0.00815129],
       [  0.04181238],
       [  0.00010497],
       [ -0.37976637],
       [ -0.00006866],
       [ -0.008642  ],
       [ -0.        ],
       [ -1.40044642],
       [ -0.        ]])

## Function

$$
\eta_{i}=\sum_{k=1}^{K} I_{k}^{(t)}(i)\left[\beta_{k}+\delta_{k}\left(\frac{\tilde{y}_{i}}{n_{\tilde{y}_{i}}} \exp{\left(-\eta_{1k} s_{i}\right)}\right)+\gamma_{k} \sum_{j=1}^{M_{i}}\left(\frac{a_{j} z_{j}}{n_{z_{j}}} \exp{\left(-\eta_{2k} s_{j}\right)} w_{i j} \exp{\left(-\alpha_{k} d_{i j}\right)} I_{k}^{(s)}(j)\right)\right]
$$

## Derivatives

$$
\begin{align*}
\frac{\partial \eta_{i}}{\partial \beta_{k}} &= I_{k}^{(t)}(i) \\

\frac{\partial \eta_{i}}{\partial \delta_{k}} &= I_{k}^{(t)}(i)\left(\frac{\tilde{y}_{i}}{n_{\tilde{y}_{i}}}\right) \exp \left(-\eta_{1 k} s_{i}\right) \\

\frac{\partial \eta_{i}}{\partial \eta_{1 k}} &= -I_{k}^{(t)}(i) \delta_{k} s_{i}\left(\frac{\tilde{y}_{i}}{n_{\tilde{y}_{i}}}\right) \exp \left(-\eta_{1 k} s_{i}\right) \\

\frac{\partial \eta_{i}}{\partial \eta_{2 k}} &= -\gamma_{k} I_{k}^{(t)}(i) \sum_{j=1}^{M_{i}}\left[\left(\frac{a_{j} z_{j}}{n_{z_{j}}}\right) \exp \left(-\eta_{2 k} s_{j}\right) w_{i j} \exp \left(-\alpha_{k} d_{i j}\right) I_{k}^{(s)}(j) s_{j}\right] \\

\frac{\partial \eta_{i}}{\partial \gamma_{k}} &= I_{k}^{(t)}(i) \sum_{j=1}^{M_{i}}\left[\left(\frac{a_{j} z_{j}}{n_{z_{j}}}\right) \exp \left(-\eta_{2 k} s_{j}\right) w_{i j} \exp \left(-\alpha_{k} d_{i j}\right) I_{k}^{(s)}(j)\right] \\

\frac{\partial \eta_{i}}{\partial \alpha_{k}} &= -\gamma_{k} I_{k}^{(t)}(i) \sum_{j=1}^{M_{i}}\left[\left(\frac{a_{j} z_{j}}{n_{z_{j}}}\right) \exp \left(-\eta_{2 k} s_{j}\right) w_{i j} \exp \left(-\alpha_{k} d_{i j}\right) I_{k}^{(s)}(j) d_{i j}\right]
\end{align*}
$$

### Cost Function

$$J(\theta) = 
-\frac{1}{N} \sum_{i=1}^{N} \left[ y_{i} \eta_{i}-n_{i} \log \left(1+e^{\eta_{i}}\right) \right]
$$

### Gradient

$$\frac{\partial J}{\partial \theta} =
-\frac{1}{N}\sum_{i=1}^{N} \frac{\partial \eta_{i}}{\partial \theta}\left(y_{i}-\frac{n_{i}}{1+e^{-\eta_{i}}}\right)
$$


### Hessian

$$\frac{\partial^2 J}{\partial \theta^2} = -\frac{1}{N}
\sum_{i=1}^{N}\left[\frac{\partial^{2} \eta_{i}}{\partial \theta^{2}}\left(y_{i}-\frac{n_{i}}{1+e^{-\eta_{i}}}\right)- n_{i} \left(\frac{\partial \eta_{i}}{\partial \theta}\right)^{2}\frac{e^{-\eta_{i}}}{\left(1+e^{-\eta_{i}}\right)^2}\right]
$$

## Optimization

In [50]:
#checkpoint = np.array([theta, iterations, J_history], dtype=object)
#np.save('../reports/checkpoint_June_July2_backup.npy', checkpoint)

In [45]:
# Restore a previously saved checkpoint array and take its first entry as `theta`.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects; only
# load checkpoint files that were produced by this project.
checkpoint = np.load('../reports/checkpoint_June_July2.npy', allow_pickle=True)
theta = checkpoint[0]

In [238]:
# Display the parameter vector currently bound to `theta` in the kernel.
theta

array([[    1.5457632 ],
       [   -0.07547458],
       [   -4.8742034 ],
       [   51.98424236],
       [-3122.39869964],
       [-4194.55086314],
       [   -5.29015772],
       [   -5.85815199],
       [   -6.70538574],
       [   -0.91393116],
       [   -0.59329742],
       [    0.02099419]])

## Parameter Estimation

## Function

$$
\eta_{i}=\sum_{k=1}^{K} I_{k}^{(t)}(i)\left[\beta_{k}+\delta_{k}\left(\frac{\tilde{y}_{i}}{n_{\tilde{y}_{i}}} \exp{\left(-\eta_{1k} s_{i}\right)}\right)+\gamma_{k} \sum_{j=1}^{M_{i}}\left(\frac{a_{j} z_{j}}{n_{z_{j}}} \exp{\left(-\eta_{2k} s_{j}\right)} w_{i j} \exp{\left(-\alpha_{k} d_{i j}\right)} I_{k}^{(s)}(j)\right)\right]
$$

$$\beta_1 = -2.89902093$$
$$\beta_2 = -4.33376942$$
$$\delta_1 = 3.86406603$$
$$\delta_2 = 7.1830044$$
$$\gamma_1 = 0.06209235$$
$$\gamma_2 = 6.21221296$$
$$\alpha_1 = 0.17578305$$
$$\alpha_2 = 1.31264131$$
$$\eta_{11} = 0.13978209$$
$$\eta_{12} = 0.40521989$$
$$\eta_{21} = -0.79182359$$
$$\eta_{22} = 0.55742334$$

## Eigenvalues of Hessian

In [61]:
# Eigenvalues (ascending) of the Hessian evaluated at `theta_numpy`.
# NOTE(review): `theta_numpy` is not defined in the cells shown here -- confirm
# it exists when running the notebook top to bottom.
np.linalg.eigvalsh(hessian(theta_numpy))

array([   0.        ,    0.00000037,    0.00000144,    0.00000554,
          0.00212706,    0.00660576,    0.02351497,    0.40513871,
          2.03226417,    3.69983217,  715.52475767, 2615.2486514 ])

## Gradient

In [11]:
# Evaluate gradient() at the current `theta` as a convergence check.
gradient(theta)

array([[ 0.00035165],
       [ 0.00000092],
       [ 0.00093506],
       [-0.00000046],
       [ 0.00370447],
       [-0.00027325],
       [-0.00982039],
       [-0.00226836],
       [-0.00229518],
       [ 0.00000655],
       [-0.00047533],
       [ 0.00037915]])

In [31]:
# Build gradient and Hessian callables for costFunction via `nd`, then evaluate
# both at theta flattened to shape (12,).
# NOTE(review): `nd` is presumably numdifftools; its import is not in the cells
# shown here -- confirm it is imported before this cell runs.
nGrad = nd.Gradient(costFunction)
nHess = nd.Hessian(costFunction)
hess_theta = nHess(theta.reshape(12,))
grad_theta = nGrad(theta.reshape(12,))

In [10]:
def prob(theta):
    """Return the estimated probability for each observation.

    Applies the logistic function to the linear predictor ``eta(theta)``.

    Parameters
    ----------
    theta : array-like
        Parameter vector, passed unchanged to ``eta``.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-eta(theta)))`` element-wise, with the same shape as
        ``eta(theta)``.
    """
    # Local import keeps this cell self-contained.
    from scipy.special import expit

    # expit is the numerically stable logistic function: unlike the explicit
    # 1 / (1 + np.exp(-x)) it does not overflow for very negative x.
    return expit(eta(theta))

In [12]:
# Print the probabilities returned by prob() at the current `theta`.
print('estimated probability of disease: \n', prob(theta))

estimated probability of disease: 
 [[0.02611759]
 [0.02631787]
 [0.02620498]
 [0.03368977]
 [0.02623533]
 [0.02624147]
 [0.02745905]
 [0.02710209]
 [0.04514933]
 [0.04289552]
 [0.16258389]
 [0.03171659]
 [0.15754236]
 [0.03329544]
 [0.02628519]
 [0.02748307]
 [0.02906667]
 [0.02786479]
 [0.02657169]
 [0.02892042]
 [0.02622809]
 [0.04582204]
 [0.5       ]
 [0.84480418]
 [0.24196183]
 [0.2553849 ]
 [0.33620011]
 [0.02684969]
 [0.49306239]
 [0.80226981]
 [0.53574716]
 [0.41690551]
 [0.91858263]
 [0.14165495]
 [0.        ]
 [0.02439342]
 [0.02535986]
 [0.02457473]
 [0.02567178]
 [0.17610494]
 [0.02407955]
 [0.01639678]
 [0.01495841]
 [0.02234735]
 [0.02345555]
 [0.02276539]
 [0.5       ]
 [0.02272612]
 [0.02457618]
 [0.11173597]
 [0.10794024]
 [0.22234288]
 [0.14008519]
 [0.2470191 ]
 [0.24700966]
 [0.02734554]
 [0.25025533]
 [0.02731197]
 [0.2534015 ]
 [0.02732704]
 [0.24922973]
 [0.02735182]
 [0.02728799]
 [0.02733912]
 [0.02321656]
 [0.02306974]
 [0.02242573]
 [0.5       ]
 [0.02569351

In [13]:
# Check the shape of the array returned by prob().
prob(theta).shape

(104, 1)