In [1]:
import numpy as np
import numpy.linalg as npl
import statsmodels.api as smf

import pandas as pd

# Without intercept

The following functions allow us to generate W_i, eta_i, u_i, X_i, then y_i without interception (beta_0=0).

In [2]:
W_gen = lambda sigma_w,n : sigma_w*np.random.randn(n)


def eta_u_gen(sigma_eta,theta,n):
    
    sigma = np.array([[sigma_eta**2,theta*sigma_eta],[theta*sigma_eta,1]])
    
    result = npl.eig(sigma)
    
    D = np.diag(result[0])
    O = result[1]
    B = O@D**(1/2)
    
    N1 = np.random.randn(n)
    N2 = np.random.randn(n)
    
    N_new1 = []
    N_new2 = []

    for nn1,nn2 in zip(N1,N2):
    
     N_new1.append(B[0,0]*nn1 + B[0,1]*nn2)
     N_new2.append(B[1,0]*nn1 + B[1,1]*nn2)
    
    return np.array([N_new1,N_new2])
    

X_gen = lambda alpha,W,eta : alpha*W+eta

y_gen = lambda beta,X,u : (beta*X + u > 0).astype(int)

In [3]:
import scipy.stats as ss

We define the value of the parameters.

In [4]:
alpha = 0.7

beta = 2

sigma_w = 1
sigma_eta = np.sqrt(1-alpha**2)
theta = 0.7

In [5]:
sigma_eta

0.714142842854285

In [6]:
def eta_u_gen2(sigma_eta,theta,n):
    
    u = np.random.randn(n)
    N = np.random.randn(n)
    
    eta = sigma_eta*(theta*u+np.sqrt(1-theta**2)*N)
    
    return eta,u
    

The following function estimates the parameters beta_1 and theta when there is no intercept: beta_0=0, beta_1=beta.

In [7]:
def estimators_sin_intercepto(a,b,alpha,sigma_w,sigma_eta):

    gamma = (np.sqrt(2*np.pi)*alpha*sigma_w**2)/(np.sqrt(alpha**2*sigma_w**4+\
                                                   2*a**2*alpha**2*sigma_w**2*np.pi\
                                                   -4*a*b*alpha*sigma_w**2*np.pi\
                                                   +2*a**2*np.pi*sigma_eta**2))
    beta_est = a*gamma/(alpha*sigma_w**2)
    theta_est = (gamma*(b*alpha*sigma_w**2-a*alpha**2*sigma_w**2-a*sigma_eta**2))\
/(alpha*sigma_w**2*sigma_eta)
    gamma = (np.sqrt(2*np.pi)*alpha)/(np.sqrt(alpha**2+\
                                                   2*a**2*np.pi\
                                                   -4*a*b*alpha*np.pi))
#    beta_est = a*gamma/alpha
#    theta_est = (gamma*(b*alpha-a))\
#    /(alpha*sigma_eta)    
    return (beta_est,theta_est,gamma)
    

# With intercept

The following function defines the estimator with intercept.

In [8]:
Phi_inv = ss.norm.ppf

def estimators_con_intercepto(X,W,y,alpha,sigma_w,sigma_eta):
    
    
    t_inv = Phi_inv(y.mean())
    
    c = np.exp((1/2)*t_inv**2)
    
    a = c*(W*y).mean()
    
    b = c*(X*y).mean()
    
    first_estimators = estimators_sin_intercepto(a,b,alpha,sigma_w,sigma_eta)
    
    beta1_est = first_estimators[0]
    theta_est = first_estimators[1]
    gamma = first_estimators[2]
    beta0_est = ((1/np.sqrt(2*np.pi))*t_inv)*gamma
    
    return beta0_est,beta1_est,theta_est,gamma

We define the yi generator with intercept.

In [9]:
y_gen = lambda beta0,beta1,X,u : (beta0+beta1*X + u > 0).astype(int)

In [10]:
beta = 2

In [11]:
beta1 = beta
beta0 = 0.5

In [24]:
theta = 0.7

In what follows, epochs is the number of simulations, n is the amount of data for each simulation.

In [25]:
n = 2000

epochs = 1000

Beta0_our_est, Beta1_our_est and Theta_our_est correspond to the new estimators, Beta0_probit and Beta1_probit correspond to the classical probit method.

In [26]:
import time

t_I = time.time()

bachs = 50

list_beta0_est = []
list_beta0_probit = []
list_beta1_est = []
list_beta1_probit = []
list_theta_est = []

for epoch in range(epochs):
    
   
   W = W_gen(sigma_w,n)
   eta_u = eta_u_gen(sigma_eta,theta,n)
   eta = eta_u[0]
   u = eta_u[1]
   X = X_gen(alpha,W,eta)
   y = y_gen(beta0,beta1,X,u)
   beta0_est_sample_list = []
   beta1_est_sample_list = [] 
   theta_est_sample_list = [] 
   bach = 0 
   while bach <= bachs: 
    sample_indices = np.random.permutation(X.shape[0])[:int(n/2)]
#    sample_indices = [idx for idx in range(bach,bach+100)]
    X_sample = X[sample_indices]
    y_sample = y[sample_indices]
    W_sample = W[sample_indices]
    X_sample = (X_sample-X_sample.mean())/X_sample.std()
    W_sample = (W_sample-W_sample.mean())/W_sample.std()
    alpha_est_sample = np.cov(X_sample,W_sample)[0,1]/W_sample.var()
    sigma_eta_est_sample = (X_sample-alpha_est_sample*W_sample).std() 
    sigma_w_est_sample = W_sample.std()
    estimators = estimators_con_intercepto(X_sample,W_sample,y_sample,alpha_est_sample\
                                           ,sigma_w_est_sample,sigma_eta_est_sample)
    if np.abs(estimators[2])<=1.0:
     beta0_est_sample_list.append(estimators[0])
     beta1_est_sample_list.append(estimators[1])
     theta_est_sample_list.append(estimators[2])
     bach += 1
   list_beta0_est.append(np.nanmean(beta0_est_sample_list))  
   list_beta1_est.append(np.nanmean(beta1_est_sample_list)) 
   list_theta_est.append(np.nanmean(theta_est_sample_list)) 
   beta_probit = smf.Probit(y,np.concatenate([np.ones((W.shape[0],1)),X.reshape((-1,1))],axis = 1)).fit().params
   beta0_probit = beta_probit[0]
   list_beta0_probit.append(beta0_probit)
   beta1_probit = beta_probit[1]
   list_beta1_probit.append(beta1_probit)
#   print('*******************',epoch,'*******************************************')

predictions = {'Beta0_our_est':list_beta0_est,'Beta1_our_est':list_beta1_est,\
'Theta_our_est':list_theta_est,'Beta0_probit': list_beta0_probit,\
       'Beta1_probit': list_beta1_probit }
   
three_estimators = pd.DataFrame(predictions)

t_F = time.time()

Optimization terminated successfully.
         Current function value: 0.239144
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.229368
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.227908
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.235582
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.230080
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.232403
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.215733
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236195
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.242036
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.218368
  

Optimization terminated successfully.
         Current function value: 0.229274
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.224945
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236884
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.214029
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236688
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.224011
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.245063
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236486
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.232424
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.235384
  

Optimization terminated successfully.
         Current function value: 0.225398
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236428
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.222003
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236741
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231361
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.245300
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.234889
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.227211
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.227513
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.219736
  

Optimization terminated successfully.
         Current function value: 0.234836
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.247579
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.232732
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.211927
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.234260
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231183
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.238244
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.222781
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236285
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.228166
  

Optimization terminated successfully.
         Current function value: 0.214871
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.233419
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.247418
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.217379
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.226157
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.247354
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.237256
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236510
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.207010
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.237372
  

Optimization terminated successfully.
         Current function value: 0.221915
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.216175
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.250935
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.253595
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.233579
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236860
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.249472
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.246238
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.240988
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.230999
  

Optimization terminated successfully.
         Current function value: 0.241882
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.215004
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.226288
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.215750
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.248465
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.229902
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.239990
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.230103
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.246021
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.239841
  

Optimization terminated successfully.
         Current function value: 0.228023
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.227384
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.218882
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.220379
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.235170
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.223283
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.237564
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.227576
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.226111
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.204460
  

Optimization terminated successfully.
         Current function value: 0.244611
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.224204
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.229866
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.217678
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.240119
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.219950
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.216165
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236035
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.228472
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231862
  

Optimization terminated successfully.
         Current function value: 0.236408
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.240106
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.213955
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.200810
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.234054
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.214694
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.248811
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.214919
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.241799
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.212777
  

Optimization terminated successfully.
         Current function value: 0.223015
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.230191
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.219906
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.209029
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.209468
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.221340
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.237619
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.217307
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.224600
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.236787
  

Optimization terminated successfully.
         Current function value: 0.215975
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231502
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.245635
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.249606
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.234213
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.224899
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.233935
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.221683
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231164
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.218646
  

Optimization terminated successfully.
         Current function value: 0.209902
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.239678
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.240446
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.241682
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.245098
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.205072
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.231216
         Iterations 8
Optimization terminated successfully.
         Current function value: 0.222244
         Iterations 8


In [27]:
print('It took',round((t_F-t_I)/60,0),'minutes')

It took 1.0 minutes


Mean value of the estimators

In [28]:
three_estimators.mean()

Beta0_our_est    0.507428
Beta1_our_est    2.030247
Theta_our_est    0.702514
Beta0_probit     0.576967
Beta1_probit     2.892824
dtype: float64

Standard deviation of the estimators

In [29]:
three_estimators.std()

Beta0_our_est    0.084084
Beta1_our_est    0.212715
Theta_our_est    0.054216
Beta0_probit     0.050910
Beta1_probit     0.134146
dtype: float64

Mean squared error of the new estimators

In [30]:
((three_estimators[['Beta0_our_est','Beta1_our_est']] - [0.5,2])**2).mean()

Beta0_our_est    0.007118
Beta1_our_est    0.046117
dtype: float64