# Monte Carlo Evaluation of the Standard Optimizer in Python: OLS

In [1]:
# This code tests scipy.optimize with a standard OLS.
# Everything is properly specified, so the model should cleanly converge.
# Analytic first derivatives are used with BFGS.
# reps is the number of replications.
# obs is the number of observations per replication.
# params is number of regressors and set at two to review results with readprobit.py.  A constant is automatically added.
# tol is the tolerance passed to the optimizer.
# The issue is that as the sample size grows, the LV scales upward, which causes the optimization to terminate prematurely.

import numpy as np
import statsmodels.api as sm
import pandas as pd
from scipy.optimize import minimize
from scipy import stats

In [2]:
reps = 10000

results = np.zeros((reps, 7))

for i in range(reps):

    obs = 1000000
    params = 2
    tol = 1e-05

    params = params + 1
    beta = np.random.normal(0, 1, (params, 1))
    beta0 = np.zeros((params, 1))
    X = np.random.normal(0, 1, (obs, params-1))
    X = sm.add_constant(X)
    y = np.dot(X, beta) + np.random.normal(0, 1, (obs, 1))

    def ols(b, y, X, obs, params):
        bv = b.view()
        bv.shape = params, 1
        e = y - np.dot(X, bv)
        return np.array(np.sum(e**2))

    def ols_grad(b, y, X, obs, params):
        bv = b.view()
        bv.shape = params, 1
        foc = -np.sum(X * (y - np.dot(X, bv)), axis=0)
        return np.array(foc)

    res = minimize(ols, beta0, args=(y, X, obs, params), method='BFGS',
                   jac=ols_grad, options={'disp': True, 'maxiter':10000},
                   tol=tol)

    zero = res.x[0] - beta[0, 0]
    one = res.x[1] - beta[1, 0]
    two = res.x[2] - beta[2, 0]
    sumd = np.sum(y, axis=0)/obs

    results[i] = i+1, res.success, sumd, zero, one, two, -res.fun

Optimization terminated successfully.
         Current function value: 997683.910966
         Iterations: 6
         Function evaluations: 13
         Gradient evaluations: 13
Optimization terminated successfully.
         Current function value: 1000970.498028
         Iterations: 6
         Function evaluations: 14
         Gradient evaluations: 14
Optimization terminated successfully.
         Current function value: 998593.261719
         Iterations: 7
         Function evaluations: 17
         Gradient evaluations: 17
Optimization terminated successfully.
         Current function value: 1000219.744942
         Iterations: 6
         Function evaluations: 13
         Gradient evaluations: 13
Optimization terminated successfully.
         Current function value: 998850.626671
         Iterations: 6
         Function evaluations: 15
         Gradient evaluations: 15
Optimization terminated successfully.
         Current function value: 1002422.004454
         Iterations: 6
         

In [3]:
data = pd.DataFrame(results)
data.columns = ['rep', 'success', 'aved', 'one', 'two', 'three', 'llv']
data.head()

Unnamed: 0,rep,success,aved,one,two,three,llv
0,1.0,1.0,0.578867,-9.3e-05,0.000965,-0.000182,-997683.9
1,2.0,1.0,-0.09549,0.000651,0.000419,-0.000511,-1000970.0
2,3.0,1.0,0.545041,0.000446,0.001044,0.000141,-998593.3
3,4.0,1.0,-0.750869,-0.00037,0.001705,-0.000579,-1000220.0
4,5.0,1.0,-2.064396,0.000737,-0.000918,-0.001252,-998850.6


In [4]:
data1 = data[data['success'] == 1]
data0 = data[data['success'] == 0]

print(data1.describe())
print(data0.describe())

                rep  success         aved          one          two  \
count   9620.000000   9620.0  9620.000000  9620.000000  9620.000000   
mean    5002.477755      1.0     0.012997     0.000003    -0.000003   
std     2886.796487      0.0     1.006277     0.001007     0.001006   
min        1.000000      1.0    -3.747690    -0.004405    -0.004021   
25%     2506.750000      1.0    -0.662369    -0.000669    -0.000683   
50%     4998.500000      1.0     0.011061     0.000015     0.000005   
75%     7505.250000      1.0     0.688440     0.000681     0.000672   
max    10000.000000      1.0     4.258833     0.003773     0.003806   

             three           llv  
count  9620.000000  9.620000e+03  
mean     -0.000002 -9.999987e+05  
std       0.000996  1.398474e+03  
min      -0.003714 -1.006451e+06  
25%      -0.000659 -1.000949e+06  
50%      -0.000009 -9.999937e+05  
75%       0.000654 -9.990403e+05  
max       0.003937 -9.945357e+05  
               rep  success        aved      