In [2]:
import numpy as np

%run IRLS.py

This example was discussed in the third Regression Modelling lecture. The observed variables are
* $x_1\in\mathbb R^+$  - hours of sunlight
* $x_2\in\{0,1\}$ - binary variable encoding whether the data point corresponds to a weekday ($x_2=0$) or a weekend day ($x_2=1$)
* $x_3\in[0,1]$ - probability of rain
* $y\in\{0,1\}$ - response encoding whether a person went to the beach, which happens with probability $p\in[0,1]$, i.e. $\mathbb P(y=1)=p$, $\mathbb P(y=0)=1-p$

The goal is to fit a logistic model.

---
We first create $n=10$ data points for our $d=3$ variables.

In [4]:
''' Create some Data '''

n = 10
d = 3

# Each row of x is one observation (x_1, x_2, x_3).
# The first row is an observation on a day that had $1$ hour of sunlight, was a weekday,
# and had a high probability of rain (x_3=0.9); the observed person did not go to the
# beach (y[0] = 0). The other rows are created in the same way.
x = np.array([
    [ 1, 0, 0.9 ],
    [10, 1, 0.01],
    [10, 1, 0.01],
    [ 5, 1, 0.2 ],
    [ 5, 1, 0.2 ],
    [ 5, 1, 0.2 ],
    [ 6, 0, 0.2 ],
    [ 2, 0, 0.7 ],
    [11, 0, 0.0 ],
    [11, 0, 0.2 ],
], dtype=float)
print(x.shape)

# Responses, listed in the same order as the rows of x.
y = np.array([0, 1, 0, 1, 0, 0, 0, 0, 1, 1], dtype=float)

# Print the data
print('x = ',x)
print('y = ',y)

(10, 3)
x =  [[1.0e+00 0.0e+00 9.0e-01]
 [1.0e+01 1.0e+00 1.0e-02]
 [1.0e+01 1.0e+00 1.0e-02]
 [5.0e+00 1.0e+00 2.0e-01]
 [5.0e+00 1.0e+00 2.0e-01]
 [5.0e+00 1.0e+00 2.0e-01]
 [6.0e+00 0.0e+00 2.0e-01]
 [2.0e+00 0.0e+00 7.0e-01]
 [1.1e+01 0.0e+00 0.0e+00]
 [1.1e+01 0.0e+00 2.0e-01]]
y =  [0. 1. 0. 1. 0. 0. 0. 0. 1. 1.]


For illustrative purposes, we fit a linear model with the following predictors:
* an intercept $\beta_0$
* three coefficients $\beta_1, \dots, \beta_3$ corresponding to the three observed variables
* one parameter for the product $x_1x_3$.

In [5]:
''' Fit a linear model '''

# Compute the design matrix with columns: intercept, x_1, x_2, x_3 and the product x_1*x_3
X = np.c_[np.ones(n),x[:,0],x[:,1],x[:,2],x[:,0]*x[:,2]]

# Calculate the OLS estimate for the linear model.
# The least-squares problem min ||X beta - y|| is solved directly instead of evaluating
# inv(X^T X) X^T y: forming X^T X squares the condition number of the problem, and
# np.linalg.inv raises LinAlgError when X^T X is singular. lstsq returns the
# minimum-norm solution in that case; element [0] of its result is the coefficient vector.
beta_lin = np.linalg.lstsq(X, y, rcond=None)[0]

# Calculate the fitted values
y_fit_lin = np.dot(X,beta_lin)

print(y_fit_lin)

[-0.02119611  0.669732    0.669732    0.22017866  0.22017866  0.22017866
  0.29670786  0.04730465  0.72158443  0.95559917]


We observe that the first entry is negative. This makes no sense as the response encodes a probability. A logistic model is thus a better choice, which we fit next.

In [6]:
''' Fit a logistic model '''

# Use the same design matrix as above
X = np.c_[np.ones(n), x[:,0], x[:,1], x[:,2], x[:,0]*x[:,2]]

# Initialise the parameter vector (not used by the fits below)
beta_log = np.zeros(5)

# Three different starting points for the Iteratively Reweighted Least Squares algorithm:
# all zeros, all -1/2 and all -1.
beta_start1 = np.zeros(5)
beta_start2 = np.full(5, -0.5)
beta_start3 = np.full(5, -1.0)

# Compute one parameter estimate per starting point, in the order listed above
beta_log1, beta_log2, beta_log3 = (
    IRLS(start, X, y) for start in (beta_start1, beta_start2, beta_start3)
)

# Print the three different estimates for beta
for estimate in (beta_log1, beta_log2, beta_log3):
    print(estimate)

[-4.62264555  0.5002413   0.24423673 -3.87820944  1.37470365]
[-1513.81889573   163.03125227   215.19230672  1256.69075473
   232.22928629]
[-803458.68967156   89733.69600397  115691.66338073  692933.43327457
   99903.51175443]


  mu[i] = 1/(1+np.exp(-np.dot(beta_hat,X[i,:])))


Different starting points give different estimates for $\beta$, as the iterative algorithm may get stuck in local minima or diverge. We can compare the different estimates by calculating the respective fitted values and the corresponding Root Mean Squared Errors.

In [7]:
''' Compute the RMSE to compare different estimates of beta '''

# Compute the fitted values mu = 1/(1+exp(-eta)) for each estimate.
# The sigmoid is evaluated as exp(-log(1+exp(-eta))) with np.logaddexp(0,-eta) = log(1+exp(-eta)).
# This is the same function, but it does not overflow for large negative eta, whereas
# np.exp(-eta) in the direct formula does and triggers a RuntimeWarning for the
# estimates with very large coefficients.
eta1 = np.dot(X,beta_log1)
y_fit_log1 = np.exp(-np.logaddexp(0,-eta1))

eta2 = np.dot(X,beta_log2)
y_fit_log2 = np.exp(-np.logaddexp(0,-eta2))

eta3 = np.dot(X,beta_log3)
y_fit_log3 = np.exp(-np.logaddexp(0,-eta3))

# Root Mean Squared Error between the observed responses and the fitted values
rmse1 = np.sqrt(np.mean((y-y_fit_log1)**2))
rmse2 = np.sqrt(np.mean((y-y_fit_log2)**2))
rmse3 = np.sqrt(np.mean((y-y_fit_log3)**2))

print(rmse1)
print(rmse2)
print(rmse3)

0.3818162912072213
0.526881081806706
0.5477225575051661


  y_fit_log3 = 1/(1+np.exp(-eta3))


The first estimate of $\beta$ seems to be the best, but further starting points should be considered.

---
Note that the quality of the estimates gets better if more data is available, as illustrated by the next cell which is a copy of the above with more data points.

In [8]:
''' Create some Data '''
n = 50
d = 3

# Each row of x is one observation (x_1, x_2, x_3); the first ten rows are the
# data points used in the cells above.
x = np.array([
    # observations 0-9
    [ 1, 0, 0.9 ],
    [10, 1, 0.01],
    [10, 1, 0.01],
    [ 5, 1, 0.2 ],
    [ 5, 1, 0.2 ],
    [ 5, 1, 0.2 ],
    [ 6, 0, 0.2 ],
    [ 2, 0, 0.7 ],
    [11, 0, 0.0 ],
    [11, 0, 0.2 ],
    # observations 10-19
    [ 4, 0, 0.12],
    [ 8, 1, 0.1 ],
    [ 2, 1, 0.01],
    [ 0, 1, 0.9 ],
    [ 5, 1, 0.6 ],
    [ 5, 1, 0.8 ],
    [ 6, 0, 0.9 ],
    [ 7, 0, 0.1 ],
    [ 7, 1, 0.0 ],
    [11, 0, 0.4 ],
    # observations 20-29
    [ 1, 0, 0.9 ],
    [10, 1, 0.1 ],
    [10, 0, 0.1 ],
    [ 5, 1, 0.1 ],
    [ 5, 0, 0.8 ],
    [ 1, 1, 0.4 ],
    [ 6, 0, 0.5 ],
    [ 2, 0, 0.3 ],
    [ 2, 0, 0.3 ],
    [11, 0, 0.1 ],
    # observations 30-39
    [ 4, 0, 0.9 ],
    [ 2, 0, 0.01],
    [ 2, 0, 0.02],
    [ 4, 1, 0.2 ],
    [ 5, 1, 0.0 ],
    [ 5, 1, 0.0 ],
    [ 3, 0, 0.2 ],
    [ 2, 0, 0.7 ],
    [10, 0, 0.0 ],
    [11, 0, 0.1 ],
    # observations 40-49
    [10, 1, 0.0 ],
    [10, 1, 0.1 ],
    [ 9, 1, 0.1 ],
    [ 2, 0, 0.8 ],
    [ 5, 1, 1   ],
    [ 0, 1, 1   ],
    [ 6, 0, 1   ],
    [ 2, 0, 0.7 ],
    [ 5, 0, 0.1 ],
    [11, 0, 0.1 ],
], dtype=float)

# Responses, listed in the same order as the rows of x (ten per line).
y = np.array([
    0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
    0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
    0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
    0, 1, 0, 1, 0, 1, 0, 0, 0, 1,
    1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
], dtype=float)


'''
Fit a logistic model
'''

# Use the same design matrix as before
X = np.c_[np.ones(n), x[:,0], x[:,1], x[:,2], x[:,0]*x[:,2]]

# Four different starting points for the Iteratively Reweighted Least Squares algorithm:
# all zeros, all -1/2, all -1 and all +1/2.
beta_start11 = np.zeros(5)
beta_start12 = np.full(5, -0.5)
beta_start13 = np.full(5, -1.0)
beta_start14 = np.full(5, 0.5)

# Compute one parameter estimate per starting point, in the order listed above
beta_log11, beta_log12, beta_log13, beta_log14 = (
    IRLS(start, X, y)
    for start in (beta_start11, beta_start12, beta_start13, beta_start14)
)


# Print the three different estimates for beta which were calculated using only 10 data points
for label, estimate in (
    ('beta_log1 = ', beta_log1),
    ('beta_log2 = ', beta_log2),
    ('beta_log3 = ', beta_log3),
):
    print(label, estimate)

# Print the four different estimates for beta which were calculated using all 50 data points
for label, estimate in (
    ('beta_log11 = ', beta_log11),
    ('beta_log12 = ', beta_log12),
    ('beta_log13 = ', beta_log13),
    ('beta_log14 = ', beta_log14),
):
    print(label, estimate)





'''
Compute the RMSE to compare different estimates of beta
'''

# The fitted values are mu = 1/(1+exp(-eta)). The sigmoid is evaluated as
# exp(-log(1+exp(-eta))) with np.logaddexp(0,-eta) = log(1+exp(-eta)). This is the same
# function, but it does not overflow for large negative eta, whereas np.exp(-eta) in the
# direct formula does and triggers a RuntimeWarning for the estimates with very large
# coefficients.

# Compute the fitted values for the estimates calculated from the first 10 data points only
eta1 = np.dot(X,beta_log1)
y_fit_log1 = np.exp(-np.logaddexp(0,-eta1))

eta2 = np.dot(X,beta_log2)
y_fit_log2 = np.exp(-np.logaddexp(0,-eta2))

eta3 = np.dot(X,beta_log3)
y_fit_log3 = np.exp(-np.logaddexp(0,-eta3))


# Compute the fitted values for the estimates calculated from the full data
eta11 = np.dot(X,beta_log11)
y_fit_log11 = np.exp(-np.logaddexp(0,-eta11))

eta12 = np.dot(X,beta_log12)
y_fit_log12 = np.exp(-np.logaddexp(0,-eta12))

eta13 = np.dot(X,beta_log13)
y_fit_log13 = np.exp(-np.logaddexp(0,-eta13))

eta14 = np.dot(X,beta_log14)
y_fit_log14 = np.exp(-np.logaddexp(0,-eta14))


'''
Calculate the RMSE
'''

# Calculate the RMSE for the fitted values calculated using the parameters estimates using only 10 data points
rmse1 = np.sqrt(np.mean((y-y_fit_log1)**2))
rmse2 = np.sqrt(np.mean((y-y_fit_log2)**2))
rmse3 = np.sqrt(np.mean((y-y_fit_log3)**2))

print('\nRMSE1 = ',rmse1)
print('RMSE2 = ',rmse2)
print('RMSE3 = ',rmse3)

# Calculate the RMSE for the fitted values calculated using the parameters estimates using all 50 data points
rmse11 = np.sqrt(np.mean((y-y_fit_log11)**2))
rmse12 = np.sqrt(np.mean((y-y_fit_log12)**2))
rmse13 = np.sqrt(np.mean((y-y_fit_log13)**2))
rmse14 = np.sqrt(np.mean((y-y_fit_log14)**2))

print('RMSE11 = ',rmse11)
print('RMSE12 = ',rmse12)
print('RMSE13 = ',rmse13)
print('RMSE14 = ',rmse14)

beta_log1 =  [-4.62264555  0.5002413   0.24423673 -3.87820944  1.37470365]
beta_log2 =  [-1513.81889573   163.03125227   215.19230672  1256.69075473
   232.22928629]
beta_log3 =  [-803458.68967156   89733.69600397  115691.66338073  692933.43327457
   99903.51175443]
beta_log11 =  [ 0.1125752   0.11964098  0.16142643 -2.99378273 -0.36843143]
beta_log12 =  [-60.18702415  25.88068448  -2.67139389  64.88954979 -28.25820353]
beta_log13 =  [-2756.58449761  1310.50377484   286.46937906  2652.40518622
 -1157.0694898 ]
beta_log14 =  [ 39.34197521  -7.25097291 -27.78621103   2.8053515  -46.8491768 ]

RMSE1 =  0.5653257881858086
RMSE2 =  0.7179977240394899
RMSE3 =  0.7348469228349535
RMSE11 =  0.40680424848111385
RMSE12 =  0.6379198281954256
RMSE13 =  0.7071067811865475
RMSE14 =  0.675425266701855


  y_fit_log2 = 1/(1+np.exp(-eta2))
  y_fit_log3 = 1/(1+np.exp(-eta3))


Judging from the RMSE, the best estimate is $\beta_{log11}$.