## Simulating - single feature, single label dataset

### **Without standard scaling**

In [1]:
import numpy as np
import pandas as pd

In [2]:
X = np.round(np.random.uniform(0,5, size= 1000), 3)

a, b  = np.random.normal(0, 0.1), np.random.normal(0, 0.2)#y_pred = a+ bx
error = np.random.normal(0, 1, size= 1000)

In [3]:
f= lambda x,e : np.round(a +b*x + e, 3)
data = pd.DataFrame({'features': X, 'targets':f(X, error)})

data.head(5)

Unnamed: 0,features,targets
0,1.634,-0.84
1,1.095,-0.81
2,4.865,0.51
3,1.968,1.468
4,0.029,1.85


In [83]:
data.to_csv("reg_sim_data.csv", index=None)

#### Training statsmodel

In [4]:
import statsmodels.api as sm
feats, targets= data['features'].values, data['targets'].values
feats= sm.add_constant(feats, prepend=False)

In [5]:
#Without standard scaling of data
model_stm = sm.OLS(targets, feats, hasconst=True)
result= model_stm.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     20.34
Date:                Mon, 23 Mar 2020   Prob (F-statistic):           7.24e-06
Time:                        08:44:21   Log-Likelihood:                -1454.1
No. Observations:                1000   AIC:                             2912.
Df Residuals:                     998   BIC:                             2922.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.1044      0.023      4.510      0.0

In [6]:
#Without standard scaling of data
print('Reference values from simulated dataset weight : {} & bias: {}'.format(b,a))

Reference values from simulated dataset weight : 0.10458035217582123 & bias: 0.11878560346515568


#### Training pytorch model

In [7]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

class toynet(nn.Module):
    def __init__(self):
        super(toynet, self).__init__()
        self.fc1 = nn.Linear(1,1)
        torch.nn.init.xavier_normal_(self.fc1.weight)
    def forward(self, x):
        x= self.fc1(x)
        return x

model = toynet()
model

toynet(
  (fc1): Linear(in_features=1, out_features=1, bias=True)
)

In [8]:
optimizer = optim.Adam(model.parameters())
loss = nn.MSELoss()

In [9]:
def sample(df, count):
    idx= np.random.randint(df.shape[0], size=count)
    x,y = df.values[idx][:,0].reshape(-1,1), df.values[idx][:,1]
    return x,y

def eval_hessian(loss_grad, model):
    cnt = 0
    for g in loss_grad:
        g_vector = g.contiguous().view(-1) if cnt == 0 else torch.cat([g_vector, g.contiguous().view(-1)])
        cnt = 1
    l = g_vector.size(0)
    hessian = torch.zeros(l, l)
    for idx in range(l):
        grad2rd = torch.autograd.grad(g_vector[idx], model.parameters(), create_graph=True)
        cnt = 0
        for g in grad2rd:
            g2 = g.contiguous().view(-1) if cnt == 0 else torch.cat([g2, g.contiguous().view(-1)])
            cnt = 1
        hessian[idx] = g2
    return hessian.cpu().data.numpy()

In [10]:
wts_bias_progression= list()
all_grads= list()
losses= list()

d2loss_dg= list()
diags= list()
f= lambda x: np.sqrt(np.abs(np.diag(np.linalg.inv(x)))).tolist()

for epoch in range(4000):
    x_, y_ = sample(data, count= 64)
    #x_,y_= data.values[:,0].reshape(-1,1), data.values[:,1]
    trainx = Variable(torch.Tensor(x_).float(), requires_grad= True)
    trainy = Variable(torch.reshape(torch.Tensor(y_).float(), (-1,1)))
    optimizer.zero_grad()
    y_pred= model(trainx)
    
    l = loss(y_pred, trainy)
    losses.append(l.item())
    if epoch%500==0:
        print('epoch: ', epoch, ' loss: ', l.item());
    
    l.backward()
    optimizer.step()
    
    wts_bias = list(model.parameters())
    wts_bias_vals= torch.cat((wts_bias[0], torch.reshape(wts_bias[1], (-1,1))),1)
    wts_bias_progression.append(wts_bias_vals.tolist())
    
    wts_bias_grads = torch.cat((wts_bias[0].grad, torch.reshape(wts_bias[1].grad, (-1,1))),1)
    #i_grads= trainx.grad
    all_grads.append(wts_bias_grads.tolist())#i_grads)
    d1loss_dg = torch.autograd.grad(loss(model(trainx), trainy), wts_bias, create_graph=True)
    d2loss_dg.append(eval_hessian(d1loss_dg, model))
    diags.append(f(d2loss_dg[-1]))

all_wts_bias = np.array(wts_bias_progression).reshape(-1,wts_bias_vals.shape[1])
all_wts_bias_df= pd.DataFrame(all_wts_bias, columns=['weight_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1])]+ ['bias'])

all_grads = np.array(all_grads).reshape(-1,wts_bias_vals.shape[1])
all_grads_df= pd.DataFrame(all_grads, columns=['beta_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1]+1)])

epoch:  0  loss:  2.4449501037597656
epoch:  500  loss:  1.2961708307266235
epoch:  1000  loss:  1.3966047763824463
epoch:  1500  loss:  1.5072994232177734
epoch:  2000  loss:  1.6176228523254395
epoch:  2500  loss:  1.184489130973816
epoch:  3000  loss:  0.8819632530212402
epoch:  3500  loss:  0.8783103227615356


In [11]:
list(model.parameters())

[Parameter containing:
 tensor([[0.1043]], requires_grad=True), Parameter containing:
 tensor([0.1559], requires_grad=True)]

#### Analytical standard error & T-value for b (slope estimator)

In [12]:
#S_b calc
sum_sqr_err = (1/(data['features'].shape[0] - 2))*(np.sum(error**2))
sum_sqr_diffx = np.sum((data['features'].values - np.mean(data['features'].values))**2)
s_beta = np.sqrt(sum_sqr_err/sum_sqr_diffx)
s_beta
#(1/(1000-2))*(np.sum(error**2))/np.sum((data['features'].values - np.mean(data['features'].values))**2)

0.023173378506969208

In [13]:
b_pred = list(model.parameters())[0] 
t= (b_pred - b)/s_beta
t#t-val

tensor([[-0.0112]], grad_fn=<DivBackward0>)

#### Calculating Pytorch Standar error

In [14]:
mini_batches= list()#contains mini_batches of gradients from 200 epochs
mean_mini_batches= list()#contains mean of each of 200 epochs each in a mini_batches of gradients from 200 epochs
mini_size= 200
for batch in range(1, (len(all_grads))//mini_size):
    mini_batches.append(all_grads[batch*mini_size:batch*mini_size+ mini_size])
    mean_mini_batches.append(np.mean(mini_batches[-1], axis=0).tolist())

grad_mini_batch_arr = np.array(mean_mini_batches).T
grad_mini_batch_arr.shape

(2, 19)

In [15]:
covariant_mat= np.cov(grad_mini_batch_arr)
pytorch_stderr= np.sqrt(covariant_mat.diagonal())#**2
pytorch_stderr

array([0.35230668, 0.09230267])

#### Stasmodel vs Pytorch comparison

In [16]:
summary= result.summary()
statsm_df = pd.DataFrame(summary.tables[1].data[1:], columns=summary.tables[1].data[0])
statsm_df

Unnamed: 0,Unnamed: 1,coef,std err,t,P>|t|,[0.025,0.975]
0,x1,0.1044,0.023,4.51,0.0,0.059,0.15
1,const,0.164,0.066,2.49,0.013,0.035,0.293


In [17]:
di = dict(zip('pytorch_grads,simulated_params,pytorch_params,statsmodel_params,statsmodel_stderr'.split(','), [all_grads[-1,:], [b,a], all_wts_bias[-1,:], result.params, statsm_df["std err"].values]))#all_grads
di.update({'pytorch_stderr':pytorch_stderr})
comp_df = pd.DataFrame(di, index=['b (slope/weight)','a (intercept/bias)'])
#comp_df['pytorch_stderr']= pytorch_stderr
comp_df['pytorch_params'] = comp_df['pytorch_params'].apply(lambda x:np.round(x,3))
comp_df['statsmodel_params'] = comp_df['statsmodel_params'].apply(lambda x:np.round(x,3))
#comp_df['Simulated_params']= [b,a]
comp_df

Unnamed: 0,pytorch_grads,simulated_params,pytorch_params,statsmodel_params,statsmodel_stderr,pytorch_stderr
b (slope/weight),-1.481094,0.10458,0.104,0.104,0.023,0.352307
a (intercept/bias),-0.359463,0.118786,0.156,0.164,0.066,0.092303


###  **With Standard scaling**

In [18]:
from sklearn.preprocessing import StandardScaler
sc= StandardScaler()
data_trans = data.copy()
trans_mat = sc.fit_transform(data_trans)
data_trans['features'] = trans_mat[:,0]
data_trans['targets']= trans_mat[:,1]
data_trans.head(5)

Unnamed: 0,features,targets
0,-0.588673,-1.205922
1,-0.969278,-1.177249
2,1.692836,0.084374
3,-0.352825,1.000006
4,-1.722013,1.365112


#### Training statsmodel

In [19]:
#With standard scaling of data
feats, targets= data_trans['features'].values, data_trans['targets'].values
feats= sm.add_constant(feats, prepend=False)

model_stm = sm.OLS(targets, feats, hasconst=True)
result_sc= model_stm.fit()
print(result_sc.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.019
Method:                 Least Squares   F-statistic:                     20.34
Date:                Mon, 23 Mar 2020   Prob (F-statistic):           7.24e-06
Time:                        08:46:02   Log-Likelihood:                -1408.8
No. Observations:                1000   AIC:                             2822.
Df Residuals:                     998   BIC:                             2832.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.1413      0.031      4.510      0.0

In [20]:
#With standard scaling of data
print('Reference values from simulated dataset weight : {} & bias: {}'.format(b,a))

Reference values from simulated dataset weight : 0.10458035217582123 & bias: 0.11878560346515568


#### Training pytorch model

In [42]:
model_sc = toynet()

optimizer_sc = optim.Adam(model_sc.parameters())
loss = nn.MSELoss()

In [43]:
wts_bias_progression= list()
all_grads= list()
losses= list()

d2loss_dg= list()
diags= list()
f= lambda x: np.sqrt(np.abs(np.diag(np.linalg.inv(x)))).tolist()

for epoch in range(4000):
    x_, y_ = sample(data_trans, count= 64)
    #x_,y_= data.values[:,0].reshape(-1,1), data.values[:,1]
    trainx = Variable(torch.Tensor(x_).float(), requires_grad= True)
    trainy = Variable(torch.reshape(torch.Tensor(y_).float(), (-1,1)))
    optimizer_sc.zero_grad()
    y_pred= model_sc(trainx)
    
    l = loss(y_pred, trainy)
    losses.append(l.item())
    if epoch%500==0:
        print('epoch: ', epoch, ' loss: ', l.item());
    
    l.backward()
    optimizer_sc.step()
    
    wts_bias = list(model_sc.parameters())
    wts_bias_vals= torch.cat((wts_bias[0], torch.reshape(wts_bias[1], (-1,1))),1)
    wts_bias_progression.append(wts_bias_vals.tolist())
    
    wts_bias_grads = torch.cat((wts_bias[0].grad, torch.reshape(wts_bias[1].grad, (-1,1))),1)
    #i_grads= trainx.grad
    all_grads.append(wts_bias_grads.tolist())#i_grads)
    d1loss_dg = torch.autograd.grad(loss(model_sc(trainx), trainy), wts_bias, create_graph=True)
    d2loss_dg.append(eval_hessian(d1loss_dg, model_sc))
    diags.append(f(d2loss_dg[-1]))

all_wts_bias = np.array(wts_bias_progression).reshape(-1, wts_bias_vals.shape[1])
all_wts_bias_df= pd.DataFrame(all_wts_bias, columns=['weight_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1])]+ ['bias'])

all_grads = np.array(all_grads).reshape(-1,wts_bias_vals.shape[1])
all_grads_df= pd.DataFrame(all_grads, columns=['beta_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1]+1)])

epoch:  0  loss:  2.634622097015381
epoch:  500  loss:  1.6234681606292725
epoch:  1000  loss:  1.1310476064682007
epoch:  1500  loss:  0.9287221431732178
epoch:  2000  loss:  0.8867921233177185
epoch:  2500  loss:  1.0071184635162354
epoch:  3000  loss:  0.9728839993476868
epoch:  3500  loss:  1.019735336303711


In [45]:
list(model_sc.parameters())

[Parameter containing:
 tensor([[0.1385]], requires_grad=True), Parameter containing:
 tensor([0.0001], requires_grad=True)]

#### Analytical standard error & T-value for b (slope estimator)

In [46]:
#S_b calc
sum_sqr_err = (1/(data_trans['features'].shape[0] - 2))*(np.sum(error**2))
sum_sqr_diffx = np.sum((data_trans['features'].values - np.mean(data_trans['features'].values))**2)
s_beta = np.sqrt(sum_sqr_err/sum_sqr_diffx)
s_beta
#(1/(1000-2))*(np.sum(error**2))/np.sum((data['features'].values - np.mean(data['features'].values))**2)

0.03281739536037299

In [47]:
b_pred = list(model_sc.parameters())[0] 
t= (b_pred - b)/s_beta
t#t-val

tensor([[1.0348]], grad_fn=<DivBackward0>)

#### Calculating Pytorch Standar error

In [48]:
mini_batches= list()#contains mini_batches of gradients from 200 epochs
mean_mini_batches= list()#contains mean of each of 200 epochs each in a mini_batches of gradients from 200 epochs
mini_size= 200
for batch in range(1, (len(all_grads))//mini_size):
    mini_batches.append(all_grads[batch*mini_size:batch*mini_size+ mini_size])
    mean_mini_batches.append(np.mean(mini_batches[-1], axis=0).tolist())

grad_mini_batch_arr = np.array(mean_mini_batches).T
grad_mini_batch_arr.shape

(2, 19)

In [49]:
covariant_mat= np.cov(grad_mini_batch_arr)
pytorch_stderr_sc= np.sqrt(covariant_mat.diagonal())#**2
pytorch_stderr_sc

array([0.4363688 , 0.36669437])

#### Stasmodel vs Pytorch comparison

In [50]:
summary= result_sc.summary()
statsm_df = pd.DataFrame(summary.tables[1].data[1:], columns=summary.tables[1].data[0])
statsm_df

Unnamed: 0,Unnamed: 1,coef,std err,t,P>|t|,[0.025,0.975]
0,x1,0.1413,0.031,4.51,0.0,0.08,0.203
1,const,0.0,0.031,0.0,1.0,-0.061,0.061


In [57]:
di = dict(zip('pytorch_grads_sc,simulated_params_sc,pytorch_params_sc,statsmodel_params_sc,statsmodel_stderr_sc'.split(','), [all_grads[-1,:], [b,a], all_wts_bias[-1,:], result_sc.params, statsm_df["std err"].values]))#all_grads
di.update({'pytorch_stderr_sc':pytorch_stderr_sc})
comp_df_sc = pd.DataFrame(di, index=['b (slope/weight)','a (intercept/bias)'])
#comp_df['pytorch_stderr']= pytorch_stderr
comp_df_sc['pytorch_params_sc'] = comp_df_sc['pytorch_params_sc'].apply(lambda x:np.round(x,3))
comp_df_sc['statsmodel_params_sc'] = comp_df_sc['statsmodel_params_sc'].apply(lambda x:np.round(x,3))
#comp_df['Simulated_params']= [b,a]
comp_df_sc

Unnamed: 0,pytorch_grads_sc,simulated_params_sc,pytorch_params_sc,statsmodel_params_sc,statsmodel_stderr_sc,pytorch_stderr_sc
b (slope/weight),-0.002663,0.10458,0.139,0.141,0.031,0.436369
a (intercept/bias),-0.07112,0.118786,0.0,0.0,0.031,0.366694


#### Scaled vs. Unscaled parameters

In [67]:
pd.concat([comp_df, comp_df_sc], axis=1)

Unnamed: 0,pytorch_grads,simulated_params,pytorch_params,statsmodel_params,statsmodel_stderr,pytorch_stderr,pytorch_grads_sc,simulated_params_sc,pytorch_params_sc,statsmodel_params_sc,statsmodel_stderr_sc,pytorch_stderr_sc
b (slope/weight),-1.481094,0.10458,0.104,0.104,0.023,0.352307,-0.002663,0.10458,0.139,0.141,0.031,0.436369
a (intercept/bias),-0.359463,0.118786,0.156,0.164,0.066,0.092303,-0.07112,0.118786,0.0,0.0,0.031,0.366694


### Experiment with smaller dataset

In [68]:
Xs = np.round(np.random.uniform(0,5, size= 20), 3)

a_s, b_s  = np.random.normal(0, 0.1), np.random.normal(0, 0.2)
error_s = np.random.normal(0, 1, size= 20)

In [69]:
f= lambda x,e : np.round(a_s +b_s*x + e, 3)
data_s = pd.DataFrame({'features': Xs, 'targets':f(Xs, error_s)})
sc= StandardScaler()
trans_mat_s = sc.fit_transform(data_s)

data_s['features'] = trans_mat_s[:,0]
data_s['targets']= trans_mat_s[:,1]

data_s

Unnamed: 0,features,targets
0,0.246088,1.880965
1,0.903562,-1.152908
2,-0.848669,-0.900387
3,-0.114438,0.984018
4,0.072415,-0.968269
5,-0.135371,-0.607137
6,0.31199,0.521515
7,1.781228,-0.041453
8,-0.360215,-0.654202
9,1.182678,0.329635


##### Statsmodel

In [70]:
import statsmodels.api as sm
feats_s, targets_s= data_s['features'].values, data_s['targets'].values
feats_s= sm.add_constant(feats_s, prepend=False)

In [71]:
#Without standard_scaling of data
model_stm_s = sm.OLS(targets_s, feats_s, hasconst=True)
result_s= model_stm_s.fit()
print(result_s.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.091
Model:                            OLS   Adj. R-squared:                  0.041
Method:                 Least Squares   F-statistic:                     1.812
Date:                Mon, 23 Mar 2020   Prob (F-statistic):              0.195
Time:                        09:23:04   Log-Likelihood:                -27.420
No. Observations:                  20   AIC:                             58.84
Df Residuals:                      18   BIC:                             60.83
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
x1             0.3024      0.225      1.346      0.1

In [72]:
#Without standard_scaling of data
print('From dataset weight : {} & bias: {}'.format(b_s,a_s))

From dataset weight : 0.10144210862371957 & bias: -0.2742068459149354


##### Pytorch model

In [73]:
model_s = toynet()
optimizer_s = optim.Adam(model_s.parameters())
loss_s = nn.MSELoss()

In [74]:
wts_bias_progression_s= list()
all_grads_s= list()
losses_s= list()

d2loss_dg_s= list()
diags_s= list()
f= lambda x: np.sqrt(np.abs(np.diag(np.linalg.inv(x)))).tolist()

for epoch in range(4000):
    x_, y_ = sample(data_s, count= 8)
    #x_,y_= data.values[:,0].reshape(-1,1), data.values[:,1]
    trainx = Variable(torch.Tensor(x_).float(), requires_grad= True)
    trainy = Variable(torch.reshape(torch.Tensor(y_).float(), (-1,1)))
    optimizer_s.zero_grad()
    y_pred= model_s(trainx)
    
    l = loss_s(y_pred, trainy)
    losses_s.append(l.item())
    if epoch%500==0:
        print('epoch: ', epoch, ' loss: ', l.item());
    
    l.backward()
    optimizer_s.step()
    
    wts_bias = list(model_s.parameters())
    wts_bias_vals= torch.cat((wts_bias[0], torch.reshape(wts_bias[1], (-1,1))),1)
    wts_bias_progression_s.append(wts_bias_vals.tolist())
    
    wts_bias_grads = torch.cat((wts_bias[0].grad, torch.reshape(wts_bias[1].grad, (-1,1))),1)
    #i_grads= trainx.grad
    all_grads_s.append(wts_bias_grads.tolist())#i_grads)
    d1loss_dg_s = torch.autograd.grad(loss_s(model_s(trainx), trainy), wts_bias, create_graph=True)
    d2loss_dg_s.append(eval_hessian(d1loss_dg_s, model_s))
    diags_s.append(f(d2loss_dg_s[-1]))

all_wts_bias_s = np.array(wts_bias_progression_s).reshape(-1,wts_bias_vals.shape[1])
all_wts_bias_df_s= pd.DataFrame(all_wts_bias_s, columns=['weight_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1])]+ ['bias'])

all_grads_s = np.array(all_grads_s).reshape(-1,wts_bias_vals.shape[1])
all_grads_df_s= pd.DataFrame(all_grads_s, columns=['beta_{}'.format(idx) for idx in range(1,wts_bias_vals.shape[1]+1)])


epoch:  0  loss:  0.7195181250572205
epoch:  500  loss:  0.8909294009208679
epoch:  1000  loss:  0.9296377897262573
epoch:  1500  loss:  0.38164255023002625
epoch:  2000  loss:  0.6488150358200073
epoch:  2500  loss:  0.4885692000389099
epoch:  3000  loss:  0.9722121953964233
epoch:  3500  loss:  1.1143834590911865


##### Calculating Pytorch Standar error

In [75]:
mini_batches_s= list()#contains mini_batches of gradients from 200 epochs
mean_mini_batches_s= list()#contains mean of each of 200 epochs each in a mini_batches of gradients from 200 epochs
mini_size= 200
for batch in range(1, (len(all_grads_s))//mini_size):
    mini_batches_s.append(all_grads_s[batch*mini_size:batch*mini_size+ mini_size])
    mean_mini_batches_s.append(np.mean(mini_batches_s[-1], axis=0).tolist())

grad_mini_batch_arr_s = np.array(mean_mini_batches_s).T
grad_mini_batch_arr_s.shape

(2, 19)

In [76]:
covariant_mat_s= np.cov(grad_mini_batch_arr_s)
pytorch_stderr_s= np.sqrt(covariant_mat_s.diagonal())#**2
pytorch_stderr_s

array([0.09007549, 0.19764432])

In [77]:
summary_s= result_s.summary()
statsm_df_s = pd.DataFrame(summary_s.tables[1].data[1:], columns=summary_s.tables[1].data[0])
statsm_df_s

Unnamed: 0,Unnamed: 1,coef,std err,t,P>|t|,[0.025,0.975]
0,x1,0.3024,0.225,1.346,0.195,-0.17,0.774
1,const,0.0,0.225,0.0,1.0,-0.472,0.472


In [78]:
#Without standard_scaling
di_s = dict(zip('pytorch_grads,simulated_params,pytorch_params,statsmodel_params,statsmodel_stderr'.split(','), [all_grads_s[-1,:], [b_s,a_s], all_wts_bias_s[-1,:], result_s.params, statsm_df_s["std err"].values]))#all_grads
di_s.update({'pytorch_stderr':pytorch_stderr_s})
comp_df_s = pd.DataFrame(di_s, index=['b (slope/weight)','a (intercept/bias)'])
comp_df_s['pytorch_params'] = comp_df_s['pytorch_params'].apply(lambda x:np.round(x,3))
comp_df_s['statsmodel_params'] = comp_df_s['statsmodel_params'].apply(lambda x:np.round(x,3))
#comp_df['Simulated_params']= [b,a]
comp_df_s

Unnamed: 0,pytorch_grads,simulated_params,pytorch_params,statsmodel_params,statsmodel_stderr,pytorch_stderr
b (slope/weight),-0.266607,0.101442,0.308,0.302,0.225,0.090075
a (intercept/bias),0.256882,-0.274207,-0.0,0.0,0.225,0.197644


##### Standard error & T-value for b (slope estimator)

In [79]:
#S_b calc
sum_sqr_err_s = (1/(data_s['features'].shape[0] - 2))*(np.sum(error_s**2))
sum_sqr_diffx_s = np.sum((data_s['features'].values - np.mean(data_s['features'].values))**2)
s_beta_s = np.sqrt(sum_sqr_err_s/sum_sqr_diffx_s)
s_beta_s
#(1/(1000-2))*(np.sum(error**2))/np.sum((data['features'].values - np.mean(data['features'].values))**2)

0.2553713206689374

In [80]:
# t-value = (b'- b)/(S_b)
b_pred_s = list(model_s.parameters())[0] 
t_s= (b_pred_s - b_s)/s_beta_s
t_s

tensor([[0.8091]], grad_fn=<DivBackward0>)

* With standard_scaling & glorot normal kernel init, values of a & b vary farther from originally simulated for both pytorch & statsmodel
* Statsmodel returned equal std error for a & b in case of both dataset_1 & dataset_2; Whereas pytroch std_Error values were different.
* For dataset 1, pytorch a & b and statsmodel a & looked almost similar, but Both were far from simulated a & b vals.
* For dataset 2, all 3 a & b pairs from simulated, pytorch & statsmodel differed significantly.