# OLSAbsorb: Absorbing categorical variables in OLS

One of the main usecases for this is absorbing fixed effects in panel data.

The fixed effects are included as sparse dummy matrix and partialled out of the main explanatory variables.

**Note** the first part is broken because I changed the structure of the simulated data. (IIRC)

In [1]:
import time

import numpy as np
from scipy import sparse
import pandas as pd

from statsmodels.regression.linear_model import OLS
from statsmodels.regression.special_linear_model import OLSAbsorb, cat2dummy_sparse
from statsmodels.tools._sparse import PartialingSparse, dummy_sparse


In [2]:
k_cat1, k_cat2 = 500, 100

keep = (np.random.rand(k_cat1 * k_cat2) > 0.1).astype(bool)

xcat1 = np.repeat(np.arange(k_cat1), k_cat2)[keep]
xcat2 = np.tile(np.arange(k_cat2), k_cat1)[keep]
exog_absorb = np.column_stack((xcat1, xcat2))
nobs = len(xcat1)

exog_sparse = cat2dummy_sparse(exog_absorb)
beta_sparse = 1. / np.r_[np.arange(1, k_cat1), np.arange(1, k_cat2 + 1)]

np.random.seed(999)
beta_dense = np.ones(3)
exog_dense = np.column_stack((np.ones(exog_sparse.shape[0]), np.random.randn(exog_sparse.shape[0], len(beta_dense) - 1)))
y = exog_dense.dot(beta_dense) + exog_sparse.dot(beta_sparse) + 0.01 * np.random.randn(nobs)


In [3]:
exog_absorb.shape, exog_sparse.shape, beta_sparse.shape

((44928, 2), (44928, 599), (599,))

In [4]:
t0 = time.time()
mod_absorb = OLSAbsorb(y, exog_dense, exog_absorb)
res_absorb = mod_absorb.fit()
t1 = time.time()
print('time: ', t1 - t0)

time:  0.06258296966552734


In [5]:
print(res_absorb.summary())

                         OLSAbsorb Regression Results                         
Dep. Variable:                      y   R-squared:                       1.000
Model:                      OLSAbsorb   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.517e+06
Date:                Sun, 03 Apr 2016   Prob (F-statistic):               0.00
Time:                        15:58:50   Log-Likelihood:             1.4362e+05
No. Observations:               44928   AIC:                        -2.860e+05
Df Residuals:                   44327   BIC:                        -2.808e+05
Df Model:                         600                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0657    4.7e-05   2.27e+04      0.0

In [6]:
xcat1

array([  0,   0,   0, ..., 499, 499, 499])

In [7]:
xcat2

array([ 0,  1,  2, ..., 97, 98, 99])

In [8]:
locals().keys()

dict_keys(['_ih', '_i5', '__doc__', '_i1', 'xcat1', '__name__', '__builtin__', '_iii', 'pd', 'quit', '__builtins__', 't1', '_i8', 'dummy_sparse', '_i2', '_i6', '_7', 'exog_sparse', 'np', 'beta_dense', 'OLSAbsorb', '_i', 'mod_absorb', 'sparse', 'beta_sparse', 't0', '_ii', 'k_cat2', '_', 'PartialingSparse', '_3', 'get_ipython', '_i7', '_i3', '__package__', 'exog_absorb', '_dh', '___', '_i4', 'xcat2', '_oh', '_6', '__spec__', 'cat2dummy_sparse', '_sh', 'exog_dense', '__loader__', 'res_absorb', 'Out', 'OLS', 'In', '__', 'nobs', 'keep', 'y', 'exit', 'time', 'k_cat1'])

In [9]:
exog_sparse.nnz

89761

In [10]:
exog_sparse


<44928x599 sparse matrix of type '<class 'numpy.float64'>'
	with 89761 stored elements in Compressed Sparse Row format>

In [11]:
exog_sparse.T.dot(exog_sparse)

<599x599 sparse matrix of type '<class 'numpy.float64'>'
	with 90265 stored elements in Compressed Sparse Column format>

In [12]:
xcat2.reshape(k_cat1, k_cat2)[:20, :20]

ValueError: total size of new array must be unchanged

In [13]:
xm = exog_dense[:,-1].reshape(k_cat1, k_cat2)

ValueError: total size of new array must be unchanged

In [14]:
xm -= xm.mean(1)[:,None]
xm -= xm.mean(0)

NameError: name 'xm' is not defined

In [None]:
xm[:5, :5]

In [None]:
res_absorb.model.exog[:20, -1]

In [None]:
xm.ravel()[:20]

In [None]:
np.max(np.abs(xm.ravel() - res_absorb.model.exog[:, -1]))

In [None]:
(xm.ravel(), res_absorb.model.exog[:, -1])[:20]

In [None]:
xm = exog_dense.reshape(-1, k_cat1, k_cat2)
xm -= xm.mean(1, keepdims=True)
xm -= xm.mean(2, keepdims=True)
np.max(np.abs(xm.reshape(-1, exog_dense.shape[-1]) - res_absorb.model.exog))

In [None]:
xm = exog_dense.reshape(-1, k_cat1, k_cat2)
xm -= np.nanmean(xm, axis=1, keepdims=True)
xm -= np.nanmean(xm, axis=2, keepdims=True)
np.max(np.abs(xm.reshape(-1, exog_dense.shape[-1]) - res_absorb.model.exog))

In [None]:
k_cat = (k_cat1, k_cat2)
xm = exog_dense.reshape(-1, *k_cat)
for axis in range(xm.ndim):
    xm -= np.nanmean(xm, axis=axis, keepdims=True)
np.max(np.abs(xm.reshape(-1, exog_dense.shape[-1]) - res_absorb.model.exog))

In [15]:
from pandas import DataFrame
np.random.seed(1234)
df = DataFrame({'A' : np.random.randint(0,10,size=100), 'B' : np.random.randn(100)})
df['C'] = df['B'] - df.groupby('A')['B'].transform('mean')
df.head()
df[df.A==3]

Unnamed: 0,A,B,C
0,3,0.299347,0.412738
21,3,-0.063758,0.049633
26,3,-1.000889,-0.887498
28,3,0.15952,0.272911
30,3,0.02834,0.141731
32,3,-0.712358,-0.598967
51,3,-0.288721,-0.17533
60,3,0.4189,0.532291
99,3,0.139099,0.25249


In [16]:
df[df.A==3].B.mean()

-0.11339119534010732

In [17]:
df.head()

Unnamed: 0,A,B,C
0,3,0.299347,0.412738
1,6,0.127277,0.161831
2,5,0.92619,0.838768
3,4,2.45524,1.855498
4,8,-0.32089,0.129203


In [18]:
df['B'] -= df.groupby('A')['B'].transform('mean')
df.head()

Unnamed: 0,A,B,C
0,3,0.412738,0.412738
1,6,0.161831,0.161831
2,5,0.838768,0.838768
3,4,1.855498,1.855498
4,8,0.129203,0.129203


In [19]:
np.random.seed(1234)
df = DataFrame({'A' : np.random.randint(0,10,size=100), 'B' : np.random.randn(100), 'D' : np.random.randn(100)})
df[['B', 'D']] -= df.groupby('A')[['B', 'D']].transform('mean')
df[df.A==3].B.mean()

-6.1679056923619804e-18

In [20]:
df[df.A==3]

Unnamed: 0,A,B,D
0,3,0.412738,-0.2575
21,3,0.049633,-0.613769
26,3,-0.887498,-0.414497
28,3,0.272911,1.268343
30,3,0.141731,-0.714718
32,3,-0.598967,0.549872
51,3,-0.17533,-0.013277
60,3,0.532291,1.490721
99,3,0.25249,-1.295175


In [21]:
np.random.seed(1234)
df = DataFrame({'A' : np.random.randint(0,10,size=100), 'B' : np.random.randn(100), 'D' : np.random.randn(100)})
df2 = df[['B', 'D']].copy()
df2[df2.columns] -= df2.groupby(df['A'])[df2.columns].transform('mean')
df2[df.A==3].B.mean()

-6.1679056923619804e-18

In [22]:
import pandas as pd
pd.__version__

'0.17.0'

In [23]:
# with unbalanced panel

k_cat = (k_cat1, k_cat2)
xm = np.empty(exog_dense.shape[1:] + k_cat)
xm.fill(np.nan)
xm[:, xcat1, xcat2] = exog_dense.T
for it in range(3):
    for axis in range(1, xm.ndim):
        xm = xm - np.nanmean(xm, axis=axis, keepdims=True)
np.max(np.abs(xm.reshape(exog_dense.shape[-1], -1).T[keep] + exog_dense.mean(0) - res_absorb.model.wexog), axis=0)

array([  2.18292051e-12,   1.31658505e-08,   1.17322930e-08])

In [24]:
np.mean(np.abs(xm.reshape(exog_dense.shape[-1], -1).T[keep] - res_absorb.model.wexog), axis=0)

array([  1.00000000e+00,   5.41525565e-03,   2.46788810e-04])

In [25]:
exog_dense.mean(0)

array([  1.00000000e+00,   5.41525565e-03,  -2.46788810e-04])

In [26]:
xm.shape

(3, 500, 100)

In [27]:
xm[2, :5, :5].T

array([[ 1.26394181,  0.46584819,  0.17011328,  1.06900329,         nan],
       [-1.02487023, -1.762962  , -1.52234638,  0.03682222,  0.99348826],
       [-0.83472351,  0.69393611,  0.77325961,  0.36375427, -0.49617988],
       [-2.28437002,         nan,  0.46866376,  0.33590589,  0.47699803],
       [ 0.88806842,  0.2459097 ,  1.17683859, -0.12981495, -0.11435113]])

In [28]:
xm.reshape(exog_dense.shape[-1], -1).T[:35]

array([[ 0.        ,  0.0618275 ,  1.26394181],
       [ 0.        ,  0.32023664, -1.02487023],
       [ 0.        , -0.33022879, -0.83472351],
       [ 0.        ,  1.56766166, -2.28437002],
       [ 0.        ,  1.4179184 ,  0.88806842],
       [ 0.        , -0.5359787 ,  0.14074843],
       [ 0.        , -1.15966516, -0.74430016],
       [ 0.        , -0.19365902,  0.69566126],
       [ 0.        , -0.22343062,  1.4047113 ],
       [ 0.        ,  0.96204049, -1.02748029],
       [ 0.        , -0.30124072, -1.03084757],
       [ 0.        , -0.75626532, -1.55046141],
       [        nan,         nan,         nan],
       [ 0.        ,  1.59802982,  2.00746502],
       [        nan,         nan,         nan],
       [ 0.        , -0.01003573, -1.92520494],
       [ 0.        , -0.40861778, -0.56808069],
       [ 0.        ,  0.58939296,  0.3435565 ],
       [ 0.        , -1.75669705, -1.48613444],
       [ 0.        ,  0.4545901 , -0.16261058],
       [ 0.        , -0.51823586,  0.427

In [29]:
res_absorb.model.wexog[:5]

array([[ 1.        ,  0.06724276,  1.26369502],
       [ 1.        ,  0.3256519 , -1.02511702],
       [ 1.        , -0.32481353, -0.8349703 ],
       [ 1.        ,  1.57307692, -2.2846168 ],
       [ 1.        ,  1.42333365,  0.88782163]])

In [30]:
(1 - np.isnan(xm)).sum(2)

array([[95, 95, 88, ..., 86, 91, 91],
       [95, 95, 88, ..., 86, 91, 91],
       [95, 95, 88, ..., 86, 91, 91]])

In [31]:
keep[:25]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True], dtype=bool)

In [32]:
exog_dense.shape + k_cat

(44928, 3, 500, 100)

In [33]:
xm.shape

(3, 500, 100)

In [34]:
xm[:, xcat1, xcat2].shape

(3, 44928)

In [35]:
xm.shape

(3, 500, 100)

In [36]:
xm[-1, :5,:5]

array([[ 1.26394181, -1.02487023, -0.83472351, -2.28437002,  0.88806842],
       [ 0.46584819, -1.762962  ,  0.69393611,         nan,  0.2459097 ],
       [ 0.17011328, -1.52234638,  0.77325961,  0.46866376,  1.17683859],
       [ 1.06900329,  0.03682222,  0.36375427,  0.33590589, -0.12981495],
       [        nan,  0.99348826, -0.49617988,  0.47699803, -0.11435113]])

In [37]:
xm.reshape(-1, exog_dense.shape[-1])[:15]


array([[  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [ nan,   0.,  nan],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,  nan,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.],
       [  0.,   0.,   0.]])

In [38]:
keep[:15]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True, False], dtype=bool)

In [39]:
res_absorb.model.exog[:15]

array([[ 1.        ,  0.12715784,  1.40189088],
       [ 1.        ,  0.31481499, -0.85844916],
       [ 1.        , -0.26613444, -0.64890071],
       [ 1.        ,  1.56626757, -2.09137019],
       [ 1.        ,  1.45632806,  0.94529342],
       [ 1.        , -0.40020119,  0.3152273 ],
       [ 1.        , -1.11006083, -0.58482153],
       [ 1.        , -0.18840956,  0.81302365],
       [ 1.        , -0.16130472,  1.60087155],
       [ 1.        ,  0.98434258, -0.83544737],
       [ 1.        , -0.18664934, -0.85806707],
       [ 1.        , -0.75977816, -1.51205424],
       [ 1.        ,  1.74777474,  2.13005498],
       [ 1.        ,  0.07115422, -1.84164224],
       [ 1.        , -0.3525907 , -0.40890379]])

In [40]:
(1-keep).sum()

5072

In [41]:
keep.shape

(50000,)

In [42]:
k_cat = (k_cat1, k_cat2)
xm = np.empty(exog_dense.shape[1:] + k_cat)
xm.fill(np.nan)
xm2 = xm.copy()
xm[:, xcat1, xcat2] = exog_dense.T
xm2[2, xcat1, xcat2] = exog_dense[:, 2]

In [43]:
xm[:, :5, :5]

array([[[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
        [ 1.        ,  1.        ,  1.        ,         nan,  1.        ],
        [ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
        [ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
        [        nan,  1.        ,  1.        ,  1.        ,  1.        ]],

       [[ 0.12715784,  0.31481499, -0.26613444,  1.56626757,  1.45632806],
        [ 0.77265645,  0.76373973,  0.79221509,         nan,  0.27631403],
        [ 0.03066691, -0.47453864,  1.77630876, -1.21232846, -0.99585271],
        [ 0.97388496, -0.48073742,  0.42265325, -1.02174976,  0.41540718],
        [        nan,  0.37263869,  1.26175176, -0.32165036, -0.46199352]],

       [[ 1.40189088, -0.85844916, -0.64890071, -2.09137019,  0.94529342],
        [ 0.29786262, -1.90247557,  0.57382427,         nan, -0.00279995],
        [ 0.07698096, -1.5870067 ,  0.72800102,  0.43058219,  1.00298219],
        [ 1.09573095,

In [44]:
np.nonzero(np.isnan(xm[2, :, :].ravel()))[0][:5]

array([12, 14, 28, 70, 80], dtype=int64)

In [45]:
exog_dense[:15, 2]

array([ 1.40189088, -0.85844916, -0.64890071, -2.09137019,  0.94529342,
        0.3152273 , -0.58482153,  0.81302365,  1.60087155, -0.83544737,
       -0.85806707, -1.51205424,  2.13005498, -1.84164224, -0.40890379])

In [46]:
exog_dense[500:515, 2]

array([ 1.06431957, -0.45038494,  0.15887889,  1.73184946,  0.67221779,
        1.30466212,  0.10767882, -0.7530067 ,  0.22635772,  0.46991989,
       -0.26436464,  1.16655405,  1.36775412,  0.13920489,  1.42370617])

In [47]:
exog_dense[xcat1 == 2][:5]

array([[ 1.        ,  0.03066691,  0.07698096],
       [ 1.        , -0.47453864, -1.5870067 ],
       [ 1.        ,  1.77630876,  0.72800102],
       [ 1.        , -1.21232846,  0.43058219],
       [ 1.        , -0.99585271,  1.00298219]])

In [48]:
np.nanmean(xm, axis=axis, keepdims=True)[:10]

array([[[ 1.        ],
        [ 1.        ],
        [ 1.        ],
        ..., 
        [ 1.        ],
        [ 1.        ],
        [ 1.        ]],

       [[ 0.03411261],
        [-0.18137585],
        [ 0.01521646],
        ..., 
        [-0.10891095],
        [ 0.06244571],
        [-0.02150389]],

       [[ 0.1407578 ],
        [-0.16312331],
        [-0.08665161],
        ..., 
        [-0.16764642],
        [-0.14435056],
        [ 0.18666585]]])

In [49]:
xcat2[:20]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 13, 15, 16, 17, 18,
       19, 20, 21])

In [50]:
np.nonzero(1 - keep)[0][:5]

array([12, 14, 28, 70, 80], dtype=int64)

In [51]:
exog_absorb[:35]

array([[ 0,  0],
       [ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0,  8],
       [ 0,  9],
       [ 0, 10],
       [ 0, 11],
       [ 0, 13],
       [ 0, 15],
       [ 0, 16],
       [ 0, 17],
       [ 0, 18],
       [ 0, 19],
       [ 0, 20],
       [ 0, 21],
       [ 0, 22],
       [ 0, 23],
       [ 0, 24],
       [ 0, 25],
       [ 0, 26],
       [ 0, 27],
       [ 0, 29],
       [ 0, 30],
       [ 0, 31],
       [ 0, 32],
       [ 0, 33],
       [ 0, 34],
       [ 0, 35],
       [ 0, 36],
       [ 0, 37]])

In [52]:
xm[2, :5, :5]

array([[ 1.40189088, -0.85844916, -0.64890071, -2.09137019,  0.94529342],
       [ 0.29786262, -1.90247557,  0.57382427,         nan, -0.00279995],
       [ 0.07698096, -1.5870067 ,  0.72800102,  0.43058219,  1.00298219],
       [ 1.09573095,  0.09202187,  0.43835566,  0.4176843 , -0.18381137],
       [        nan,  1.05032622, -0.41994018,  0.56041475, -0.16670924]])

In [53]:
np.nanmean(xm2[2, :,:], axis=0, keepdims=True)[:, :5]

array([[-0.00617544,  0.02179778,  0.04245469,  0.05077307, -0.08571387]])

In [54]:
exog_dense[xcat2 == 2, 2].mean()

0.042454688732928542

In [55]:
k_cat


(500, 100)

In [56]:
np.isnan(xm[2, :, :].ravel()[keep]).any()

False

In [57]:
exog_dense.shape

(44928, 3)

In [58]:
(xm2[2, :,:] - np.nanmean(xm2[2, :,:], axis=0, keepdims=True)).shape

(500, 100)

In [59]:
def _group_demean_iterative(exog_dense, groups, add_mean=True, max_iter=10, atol=1e-8, get_groupmeans=False):
    """iteratively demean an array for two-way fixed effects
    
    This is intended for almost balanced panels. The data is converted
    to a 3-dimensional array with nans for missing cells.
    
    currently works only for two-way effects
    groups have to be integers corresponding to range(k_cati)
    
    no input error checking
    
    This function will change as more options and special cases are
    included.
    
    Parameters
    ----------
    exog_dense : 2d ndarray
        data with observations in rows and variables in columns.
        This array will currently not be modified.
    groups : 2d ndarray, int
        groups labels specified as consecutive integers starting at zero
    max_iter : int
        maximum number of iterations
    atol : float
        tolerance for convergence. Convergence is achieved if the
        maximum absolute change (np.ptp) is smaller than atol.
        
    Returns
    -------
    ex_dm_w : ndarray
        group demeaned exog_dense array in wide format
    ex_dm : ndarray
        group demeaned exog_dense array in long format
    it : int
        number of iterations used. If convergence has not been
        achieved then it will be equal to max_iter - 1
    
    """
    # with unbalanced panel

    k_cat = tuple((groups.max(0) + 1).tolist())
    xm = np.empty(exog_dense.shape[1:] + k_cat)
    xm.fill(np.nan)
    xm[:, groups[:, 0], groups[:, 1]] = exog_dense.T
    # for final group means
    gmean = []
    if get_groupmeans:
        gmean = [np.nanmean(xm, axis=axis) for axis in range(len(k_cat))]
    keep = ~np.isnan(xm[0]).ravel()
    finished = False
    for it in range(max_iter):
        for axis in range(1, xm.ndim):
            group_mean = np.nanmean(xm, axis=axis, keepdims=True)
            xm -= group_mean
            if np.ptp(group_mean) < atol:
                finished = True
                break
        if finished:
            break
    
    xd = xm.reshape(exog_dense.shape[-1], -1).T[keep]
    if add_mean:
        xmean = exog_dense.mean(0)
        xd += xmean
        xm += xmean[:, None, None]
    return xm, xd, it

xm, xd, it = _group_demean_iterative(exog_dense, exog_absorb, max_iter=50, add_mean=False)
xm.shape, it

((3, 500, 100), 3)

In [60]:
np.max(np.abs(xm.reshape(exog_dense.shape[-1], -1).T[keep] + exog_dense.mean(0) - res_absorb.model.wexog), axis=0)

array([  2.18292051e-12,   2.44853027e-11,   2.45419240e-11])

In [61]:
np.max(np.abs(xd + exog_dense.mean(0) - res_absorb.model.wexog), axis=0)

array([  2.18292051e-12,   2.44853027e-11,   2.45419240e-11])

In [62]:
xm, xd, it = _group_demean_iterative(exog_dense, exog_absorb, max_iter=50, add_mean=True)
np.max(np.abs(xd - res_absorb.model.wexog), axis=0)

array([  2.18292051e-12,   2.44853027e-11,   2.45419240e-11])

In [63]:
xd.shape

(44928, 3)

In [64]:
ym, yd, it = _group_demean_iterative(y[:,None], exog_absorb, max_iter=50, add_mean=True)

In [65]:
mod_ols2 = OLS(yd, xd)
ddof = k_cat1 + k_cat2 - 2
mod_ols2.df_resid = mod_ols2.df_resid - ddof
mod_ols2.df_model = mod_ols2.df_model + ddof
res_ols2 = mod_ols2.fit()

In [66]:
res_ols2.params


array([ 1.06568984,  0.99998518,  0.99991756])

In [67]:
res_absorb.params

array([ 1.06568984,  0.99998518,  0.99991756])

In [68]:
res_ols2.bse

array([  4.70042741e-05,   4.72122565e-05,   4.75344441e-05])

In [69]:
res_absorb.bse

array([  4.70042741e-05,   4.72122565e-05,   4.75344441e-05])

In [70]:
res_ols2.bse / res_absorb.bse

array([ 1.,  1.,  1.])

In [71]:
ddof

598

In [72]:
res_ols2.df_resid, res_absorb.df_resid, res_ols2.df_model, res_absorb.df_model

(44327.0, 44327.0, 600.0, 600.0)

In [73]:
print(res_ols2.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.484e+06
Date:                Sun, 03 Apr 2016   Prob (F-statistic):               0.00
Time:                        16:01:10   Log-Likelihood:             1.4362e+05
No. Observations:               44928   AIC:                        -2.860e+05
Df Residuals:                   44327   BIC:                        -2.808e+05
Df Model:                         600                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0657    4.7e-05   2.27e+04      0.0

In [74]:
print(res_absorb.summary())

                         OLSAbsorb Regression Results                         
Dep. Variable:                      y   R-squared:                       1.000
Model:                      OLSAbsorb   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.517e+06
Date:                Sun, 03 Apr 2016   Prob (F-statistic):               0.00
Time:                        16:01:10   Log-Likelihood:             1.4362e+05
No. Observations:               44928   AIC:                        -2.860e+05
Df Residuals:                   44327   BIC:                        -2.808e+05
Df Model:                         600                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.0657    4.7e-05   2.27e+04      0.0

In [75]:
xx = np.array((1e4, 10))
dfxx = pd.DataFrame(xx)
dfxx.values.base is xx.base

False