In [1]:
import numpy as np
import pandas as pd
pd.set_option('mode.copy_on_write', True)
import statsmodels.api as sm

In [2]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same random
# data, and therefore the same model summaries, on every run.
# NOTE: the outputs stored in this notebook were produced by an unseeded
# generator, so re-execute the notebook to refresh the displayed numbers.
SEED = 42
rng = np.random.default_rng(SEED)

In [3]:
# Problem size: n observations, p design columns (intercept + p - 1 regressors).
n, p = 100, 10
# Outcome: the thing we are predicting.
y = rng.normal(loc=10, scale=2, size=n)
# Predictors: the things we are predicting with.
regressors = rng.normal(loc=5, scale=1, size=(n, p - 1))
# Design matrix: a leading column of ones for the intercept, followed by
# the regressors.
X1 = np.column_stack([np.ones(n), regressors])

First design - no subtraction:

In [4]:
# Ordinary least squares regression of y on the first design
# (intercept plus the original, unsubtracted regressors).
model1 = sm.OLS(endog=y, exog=X1)
fit1 = model1.fit()
# Last expression in the cell, so the summary table is displayed.
fit1.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.053
Model:,OLS,Adj. R-squared:,-0.042
Method:,Least Squares,F-statistic:,0.5564
Date:,"Mon, 06 Mar 2023",Prob (F-statistic):,0.829
Time:,08:48:45,Log-Likelihood:,-208.89
No. Observations:,100,AIC:,437.8
Df Residuals:,90,BIC:,463.8
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,11.0963,2.988,3.713,0.000,5.160,17.033
x1,-0.1313,0.224,-0.587,0.559,-0.575,0.313
x2,-0.2034,0.211,-0.962,0.338,-0.623,0.216
x3,-0.1586,0.214,-0.740,0.461,-0.584,0.267
x4,-0.0556,0.240,-0.232,0.817,-0.532,0.421
x5,0.0091,0.224,0.040,0.968,-0.436,0.454
x6,0.1411,0.217,0.649,0.518,-0.291,0.573
x7,0.1338,0.206,0.649,0.518,-0.276,0.543
x8,-0.1839,0.203,-0.907,0.367,-0.587,0.219

0,1,2,3
Omnibus:,2.749,Durbin-Watson:,1.93
Prob(Omnibus):,0.253,Jarque-Bera (JB):,2.274
Skew:,-0.362,Prob(JB):,0.321
Kurtosis:,3.149,Cond. No.,218.0


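The second model uses subtraction, but gives the same fitted values, R-squared and F-statistic.  Notice we leave the column of ones and the first regressor in place, and replace the remaining regressors with the differences between adjacent regressor columns (each column minus the column before it).  Every column of the new design is a linear combination of columns of the first design, and vice versa, so the two designs span the same column space; this is why the fits agree even though the individual coefficients differ.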

In [5]:
# Difference between each regressor column and the column before it;
# this gives one fewer column than `regressors`.
differences = regressors[:, 1:] - regressors[:, :-1]
# Second design: keep the column of ones and the first regressor from X1,
# then use the difference columns in place of the remaining regressors.
X2 = np.column_stack([X1[:, :2], differences])

In [6]:
# Ordinary least squares regression of the same y on the second design
# (intercept, first regressor, then adjacent-column differences).
model2 = sm.OLS(endog=y, exog=X2)
fit2 = model2.fit()
# Last expression in the cell, so the summary table is displayed.
fit2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.053
Model:,OLS,Adj. R-squared:,-0.042
Method:,Least Squares,F-statistic:,0.5564
Date:,"Mon, 06 Mar 2023",Prob (F-statistic):,0.829
Time:,08:48:45,Log-Likelihood:,-208.89
No. Observations:,100,AIC:,437.8
Df Residuals:,90,BIC:,463.8
Df Model:,9,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,11.0963,2.988,3.713,0.000,5.160,17.033
x1,-0.2811,0.600,-0.469,0.640,-1.472,0.910
x2,-0.1498,0.590,-0.254,0.800,-1.322,1.023
x3,0.0536,0.544,0.099,0.922,-1.027,1.134
x4,0.2123,0.500,0.424,0.672,-0.781,1.206
x5,0.2678,0.442,0.605,0.547,-0.611,1.147
x6,0.2588,0.399,0.648,0.519,-0.534,1.052
x7,0.1176,0.338,0.348,0.728,-0.553,0.788
x8,-0.0162,0.314,-0.052,0.959,-0.639,0.607

0,1,2,3
Omnibus:,2.749,Durbin-Watson:,1.93
Prob(Omnibus):,0.253,Jarque-Bera (JB):,2.274
Skew:,-0.362,Prob(JB):,0.321
Kurtosis:,3.149,Cond. No.,80.6


Notice the R-squared and the F-statistic are exactly the same with the two models.  In fact, the predicted values are also the same (within the precision of the calculations):

In [7]:
# Predictions from each fitted model; predict() is called without new data,
# so each model is evaluated on its own design.
predicted1, predicted2 = fit1.predict(), fit2.predict()
# True when the two sets of predictions agree to within floating point
# tolerance.
np.allclose(predicted1, predicted2)

True