In [65]:
import numpy as np
from scipy.optimize import minimize

def u_minus(x, r1):
    """Keep the entries of ``x`` strictly below ``r1`` and zero out the rest.

    Parameters
    ----------
    x : array_like
        Values to threshold.
    r1 : scalar
        Lower threshold; the comparison is strict (``x < r1``).

    Returns
    -------
    numpy.ndarray
        ``x`` where ``x < r1`` holds, ``0`` everywhere else.
    """
    below_threshold = x < r1
    return np.where(below_threshold, x, 0)

def u_plus(x, r2):
    """Keep the entries of ``x`` strictly above ``r2`` and zero out the rest.

    Parameters
    ----------
    x : array_like
        Values to threshold.
    r2 : scalar
        Upper threshold; the comparison is strict (``x > r2``).

    Returns
    -------
    numpy.ndarray
        ``x`` where ``x > r2`` holds, ``0`` everywhere else.
    """
    above_threshold = x > r2
    return np.where(above_threshold, x, 0)

def W(X, Z, r1, r2):
    """Assemble the regressor matrix for thresholds ``r1`` and ``r2``.

    The columns are, in order: ``u_minus(X, r1)``, ``u_plus(X, r2)``, and
    then the column(s) of ``Z``.
    """
    lower_tail = u_minus(X, r1)
    upper_tail = u_plus(X, r2)
    return np.column_stack([lower_tail, upper_tail, Z])

def objective(params, Y, X, Z):
    """Sum of squared residuals of the demeaned regression of ``Y`` on ``W``.

    Parameters
    ----------
    params : sequence of two scalars
        The thresholds ``(r1, r2)`` passed on to ``W``.
    Y : array_like
        Response values.
    X, Z : array_like
        Inputs forwarded to ``W(X, Z, r1, r2)`` to build the regressors.

    Returns
    -------
    float
        ``sum((Y_dot - W_dot @ eta) ** 2)`` where ``Y_dot`` and ``W_dot`` are
        ``Y`` and the regressor matrix with their (column) means removed and
        ``eta`` is the least-squares coefficient vector.
    """
    r1, r2 = params

    # Build the regressor matrix once and remove the means from both sides.
    regressors = W(X, Z, r1, r2)
    Y_dot = Y - np.mean(Y)
    W_dot = regressors - np.mean(regressors, axis=0)

    # Solve the least-squares problem directly instead of inverting W'W:
    # when no observation falls beyond a threshold the corresponding column is
    # all zeros, W'W is singular and an explicit inverse fails or is garbage.
    # lstsq still returns a (minimum-norm) least-squares solution in that case.
    eta, *_ = np.linalg.lstsq(W_dot, Y_dot, rcond=None)

    residuals = Y_dot - W_dot @ eta
    return np.sum(residuals**2)





# example usage
# NOTE(review): no seed is set in this cell, so the simulated sample (and the
# numbers printed below) change from run to run.
X = np.random.normal(size=10)
Z = np.random.normal(size=(10, 3))
Y = 2*u_minus(X, -1) + 3*u_plus(X, 1) + Z @ np.array([1, 2, 3]) + np.random.normal(size=10)

# NOTE(review): the thresholds enter the objective only through the comparisons
# in u_minus/u_plus, so the objective changes only when a threshold crosses a
# sample point. A local simplex search can therefore stop at x0; a grid search
# over candidate thresholds may be more reliable.
result = minimize(objective, x0=np.array([-0.9, 0.9]), args=(Y, X, Z), method='Nelder-Mead')

# Prints the complete OptimizeResult; the estimated thresholds are in `result.x`.
print(f"Estimated parameters:  r1: {result}")


# Re-fit the demeaned regression at the estimated thresholds to recover eta.
r1, r2 = result.x
W_fit = W(X, Z, r1, r2)
Y_bar = np.mean(Y)
W_bar = np.mean(W_fit, axis=0)
Y_dot = Y - Y_bar
W_dot = W_fit - W_bar
# Least-squares solve instead of inv(W'W): stays well-defined when a threshold
# column is all zeros (no sample point beyond the threshold).
eta, *_ = np.linalg.lstsq(W_dot, Y_dot, rcond=None)


Estimated parameters:  r1:        message: Optimization terminated successfully.
       success: True
        status: 0
           fun: 3.228618872383598
             x: [-9.000e-01  9.000e-01]
           nit: 10
          nfev: 39
 final_simplex: (array([[-9.000e-01,  9.000e-01],
                       [-9.001e-01,  9.000e-01],
                       [-9.000e-01,  9.001e-01]]), array([ 3.229e+00,  3.229e+00,  3.229e+00]))


In [67]:
import numpy as np
from scipy.optimize import minimize

# Seed NumPy's global RNG so the draws below are the same on every run.
np.random.seed(12345)
n = 2000  # number of simulated observations
# x is the variable that gets thresholded by neg_part2/pos_part2 further down.
x = np.random.normal(size=n)
# q is not referenced anywhere else in this cell. The draw still advances the
# RNG, so removing it would change z and every value simulated afterwards.
q = np.random.normal(size=n)
# z enters the regressions as an additional, un-thresholded regressor.
z = np.random.normal(size=n)

def neg_part2(x, r):
    """Return ``x`` multiplied by the indicator ``x < r``.

    Entries strictly below ``r`` are kept; all other entries are multiplied
    by ``False`` and therefore become zero.
    """
    is_below = x < r
    return x * is_below

def pos_part2(x, r):
    """Return ``x`` multiplied by the indicator ``x > r``.

    Entries strictly above ``r`` are kept; all other entries are multiplied
    by ``False`` and therefore become zero.
    """
    is_above = x > r
    return x * is_above

def reg(X, y):
    """Ordinary least-squares coefficients of ``y`` regressed on ``X``.

    Parameters
    ----------
    X : array_like, shape (n, k)
        Design matrix.
    y : array_like, shape (n,) or (n, m)
        Response(s).

    Returns
    -------
    numpy.ndarray
        Coefficients ``b`` minimising ``||y - X @ b||``.

    Notes
    -----
    Uses ``np.linalg.lstsq`` rather than explicitly inverting the ``R`` factor
    of a QR decomposition. Inverting ``R`` raises ``LinAlgError`` as soon as
    ``X`` is rank deficient (e.g. a threshold column that is entirely zero);
    ``lstsq`` returns the minimum-norm least-squares solution in that case and
    the same coefficients otherwise.
    """
    bols, *_ = np.linalg.lstsq(X, y, rcond=None)
    return bols

def sse(r, y, x, z):
    """Residual sum of squares of ``y`` regressed on the thresholded design.

    ``r`` holds two thresholds. When ``r[0] < r[1]`` they are used as
    (lower, upper); otherwise their roles are swapped. The design matrix has
    the columns ``neg_part2(x, lower)``, ``pos_part2(x, upper)`` and ``z``.
    """
    first, second = r[0], r[1]
    lower, upper = (first, second) if first < second else (second, first)

    design = np.column_stack((neg_part2(x, lower), pos_part2(x, upper), z))
    coefs = reg(design, y)
    residuals = y - design.dot(coefs)
    return np.sum(residuals**2)

def sse2(r, y, x, z):
    """Same criterion as ``sse`` after squashing each entry of ``r``.

    Each raw parameter ``v`` is mapped to
    ``-1 / (1 + exp(v)) + exp(v) / (1 + exp(v))`` before it is used as a
    threshold; the transformed pair is then evaluated exactly as in ``sse``
    (including the swap when the first value is not below the second).
    """
    def squash(v):
        # exp(v) is evaluated once and reused; the arithmetic is unchanged.
        e = np.exp(v)
        return -1 / (1 + e) + e / (1 + e)

    return sse([squash(r[0]), squash(r[1])], y, x, z)

# Simulate the response from a design whose thresholds are -0.2 and 0.2,
# with coefficients b0 and standard-normal noise.
x1 = np.column_stack((neg_part2(x, -0.2), pos_part2(x, 0.2), z))
b0 = np.array([2, 2, 1])
y = x1.dot(b0) + np.random.normal(size=n)

# NOTE(review): np.percentile expects q on a 0-100 scale, so these are the
# 0.1th and 0.9th percentiles. If the 10%/90% quantiles were intended, use
# q=10/90 (or np.quantile). Neither value is used below in this cell.
r01, r02 = np.percentile(x, [0.1, 0.9])

# Alternative kept for reference (thresholds optimised directly, no squashing):
#   res = minimize(sse, x0=[-0.5, 0.5], args=(y, x, z), method='Nelder-Mead')

# Optimise over the raw parameters that sse2 squashes before thresholding.
res = minimize(sse2, [-0.5, 0.5], args=(y, x, z), method='Nelder-Mead')

# Apply the same squashing as sse2 to report the thresholds themselves.
r1, r2 = (
    -1 / (1 + np.exp(v)) + np.exp(v) / (1 + np.exp(v))
    for v in res.x
)
print(r1, r2, sep="\n")


-0.21979196165274512
0.2646422782009617


In [52]:
# Inspect the lower-threshold regressor: entries of x below -0.2 are kept,
# every other entry becomes 0.
neg_part2(x, -0.2)

array([-0.20470766,  0.        , -0.51943872, ..., -0.2424589 ,
       -3.05698974,  0.        ])

In [33]:
# Display the fitted coefficient vector.
# NOTE(review): presumably the `eta` computed in the first cell; this cell's
# execution count (33) predates that cell's recorded run (65), so confirm the
# value on a fresh Restart & Run All.
eta

array([2.55706282, 2.83112037, 1.38073722, 2.34866909, 3.28611982])

In [41]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Build a small example data set: three groups, each observed at two times.
data = pd.DataFrame({
    'group': ['A', 'A', 'B', 'B', 'C', 'C'],
    'time': [1, 2, 1, 2, 1, 2],
    'y': [3, 4, 5, 6, 7, 8]
})

# Treat the group column as a categorical variable.
data['group'] = pd.Categorical(data['group'])

# First-difference y within each group. The first row of every group has no
# predecessor, so its difference is NaN. observed=True restricts the grouping
# to categories that actually occur (all of them here).
data['y_diff'] = data.groupby('group', observed=True)['y'].diff()

# Fit the model on the differenced response. missing='drop' discards the NaN
# rows created by the differencing; without it they are passed to OLS as-is.
# NOTE(review): after dropping those rows `time` equals 2 for every remaining
# observation and is collinear with the constant, so this toy regression is
# degenerate. A first-difference/within estimator would normally transform
# the regressors the same way as y -- confirm the intended specification.
model = sm.OLS(data['y_diff'], sm.add_constant(data['time']), missing='drop')
result = model.fit()

# Print the model results.
print(result.summary())


ModuleNotFoundError: No module named 'statsmodels'