In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, HuberRegressor, QuantileRegressor
from estimator import fit_regression

In [3]:
def generate_data(n=100, p=10, seed=0):
    """Simulate a linear-regression dataset with standard-normal noise.

    The response is built as ``y = X @ beta + eps`` with no intercept term;
    ``beta``, ``X`` and ``eps`` are all drawn i.i.d. from N(0, 1).

    Parameters
    ----------
    n : int, default 100
        Number of observations (rows of the design matrix).
    p : int, default 10
        Number of predictors (columns of the design matrix).
    seed : int, default 0
        Seed of the random stream; the same seed reproduces the same data.

    Returns
    -------
    df : pandas.DataFrame
        Frame of shape ``(n, p + 1)``: the first column ``'Y'`` is the
        response, followed by the predictors ``'X1'`` ... ``'Xp'``.
    beta : numpy.ndarray
        The length-``p`` vector of true coefficients used to build ``Y``.
    """
    # Use a private legacy RandomState instead of np.random.seed(seed):
    # it yields exactly the same draws for a given seed, but does not reset
    # the process-wide generator that other cells may be relying on.
    rng = np.random.RandomState(seed)

    # Draw order (beta, then X, then eps) is kept so results are reproducible
    # against earlier runs of this notebook.
    beta = rng.randn(p)      # true coefficients
    X = rng.randn(n, p)      # design matrix (n x p)
    eps = rng.randn(n)       # Gaussian noise
    y = X @ beta + eps       # response

    # Combine into DataFrame: first column Y, rest X1..Xp
    columns = ['Y'] + [f'X{i+1}' for i in range(p)]
    df = pd.DataFrame(np.column_stack([y, X]), columns=columns)
    return df, beta

# Example: simulate one dataset with the default settings, then print the
# first five rows followed by the coefficients that generated the response.
df, true_beta = generate_data()
print(df.head(n=5))
print("True coefficients:", true_beta)


          Y        X1        X2        X3        X4        X5        X6  \
0  2.423628  0.144044  1.454274  0.761038  0.121675  0.443863  0.333674   
1  2.792001 -2.552990  0.653619  0.864436 -0.742165  2.269755 -1.454366   
2 -4.621236  0.154947  0.378163 -0.887786 -1.980796 -0.347912  0.156349   
3  0.094564 -1.048553 -1.420018 -1.706270  1.950775 -0.509652 -0.438074   
4 -5.898248 -0.895467  0.386902 -0.510805 -1.180632 -0.028182  0.428332   

         X7        X8        X9       X10  
0  1.494079 -0.205158  0.313068 -0.854096  
1  0.045759 -0.187184  1.532779  1.469359  
2  1.230291  1.202380 -0.387327 -0.302303  
3 -1.252795  0.777490 -1.613898 -0.212740  
4  0.066517  0.302472 -0.634322 -0.362741  
True coefficients: [ 1.76405235  0.40015721  0.97873798  2.2408932   1.86755799 -0.97727788
  0.95008842 -0.15135721 -0.10321885  0.4105985 ]


In [8]:
# Fit the simulated data with the project-local `fit_regression` helper
# (imported from `estimator`, not shown here).
# NOTE(review): method="ols" presumably selects ordinary least squares with
# column 'Y' as the response — confirm against estimator.fit_regression.
# The result is indexed with 'coef' and 'intercept' in the following cells.
ols = fit_regression(df, method="ols")

In [9]:
# Estimated coefficients, one per predictor X1..X10; compare with the
# "True coefficients" printed by the data-generation cell.
ols['coef']

X1     1.792130
X2     0.273173
X3     0.842678
X4     2.311503
X5     1.828215
X6    -0.998252
X7     0.786097
X8    -0.197357
X9    -0.111895
X10    0.451030
dtype: float64

In [11]:
# Estimated intercept. The simulated response has no intercept term
# (y = X @ beta + eps), so a value close to zero is expected here.
ols['intercept']

-0.006365926807098243