In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
def generate_data(n_samples=1000, random_state=None):
    """Create a noisy 1-D linear regression dataset.

    The feature column is ``n_samples`` evenly spaced points on [0, 10] and
    the target is ``2 * X + 1`` plus standard-normal noise scaled by 2.

    Parameters
    ----------
    n_samples : int, default=1000
        Number of rows to generate.
    random_state : None, int, or numpy.random.RandomState, default=None
        Source of the noise. ``None`` draws from NumPy's global random state
        (the original behaviour); an int seeds a private ``RandomState`` so
        the same data is produced on every call; an existing ``RandomState``
        is used as-is.

    Returns
    -------
    X : numpy.ndarray of shape (n_samples, 1)
    y : numpy.ndarray of shape (n_samples, 1)
    """
    X = np.linspace(0, 10, n_samples).reshape(-1, 1)

    if random_state is None:
        # Fall back to the global generator so existing callers are unaffected.
        noise = np.random.randn(n_samples, 1)
    elif isinstance(random_state, np.random.RandomState):
        noise = random_state.randn(n_samples, 1)
    else:
        noise = np.random.RandomState(random_state).randn(n_samples, 1)

    y = 2 * X + 1 + noise * 2
    return X, y

def calculate_bias_variance(model, X, y, n_iterations=100, test_size=0.3,
                            random_state=None):
    """Estimate prediction error and prediction variance by repeated refitting.

    ``model`` is refit ``n_iterations`` times, each time on a fresh random
    training subset of ``(X, y)``, and every fitted model predicts on the
    full ``X`` (training rows included).

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        Refit in place on every iteration; on return it holds the last fit.
    X : array-like, one row per sample
    y : array-like with one target value per row of ``X``
    n_iterations : int, default=100
        Number of random resamples. Must be at least 1.
    test_size : float or int, default=0.3
        Forwarded to ``train_test_split``; the held-out part is discarded.
    random_state : None, int, or numpy.random.RandomState, default=None
        ``None`` keeps the original behaviour (splits drawn from NumPy's
        global random state). An int seeds one private ``RandomState`` that
        is shared by all iterations, so the run is reproducible while each
        iteration still gets a different split.

    Returns
    -------
    bias : float
        Mean squared difference between ``y`` and the per-sample mean
        prediction. Because it is measured against the observed ``y``, any
        noise in ``y`` is included: this is squared bias plus label noise,
        not the pure bias term.
    variance : float
        Variance of the predictions across iterations, averaged over samples.

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError(
            f"n_iterations must be at least 1, got {n_iterations!r}"
        )

    # Passing the same int to every split would yield identical splits (and
    # zero variance), so an int seed is turned into one stateful generator.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    y_true = np.asarray(y).ravel()
    predictions = np.zeros((len(X), n_iterations))

    for i in range(n_iterations):
        X_train, _, y_train, _ = train_test_split(
            X, y, test_size=test_size, random_state=rng
        )
        model.fit(X_train, y_train)
        predictions[:, i] = np.asarray(model.predict(X)).ravel()

    expected_predictions = predictions.mean(axis=1)
    bias = np.mean((y_true - expected_predictions) ** 2)
    variance = np.mean(predictions.var(axis=1))

    return bias, variance

In [3]:
# Generate data
# Seed NumPy's global random state first so the noise drawn here (and any
# later draws from the same global state) repeat on Restart & Run All.
np.random.seed(42)
X, y = generate_data()

In [5]:
# Fit an ordinary least-squares model repeatedly and report the two
# summary statistics returned by calculate_bias_variance.
model = LinearRegression()
bias, variance = calculate_bias_variance(model=model, X=X, y=y)

print(f"Bias: {bias}", f"Variance: {variance}", sep="\n")

Bias: 4.259952912194949
Variance: 0.003629708055143869
