In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Seed the global NumPy RNG so that every draw below is repeatable.
np.random.seed(42)

# Experiment settings
p = 100                    # feature dimension
n_samples_per_task = 200   # rows drawn for each task
noise_std = 0.1            # std of the additive Gaussian label noise


def _draw_linear_task(n_rows, n_features, sigma):
    """Sample one synthetic linear-regression task from the global RNG.

    The draw order is: feature matrix, coefficient vector, label noise.

    Args:
        n_rows: number of samples in the task.
        n_features: dimension of each feature vector.
        sigma: standard deviation of the Gaussian noise added to the labels.

    Returns:
        Tuple ``(features, coef, labels)`` where ``features`` has shape
        ``(n_rows, n_features)`` with standard-normal entries, ``coef`` is a
        standard-normal vector of length ``n_features`` and
        ``labels = features @ coef + noise``.
    """
    features = np.random.randn(n_rows, n_features)
    coef = np.random.randn(n_features)
    labels = features @ coef + np.random.normal(0, sigma, n_rows)
    return features, coef, labels


# Two tasks with independently drawn features and parameter vectors,
# generated one after the other (task 1 first, then task 2).
X1, beta1, y1 = _draw_linear_task(n_samples_per_task, p, noise_std)
X2, beta2, y2 = _draw_linear_task(n_samples_per_task, p, noise_std)

# Quick sanity summary: shapes, parameter norms, and how aligned the two
# true parameter vectors happen to be.
print(f"Task 1 - Feature matrix shape: {X1.shape}, Label vector shape: {y1.shape}")
print(f"Task 2 - Feature matrix shape: {X2.shape}, Label vector shape: {y2.shape}")
print(f"Beta 1 norm: {np.linalg.norm(beta1):.4f}")
print(f"Beta 2 norm: {np.linalg.norm(beta2):.4f}")
print(f"Correlation between beta1 and beta2: {np.corrcoef(beta1, beta2)[0, 1]:.4f}")

In [None]:

# Experiment: OLS vs HPS with varying δ
#
# For each δ, task 2's true parameter is placed at Euclidean distance δ from
# task 1's, fresh training sets are drawn for both tasks, and two estimators
# are scored on held-out task-2 data:
#   * OLS: least squares on task-2 data only.
#   * HPS: least squares on the pooled (task 1 + task 2) data, i.e. a single
#     shared parameter vector.
np.random.seed(42)

# Fixed sample sizes
n1 = 200  # samples for task 1
n2 = 100  # samples for task 2
p = 100   # feature dimension
noise_std = 0.1
# NOTE: n2 == p, so the task-2 design matrix is square. OLS then fits the
# noisy training labels exactly and its test loss depends strongly on how
# well-conditioned X2_train happens to be.


def fit_least_squares(X, y):
    """Return the least-squares coefficients for ``y ≈ X @ beta``.

    Uses ``np.linalg.lstsq`` on the design matrix itself instead of solving
    the normal equations ``(X^T X) beta = X^T y``. Forming ``X^T X`` squares
    the condition number, and the former ``1e-8`` diagonal jitter is no
    longer needed: for a rank-deficient ``X`` lstsq returns the minimum-norm
    solution.

    Args:
        X: array-like of shape (n_samples, n_features).
        y: array-like of shape (n_samples,).

    Returns:
        1-D ndarray of length n_features.
    """
    design = np.asarray(X, dtype=float)
    target = np.asarray(y, dtype=float)
    beta_hat, *_ = np.linalg.lstsq(design, target, rcond=None)
    return beta_hat


def prediction_mse(y_true, y_pred):
    """Return the mean squared error between two equally shaped arrays."""
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.mean(residual ** 2)


# Generate base parameter vector for task 1
beta1_base = np.random.randn(p)

# Test data (larger sample for reliable evaluation)
n_test = 1000
# X1_test is not used anywhere in this cell. The draw is kept so that
# X2_test and every later sample stay on the same random stream, and in case
# another cell reads it.
X1_test = np.random.randn(n_test, p)
X2_test = np.random.randn(n_test, p)

# Vary δ from 0.01 to 1.00
deltas = np.linspace(0.01, 1.00, 20)
ols_task2_losses = []
hps_losses = []

for delta in deltas:
    # Create beta2 such that ||beta1 - beta2|| = delta:
    # beta2 = beta1 + delta * (unit-norm random direction).
    # NOTE: this reuses the name `beta2`, so it overwrites the first cell's value.
    random_direction = np.random.randn(p)
    random_direction /= np.linalg.norm(random_direction)
    beta2 = beta1_base + delta * random_direction

    # Generate training data for both tasks (redrawn for every δ)
    X1_train = np.random.randn(n1, p)
    y1_train = X1_train @ beta1_base + np.random.normal(0, noise_std, n1)

    X2_train = np.random.randn(n2, p)
    y2_train = X2_train @ beta2 + np.random.normal(0, noise_std, n2)

    # Test labels for task 2 (the test features are shared across all δ)
    y2_test = X2_test @ beta2 + np.random.normal(0, noise_std, n_test)

    # 1. OLS Estimator: train only on task 2
    beta2_ols = fit_least_squares(X2_train, y2_train)

    # OLS test loss on task 2
    y2_pred_ols = X2_test @ beta2_ols
    ols_loss = prediction_mse(y2_test, y2_pred_ols)
    ols_task2_losses.append(ols_loss)

    # 2. Hard Parameter Sharing (HPS) Estimator:
    # pool the training data of both tasks and fit one shared parameter.
    X_combined = np.vstack([X1_train, X2_train])
    y_combined = np.hstack([y1_train, y2_train])
    beta_shared = fit_least_squares(X_combined, y_combined)

    # HPS test loss on task 2 (using the shared parameter)
    y2_pred_hps = X2_test @ beta_shared
    hps_loss = prediction_mse(y2_test, y2_pred_hps)
    hps_losses.append(hps_loss)

# Draw both loss curves against δ on a single set of axes, using the
# explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(deltas, ols_task2_losses, 'o-', label='OLS (Task 2 only)', linewidth=2, markersize=6)
ax.plot(deltas, hps_losses, 's-', label='HPS (Hard Parameter Sharing)', linewidth=2, markersize=6)

# Labels, title and legend so the figure stands on its own.
ax.set_xlabel('δ = ||β₁ - β₂||', fontsize=12)
ax.set_ylabel('Test Loss on Task 2', fontsize=12)
ax.set_title('Transfer Learning: OLS vs HPS Estimators', fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

# Text summary: the swept δ interval and the min/max test loss of each estimator.
print(f"δ range: [{deltas[0]:.2f}, {deltas[-1]:.2f}]")
print(f"OLS loss range: [{min(ols_task2_losses):.4f}, {max(ols_task2_losses):.4f}]")
print(f"HPS loss range: [{min(hps_losses):.4f}, {max(hps_losses):.4f}]")
