# Linear Regression Model Validation

Comprehensive validation tests for the LinearRegressionModel.

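**Tests:**
1. Basic functionality (fit, predict, evaluate)
2. Coefficient recovery
3. Regularization types (Ridge, Lasso, ElasticNet, OLS)
4. Feature selection with Lasso
5. Polynomial features
6. Scaling (standard, robust, none)
7. Serialization (save/load)
8. Cross-validation
9. Goal predictor dual model
10. Goal predictor save/load
11. Grid search
12. Random search
13. Regularization comparison
14. Edge cases and error handling
15. Feature importance (coefficient interpretation)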

Run this BEFORE using the model in production to catch bugs.

In [None]:
# Setup
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import tempfile
from pathlib import Path

from utils.linear_model import (
    LinearRegressionModel,
    LinearGoalPredictor,
    grid_search_linear,
    random_search_linear,
    compare_regularization,
)

# Test tracking
# One dict per recorded check, with keys 'test', 'passed' and 'message'.
test_results = []


def record_test(name, passed, message=""):
    """Record the outcome of one validation check and echo it to stdout.

    Args:
        name: Label identifying the check.
        passed: Outcome of the check. Any truthy/falsy value is accepted and
            normalised to a plain ``bool`` before it is stored.
        message: Optional detail, printed on an indented second line when
            non-empty.
    """
    # Normalise so that later aggregation over the stored results (sums,
    # boolean masks) always sees real booleans rather than ints or None.
    passed = bool(passed)
    status = "✅ PASS" if passed else "❌ FAIL"
    test_results.append({'test': name, 'passed': passed, 'message': message})
    print(f"{status}: {name}")
    if message:
        print(f"       {message}")


print("Validation setup complete!")

## Test 1: Basic Functionality

In [None]:
# Synthetic regression data; the seed is fixed so every run draws the same values.
np.random.seed(42)
n = 200

# Three standard-normal feature columns, drawn in column order.
X_test = pd.DataFrame(
    {f'feature_{idx}': np.random.normal(0, 1, n) for idx in (1, 2, 3)}
)

# Generating relationship: y = 2 + 0.5*x1 + 1.0*x2 + noise (feature_3 is unused)
noise = np.random.normal(0, 0.1, n)
y_test = 2 + 0.5 * X_test['feature_1'] + 1.0 * X_test['feature_2'] + noise

# Test 1a: the constructor accepts alpha / l1_ratio
try:
    model = LinearRegressionModel(alpha=0.01, l1_ratio=0.5)
    record_test("1a. Model creation", True)
except Exception as exc:
    record_test("1a. Model creation", False, str(exc))

# Test 1b: fitting sets the is_fitted flag
try:
    model.fit(X_test, y_test)
    fit_detail = f"is_fitted={model.is_fitted}"
    record_test("1b. Model fit", model.is_fitted, fit_detail)
except Exception as exc:
    record_test("1b. Model fit", False, str(exc))

# Test 1c: one NaN-free prediction per input row
try:
    predictions = model.predict(X_test)
    length_matches = len(predictions) == len(y_test)
    predictions_ok = length_matches and not np.isnan(predictions).any()
    record_test("1c. Model predict", predictions_ok, f"n_predictions={len(predictions)}")
except Exception as exc:
    record_test("1c. Model predict", False, str(exc))

# Test 1d: evaluate() reports rmse, mae and r2
try:
    metrics = model.evaluate(X_test, y_test)
    has_all_metrics = all(key in metrics for key in ('rmse', 'mae', 'r2'))
    metric_detail = f"RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}"
    record_test("1d. Model evaluate", has_all_metrics, metric_detail)
except Exception as exc:
    record_test("1d. Model evaluate", False, str(exc))

## Test 2: Coefficient Recovery

In [None]:
# Check that the fitted coefficients land close to the generating values.
# Ground truth: feature_1 = 0.5, feature_2 = 1.0, feature_3 = 0.0

try:
    # No regularization (alpha=0) and no scaling, for the best coefficient recovery
    ols_model = LinearRegressionModel(alpha=0, scaling=None)
    ols_model.fit(X_test, y_test)

    coefs = ols_model.get_coefficients()
    coef_dict = {
        feature: coefficient
        for feature, coefficient in zip(coefs['feature'], coefs['coefficient'])
    }

    # Absolute error of each estimate against its true value
    tolerance = 0.1
    f1_error = abs(coef_dict['feature_1'] - 0.5)
    f2_error = abs(coef_dict['feature_2'] - 1.0)
    f3_error = abs(coef_dict['feature_3'] - 0.0)

    all_close = f1_error < tolerance and f2_error < tolerance and f3_error < tolerance
    recovery_detail = f"f1={coef_dict['feature_1']:.3f} (true=0.5), f2={coef_dict['feature_2']:.3f} (true=1.0), f3={coef_dict['feature_3']:.3f} (true=0.0)"
    record_test("2. Coefficient recovery", all_close, recovery_detail)
except Exception as exc:
    record_test("2. Coefficient recovery", False, str(exc))

## Test 3: Regularization Types

In [None]:
# Each check fits a model and compares the reported model type with the
# label expected for that alpha / l1_ratio combination.

# Test 3a: l1_ratio=0 -> expected type 'Ridge'
try:
    ridge = LinearRegressionModel(alpha=1.0, l1_ratio=0.0)
    ridge.fit(X_test, y_test)
    model_type = ridge._get_model_type()
    type_detail = f"type={model_type}"
    record_test("3a. Ridge (L2) creation", model_type == 'Ridge', type_detail)
except Exception as exc:
    record_test("3a. Ridge (L2) creation", False, str(exc))

# Test 3b: l1_ratio=1 -> expected type 'Lasso'
try:
    lasso = LinearRegressionModel(alpha=1.0, l1_ratio=1.0)
    lasso.fit(X_test, y_test)
    model_type = lasso._get_model_type()
    type_detail = f"type={model_type}"
    record_test("3b. Lasso (L1) creation", model_type == 'Lasso', type_detail)
except Exception as exc:
    record_test("3b. Lasso (L1) creation", False, str(exc))

# Test 3c: l1_ratio=0.5 -> expected type 'ElasticNet'
try:
    elastic = LinearRegressionModel(alpha=1.0, l1_ratio=0.5)
    elastic.fit(X_test, y_test)
    model_type = elastic._get_model_type()
    type_detail = f"type={model_type}"
    record_test("3c. ElasticNet creation", model_type == 'ElasticNet', type_detail)
except Exception as exc:
    record_test("3c. ElasticNet creation", False, str(exc))

# Test 3d: alpha=0 -> expected type 'OLS'
try:
    ols = LinearRegressionModel(alpha=0)
    ols.fit(X_test, y_test)
    model_type = ols._get_model_type()
    type_detail = f"type={model_type}"
    record_test("3d. OLS (no regularization)", model_type == 'OLS', type_detail)
except Exception as exc:
    record_test("3d. OLS (no regularization)", False, str(exc))

## Test 4: Feature Selection with Lasso

In [None]:
# Create data where only two of the five features influence the target
np.random.seed(42)
n = 300

X_sparse = pd.DataFrame({
    'relevant_1': np.random.normal(0, 1, n),
    'relevant_2': np.random.normal(0, 1, n),
    'noise_1': np.random.normal(0, 1, n),
    'noise_2': np.random.normal(0, 1, n),
    'noise_3': np.random.normal(0, 1, n),
})

# Only the first two features matter
y_sparse = 3 + 1.5 * X_sparse['relevant_1'] + 0.8 * X_sparse['relevant_2'] + np.random.normal(0, 0.2, n)

try:
    # High alpha Lasso should zero out noise features
    lasso_sparse = LinearRegressionModel(alpha=0.5, l1_ratio=1.0, scaling='standard')
    lasso_sparse.fit(X_sparse, y_sparse)

    nonzero = lasso_sparse.get_nonzero_features()

    # BOTH relevant features must survive: requiring only one of them would
    # let the check pass even when Lasso discards a true signal.
    relevant_features = ('relevant_1', 'relevant_2')
    keeps_relevant = all(name in nonzero for name in relevant_features)
    # Leniency kept from the original check: at most one noise feature may remain.
    zeros_noise = len(nonzero) <= 3  # At most 3 features selected

    record_test(
        "4. Lasso feature selection",
        keeps_relevant and zeros_noise,
        f"Selected features: {nonzero}"
    )
except Exception as e:
    record_test("4. Lasso feature selection", False, str(e))

## Test 5: Polynomial Features

In [None]:
# One-feature data set whose target has a genuine x**2 term
np.random.seed(42)
n = 200

x_values = np.random.uniform(-2, 2, n)
X_poly = pd.DataFrame({'x': x_values})
poly_noise = np.random.normal(0, 0.1, n)
y_poly = 1 + 0.5 * X_poly['x'] + 0.3 * X_poly['x']**2 + poly_noise

# Test 5a: poly_degree=2 must yield more features than were passed in
try:
    poly_model = LinearRegressionModel(alpha=0.01, poly_degree=2)
    poly_model.fit(X_poly, y_poly)

    n_original = poly_model.n_features_
    n_expanded = poly_model.n_features_poly_
    record_test(
        "5a. Polynomial feature expansion",
        n_expanded > n_original,
        f"Original: {n_original}, After poly: {n_expanded}"
    )
except Exception as exc:
    record_test("5a. Polynomial feature expansion", False, str(exc))

# Test 5b: on quadratic data the degree-2 model should beat the degree-1 model on R²
try:
    linear_model = LinearRegressionModel(alpha=0.01, poly_degree=1)
    linear_model.fit(X_poly, y_poly)
    linear_r2 = linear_model.evaluate(X_poly, y_poly)['r2']

    poly_model = LinearRegressionModel(alpha=0.01, poly_degree=2)
    poly_model.fit(X_poly, y_poly)
    poly_r2 = poly_model.evaluate(X_poly, y_poly)['r2']

    r2_detail = f"Linear R²={linear_r2:.4f}, Poly R²={poly_r2:.4f}"
    record_test("5b. Polynomial improves quadratic fit", poly_r2 > linear_r2, r2_detail)
except Exception as exc:
    record_test("5b. Polynomial improves quadratic fit", False, str(exc))

## Test 6: Scaling

In [None]:
# Test 6a: scaling='standard' should leave a scaler object on the model
try:
    std_model = LinearRegressionModel(alpha=0.1, scaling='standard')
    std_model.fit(X_test, y_test)
    record_test("6a. Standard scaling", std_model.scaler is not None)
except Exception as exc:
    record_test("6a. Standard scaling", False, str(exc))

# Test 6b: scaling='robust' should leave a scaler object on the model
try:
    robust_model = LinearRegressionModel(alpha=0.1, scaling='robust')
    robust_model.fit(X_test, y_test)
    record_test("6b. Robust scaling", robust_model.scaler is not None)
except Exception as exc:
    record_test("6b. Robust scaling", False, str(exc))

# Test 6c: scaling=None should leave the scaler attribute as None
try:
    no_scale_model = LinearRegressionModel(alpha=0.1, scaling=None)
    no_scale_model.fit(X_test, y_test)
    record_test("6c. No scaling", no_scale_model.scaler is None)
except Exception as exc:
    record_test("6c. No scaling", False, str(exc))

## Test 7: Serialization (Save/Load)

In [None]:
try:
    # Fit a model and capture reference predictions for the first five rows
    save_model = LinearRegressionModel(alpha=0.1, l1_ratio=0.5, poly_degree=2)
    save_model.fit(X_test, y_test)
    head_rows = X_test[:5]
    original_pred = save_model.predict(head_rows)

    # Round-trip through a file inside a throwaway directory
    with tempfile.TemporaryDirectory() as tmpdir:
        save_path = Path(tmpdir, 'test_model.pkl')
        save_model.save(save_path)

        loaded_model = LinearRegressionModel.load(save_path)
        loaded_pred = loaded_model.predict(head_rows)

    # The reloaded model must reproduce the reference predictions
    match = np.allclose(original_pred, loaded_pred, rtol=1e-5)
    roundtrip_detail = f"Original: {original_pred[:3]}, Loaded: {loaded_pred[:3]}"
    record_test("7. Save/Load serialization", match, roundtrip_detail)
except Exception as exc:
    record_test("7. Save/Load serialization", False, str(exc))

## Test 8: Cross-Validation

In [None]:
try:
    cv_model = LinearRegressionModel(alpha=0.1, l1_ratio=0.5)
    cv_result = cv_model.cross_validate(X_test, y_test, cv=5)

    # Expect summary keys plus one score per fold
    has_summary_keys = all(key in cv_result for key in ('mean', 'std', 'scores'))
    valid = has_summary_keys and len(cv_result['scores']) == 5

    cv_detail = f"Mean RMSE: {cv_result['mean']:.4f} ± {cv_result['std']:.4f}"
    record_test("8. Cross-validation", valid, cv_detail)
except Exception as exc:
    record_test("8. Cross-validation", False, str(exc))

## Test 9: LinearGoalPredictor

In [None]:
# Synthetic game table: three feature columns plus Poisson-distributed goal counts
np.random.seed(42)
n = 200

games_df = pd.DataFrame(dict(
    elo_diff=np.random.normal(0, 100, n),
    home_form=np.random.uniform(0, 1, n),
    away_form=np.random.uniform(0, 1, n),
    home_goals=np.random.poisson(3.0, n),
    away_goals=np.random.poisson(2.5, n),
))

# Test 9a: fitting marks the predictor and both of its sub-models as fitted
try:
    predictor = LinearGoalPredictor(alpha=0.1, l1_ratio=0.5)
    predictor.fit(games_df)

    everything_fitted = (
        predictor.is_fitted
        and predictor.home_model.is_fitted
        and predictor.away_model.is_fitted
    )
    record_test("9a. GoalPredictor fit", everything_fitted)
except Exception as exc:
    record_test("9a. GoalPredictor fit", False, str(exc))

# Test 9b: predicting one game returns a pair of floats
try:
    sample_game = games_df.iloc[0]
    home_pred, away_pred = predictor.predict_goals(sample_game)

    both_floats = all(isinstance(goal, float) for goal in (home_pred, away_pred))
    score_detail = f"Predicted: {home_pred:.2f} - {away_pred:.2f}"
    record_test("9b. Single game prediction", both_floats, score_detail)
except Exception as exc:
    record_test("9b. Single game prediction", False, str(exc))

# Test 9c: batch prediction returns one row per game with both prediction columns
try:
    batch_pred = predictor.predict_batch(games_df)

    has_pred_columns = all(
        column in batch_pred.columns for column in ('home_pred', 'away_pred')
    )
    batch_ok = has_pred_columns and len(batch_pred) == len(games_df)
    record_test("9c. Batch prediction", batch_ok, f"n_predictions={len(batch_pred)}")
except Exception as exc:
    record_test("9c. Batch prediction", False, str(exc))

# Test 9d: evaluate() reports per-side, combined and win-accuracy entries
try:
    metrics = predictor.evaluate(games_df)

    expected_sections = ('home', 'away', 'combined', 'win_accuracy')
    has_sections = all(section in metrics for section in expected_sections)
    eval_detail = f"Combined RMSE: {metrics['combined']['rmse']:.4f}, Win Acc: {metrics['win_accuracy']:.2%}"
    record_test("9d. GoalPredictor evaluate", has_sections, eval_detail)
except Exception as exc:
    record_test("9d. GoalPredictor evaluate", False, str(exc))

## Test 10: GoalPredictor Save/Load

In [None]:
try:
    # Round-trip the fitted predictor through a throwaway directory
    with tempfile.TemporaryDirectory() as tmpdir:
        save_path = Path(tmpdir, 'predictor')
        predictor.save(save_path)

        loaded_predictor = LinearGoalPredictor.load(save_path)

    # Both predictors must agree on the first game to within 0.001 goals
    first_game = games_df.iloc[0]
    orig_home, orig_away = predictor.predict_goals(first_game)
    load_home, load_away = loaded_predictor.predict_goals(first_game)

    tolerance = 0.001
    match = abs(orig_home - load_home) < tolerance and abs(orig_away - load_away) < tolerance
    compare_detail = f"Original: {orig_home:.3f}-{orig_away:.3f}, Loaded: {load_home:.3f}-{load_away:.3f}"
    record_test("10. GoalPredictor save/load", match, compare_detail)
except Exception as exc:
    record_test("10. GoalPredictor save/load", False, str(exc))

## Test 11: Grid Search

In [None]:
try:
    # 2 x 2 grid, kept small so the search stays fast
    search_grid = {
        'alpha': [0.01, 0.1],
        'l1_ratio': [0.0, 1.0],
    }
    result = grid_search_linear(X_test, y_test, param_grid=search_grid, cv=3, verbose=False)

    expected_keys = ('best_params', 'best_score', 'all_results')
    has_expected_keys = all(key in result for key in expected_keys)

    grid_detail = f"Best: {result['best_params']}, RMSE: {result['best_score']:.4f}"
    record_test("11. Grid search", has_expected_keys, grid_detail)
except Exception as exc:
    record_test("11. Grid search", False, str(exc))

## Test 12: Random Search

In [None]:
try:
    # Candidate values to sample from; five draws are requested below
    search_space = {
        'alpha': [0.01, 0.1, 1.0],
        'l1_ratio': [0.0, 0.5, 1.0],
    }
    result = random_search_linear(
        X_test, y_test,
        param_distributions=search_space,
        n_iter=5,
        cv=3,
        verbose=False,
    )

    # Expect the best-result keys and exactly one result entry per iteration
    has_best = 'best_params' in result and 'best_score' in result
    valid = has_best and len(result['all_results']) == 5

    random_detail = f"Best: {result['best_params']}, RMSE: {result['best_score']:.4f}"
    record_test("12. Random search", valid, random_detail)
except Exception as exc:
    record_test("12. Random search", False, str(exc))

## Test 13: Compare Regularization

In [None]:
try:
    comparison = compare_regularization(X_test, y_test, alphas=[0.1, 1.0], cv=3)

    # Every regularization family must appear in the 'model' column
    model_names = comparison['model'].values
    covers_all_families = all(
        family in model_names for family in ('Ridge', 'Lasso', 'ElasticNet')
    )

    record_test(
        "13. Compare regularization",
        covers_all_families,
        f"n_comparisons={len(comparison)}"
    )
except Exception as exc:
    record_test("13. Compare regularization", False, str(exc))

## Test 14: Edge Cases

In [None]:
# Test 14a: predict() on an unfitted model must raise RuntimeError
try:
    unfitted_model = LinearRegressionModel()
    unfitted_model.predict(X_test)
    # Reaching this line means no exception was raised at all
    record_test("14a. Unfitted predict raises error", False, "Should have raised RuntimeError")
except Exception as err:
    # Only RuntimeError counts as a pass; any other exception type is a failure
    if isinstance(err, RuntimeError):
        record_test("14a. Unfitted predict raises error", True, str(err))
    else:
        record_test("14a. Unfitted predict raises error", False, f"Wrong error: {err}")

# Test 14b: Save before fit should raise error
try:
    unfitted_model = LinearRegressionModel()
    # Target a throwaway directory instead of a hard-coded '/tmp' path: this
    # works on platforms without /tmp and leaves no stray file behind if
    # save() wrongly succeeds.
    with tempfile.TemporaryDirectory() as tmpdir:
        unfitted_model.save(Path(tmpdir) / 'unfitted.pkl')
    record_test("14b. Unfitted save raises error", False, "Should have raised RuntimeError")
except RuntimeError as e:
    record_test("14b. Unfitted save raises error", True, str(e))
except Exception as e:
    record_test("14b. Unfitted save raises error", False, f"Wrong error: {e}")

# Test 14c: fitting must not fail when a feature value is missing
try:
    # Copy the features and blank out a single cell (first row, first column)
    X_nan = X_test.copy()
    X_nan.iat[0, 0] = np.nan

    nan_model = LinearRegressionModel(alpha=0.1)
    nan_model.fit(X_nan, y_test)  # passes as long as fit() does not raise

    # NOTE(review): the message below presumes the model imputes NaN with 0;
    # nothing here verifies that - confirm against the model implementation.
    record_test("14c. NaN handling in features", True, "NaN replaced with 0")
except Exception as exc:
    record_test("14c. NaN handling in features", False, str(exc))

## Test 15: Feature Importance

In [None]:
try:
    # Uses `model`, the instance fitted in the basic-functionality checks
    importance = model.get_feature_importance(top_n=3)

    # Expect a Series named 'importance' with at most top_n entries
    is_series = isinstance(importance, pd.Series)
    valid = is_series and len(importance) <= 3 and importance.name == 'importance'

    importance_detail = f"Top features: {list(importance.index)}"
    record_test("15. Feature importance", valid, importance_detail)
except Exception as exc:
    record_test("15. Feature importance", False, str(exc))

---

## Validation Summary

In [None]:
# Summary
# Naming the columns explicitly keeps the frame well-formed even when no test
# has been recorded yet (an empty list would otherwise yield a frame without
# a 'passed' column and raise KeyError below).
results_df = pd.DataFrame(test_results, columns=['test', 'passed', 'message'])
# Force a real boolean column so that `~` below is a logical NOT; on an
# object/int column it would be a bitwise invert and break the mask.
results_df['passed'] = results_df['passed'].astype(bool)
passed = int(results_df['passed'].sum())
total = len(results_df)
# Guard the percentage against division by zero when nothing was recorded.
pass_rate = 100 * passed / total if total else 0.0

print("=" * 60)
print("VALIDATION SUMMARY")
print("=" * 60)
print(f"\nPassed: {passed}/{total} ({pass_rate:.1f}%)")
print()

if total == 0:
    # Zero recorded tests must not be reported as "all passed".
    print("⚠️ NO TESTS RECORDED - run the test cells above first.")
elif passed == total:
    print("✅ ALL TESTS PASSED - Linear Regression model is validated!")
else:
    print("❌ SOME TESTS FAILED:")
    failed = results_df[~results_df['passed']]
    for _, row in failed.iterrows():
        print(f"   - {row['test']}: {row['message']}")

print("\n" + "=" * 60)
print("Detailed Results:")
print("=" * 60)
# Last expression of the cell: the notebook renders the full results table.
results_df