In [None]:
import numpy as np
import pandas as pd

In [None]:
import kagglehub
  # Download latest version
path = kagglehub.dataset_download("camnugent/california-housing-prices")
print("Path to dataset files:",path)

Using Colab cache for faster access to the 'california-housing-prices' dataset.
Path to dataset files: /kaggle/input/california-housing-prices


In [None]:
import os
housing_data_path = os.path.join(path,"housing.csv")
df =pd.read_csv(housing_data_path)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score



In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  float64
 3   total_rooms         20640 non-null  float64
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  float64
 6   households          20640 non-null  float64
 7   median_income       20640 non-null  float64
 8   median_house_value  20640 non-null  float64
 9   ocean_proximity     20640 non-null  object 
dtypes: float64(9), object(1)
memory usage: 1.6+ MB


In [None]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [None]:
missing_info = df. isnull().sum()
print(missing_info)
for columns in df.columns:
  if missing_info[columns] > 0:
    df.drop(columns =[columns], inplace = True)

longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        207
population              0
households              0
median_income           0
median_house_value      0
ocean_proximity         0
dtype: int64


In [None]:
missing_info = df. isnull().sum()
print(missing_info)

longitude             0
latitude              0
housing_median_age    0
total_rooms           0
population            0
households            0
median_income         0
median_house_value    0
ocean_proximity       0
dtype: int64


In [None]:
X = df.drop(columns = ["median_house_value"]).values
y = df["median_house_value"].values

In [None]:
X = pd.get_dummies(df, columns=["ocean_proximity"], drop_first=True)

In [None]:
X_train, X_test, y_train, y_test =  train_test_split(X,y,test_size=0.3,random_state=42)

In [None]:
scaler_X = StandardScaler()
x_train_scaled = scaler_X.fit_transform(X_train)
x_test_scaled = scaler_X.transform(X_test)

In [None]:
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.fit_transform(y_test.reshape(-1,1))

In [None]:
# step:1
linear_model = LinearRegression()
linear_model.fit(x_train_scaled, y_train_scaled)

In [None]:
# Make prediction
y_train_pred = linear_model.predict(x_train_scaled)
y_test_pred = linear_model.predict(x_test_scaled)

In [None]:
train_mse = mean_squared_error(y_train_scaled, y_train_pred)
test_mse = mean_squared_error(y_test_scaled, y_test_pred)

In [None]:
print(f"Training MSE: {train_mse:.3f}")
print(f"\nTest MSE: {test_mse:.3f}")

Training MSE: 0.000

Test MSE: 0.000


In [None]:
#Dispaly coefficients
coefficients_df = pd.DataFrame({
    "Feature": X.columns,
    "Coefficient": linear_model.coef_
})

In [None]:
print("Top 10 model coefficients:")
print(coefficients_df.sort_values("Coefficient",key=abs,ascending=False).head(10))

Top 10 model coefficients:
                       Feature   Coefficient
7           median_house_value  1.000000e+00
1                     latitude -8.187895e-16
0                    longitude -8.133428e-16
5                   households -4.601354e-16
6                median_income  2.841152e-16
8       ocean_proximity_INLAND -2.836273e-16
3                  total_rooms  2.764716e-16
10    ocean_proximity_NEAR BAY  2.099015e-16
2           housing_median_age  1.669671e-16
11  ocean_proximity_NEAR OCEAN -1.014813e-16


In [None]:
print(f"Training set size: {x_train_scaled.shape[0]}")
print(f"Test set size: {x_test_scaled.shape[0]} ")

Training set size: 14448
Test set size: 6192 


In [None]:
print("\n"+"="*70)
print("Step 2: HYPERPARAMETER TUNING WITH GRID SEARCH CV")
print("="*70)


Step 2: HYPERPARAMETER TUNING WITH GRID SEARCH CV


In [None]:
# Define the grid of alpha values (regularization strength)
# Using a wide range from very small to very large values
# Alpha controls how strongly the model penalizes large coefficients.
alpha_grid = {
    'alpha': [0.001,0.01,0.1,1,10,100,1000,10000]
}
print("\nAlpha values to test:", alpha_grid['alpha'])


Alpha values to test: [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]


In [None]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import GridSearchCV

print("\n" + "="*70)
print("RIGID REGRESSION")
print("="*70)

ridge = Ridge()

ridge_grid_search = GridSearchCV(
    estimator = ridge,
    param_grid = alpha_grid,
    cv = 5,
    scoring ='neg_mean_squared_error',
    n_jobs = -1,
    verbose=1
)

print("\nTraining Ridge with GridSearchCV....")
ridge_grid_search.fit(x_train_scaled, y_train_scaled)



RIGID REGRESSION

Training Ridge with GridSearchCV....
Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [None]:
#  get the best parameters
best_ridge_alpha = ridge_grid_search.best_params_['alpha']
print(f"\nBest alpha for ridge: {best_ridge_alpha}")



Best alpha for ridge: 0.001


In [None]:
# get cross-validatio results
ridge_cv_results = pd.DataFrame(ridge_grid_search.cv_results_)
ridge_cv_results["mean_mse"] = -ridge_cv_results['mean_test_score']
ridge_cv_results['std_mse'] = ridge_cv_results['std_test_score']

print("\nCross-Validation Results for Ridge:")
print(ridge_cv_results[['param_alpha', 'mean_mse', 'std_mse']].to_string(index=False))


Cross-Validation Results for Ridge:
 param_alpha     mean_mse      std_mse
       0.001 2.120810e-14 1.745611e-15
       0.010 2.120789e-12 1.745569e-13
       0.100 2.120583e-10 1.745146e-11
       1.000 2.118527e-08 1.740931e-09
      10.000 2.098326e-06 1.699925e-07
     100.000 1.924666e-04 1.378026e-05
    1000.000 1.100163e-02 4.783654e-04
   10000.000 1.630296e-01 5.974828e-03


In [None]:
# Train the best Ridge model
best_ridge_model = Ridge(alpha=best_ridge_alpha)
best_ridge_model.fit(x_train_scaled, y_train_scaled)

# Make predictions
ridge_train_pred = best_ridge_model.predict(x_train_scaled)
ridge_test_pred = best_ridge_model.predict(x_test_scaled)

# Calculate MSE
ridge_train_mse = mean_squared_error(y_train_scaled, ridge_train_pred)
ridge_test_mse = mean_squared_error(y_test_scaled, ridge_test_pred)

print(f"\nRidge Model Performance:")
print(f"Training MSE: {ridge_train_mse:.4f}")
print(f"Test MSE:     {ridge_test_mse:.4f}")


Ridge Model Performance:
Training MSE: 0.0000
Test MSE:     0.0001


In [None]:
print("\n" + "=" *70)
print("LASSO REGRESSION")
print("="*70)

lasso = Lasso(max_iter=10)

lasso_grid_search = GridSearchCV(
    estimator = lasso,
    param_grid = alpha_grid,
    cv=5,
    scoring = 'neg_mean_squared_error',
    n_jobs =-1,
    verbose=1
)

print("\nTrainingg lasso with gridsearchCV")
lasso_grid_search.fit(x_train_scaled, y_train_scaled)


LASSO REGRESSION

Trainingg lasso with gridsearchCV
Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [None]:
best_lasso_alpha = lasso_grid_search.best_params_['alpha']
print(f"\n Best alpha for lasso : {best_lasso_alpha}")

lasso_cv_results = pd.DataFrame(lasso_grid_search.cv_results_)
lasso_cv_results['mean_mse'] = -lasso_cv_results['mean_test_score']
lasso_cv_results['std_mse'] = lasso_cv_results['std_test_score']

print("\nCross-Validation Results for Lasso:")
print(lasso_cv_results[['param_alpha', 'mean_mse', 'std_mse']].to_string(index=False))




 Best alpha for lasso : 0.01

Cross-Validation Results for Lasso:
 param_alpha  mean_mse  std_mse
       0.001  0.000979 0.000165
       0.010  0.000100 0.000005
       0.100  0.010010 0.000469
       1.000  0.994097 0.041231
      10.000  1.000228 0.031570
     100.000  1.000228 0.031570
    1000.000  1.000228 0.031570
   10000.000  1.000228 0.031570


In [None]:
# Train the best Lasso model
best_lasso_model = Lasso(alpha=best_lasso_alpha, max_iter=10000)
best_lasso_model.fit(x_train_scaled, y_train_scaled)

# Make predictions
lasso_train_pred = best_lasso_model.predict(x_train_scaled)
lasso_test_pred = best_lasso_model.predict(x_test_scaled)

# Calculate MSE
lasso_train_mse = mean_squared_error(y_train_scaled, lasso_train_pred)
lasso_test_mse = mean_squared_error(y_test_scaled, lasso_test_pred)

print(f"\nLasso Model Performance:")
print(f"Training MSE: {lasso_train_mse:.4f}")
print(f"Test MSE:     {lasso_test_mse:.4f}")

# Count non-zero coefficients (feature selection by Lasso)
non_zero_coefs = np.sum(best_lasso_model.coef_ != 0)
print(f"Number of non-zero coefficients: {non_zero_coefs} out of {len(best_lasso_model.coef_)}")



Lasso Model Performance:
Training MSE: 0.0001
Test MSE:     0.0004
Number of non-zero coefficients: 1 out of 12


In [None]:
# SUMMARY
# ============================================

print("\n" + "="*70)
print("HYPERPARAMETER TUNING SUMMARY")
print("="*70)

summary_data = {
    'Model': ['Ridge', 'Lasso'],
    'Best Alpha': [best_ridge_alpha, best_lasso_alpha],
    'Training MSE': [f"{ridge_train_mse:.4f}", f"{lasso_train_mse:.4f}"],
    'Test MSE': [f"{ridge_test_mse:.4f}", f"{lasso_test_mse:.4f}"],
    'Non-zero Coefs': [len(best_ridge_model.coef_), non_zero_coefs]
}

summary_df = pd.DataFrame(summary_data)
print("\n", summary_df.to_string(index=False))

print("\n" + "="*70)
print("Key Insights:")
print("-"*70)
print(f"• Ridge selected alpha = {best_ridge_alpha}, keeping all features")
print(f"• Lasso selected alpha = {best_lasso_alpha}, keeping {non_zero_coefs}/{len(best_lasso_model.coef_)} features")
print(f"• Lasso performs feature selection by setting {len(best_lasso_model.coef_) - non_zero_coefs} coefficients to zero")

if ridge_test_mse < lasso_test_mse:
    print(f"• Ridge has lower test MSE ({ridge_test_mse:.4f} vs {lasso_test_mse:.4f})")
else:
    print(f"• Lasso has lower test MSE ({lasso_test_mse:.4f} vs {ridge_test_mse:.4f})")

print("="*70)


HYPERPARAMETER TUNING SUMMARY

 Model  Best Alpha Training MSE Test MSE  Non-zero Coefs
Ridge       0.001       0.0000   0.0001              12
Lasso       0.010       0.0001   0.0004               1

Key Insights:
----------------------------------------------------------------------
• Ridge selected alpha = 0.001, keeping all features
• Lasso selected alpha = 0.01, keeping 1/12 features
• Lasso performs feature selection by setting 11 coefficients to zero
• Ridge has lower test MSE (0.0001 vs 0.0004)


In [None]:
# Scale features and target
scaler_X = StandardScaler()
scaler_y = StandardScaler()

x_train_scaled = scaler_X.fit_transform(X_train)
x_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

feature_names = X.columns.tolist()

print("="*80)
print("STEP 3: REGULARIZATION EXPERIMENTS (L1 vs L2)")
print("="*80)


STEP 3: REGULARIZATION EXPERIMENTS (L1 vs L2)


In [None]:
print("\n" + "-"*80)
print("PART 1: TRAINING MODELS WITH OPTIMAL HYPERPARAMETERS")
print("-"*80)

optimal_ridge_alpha = 10
optimal_lasso_alpha = 0.01

print(f"\nUsing optimal alphas:")
print(f"  Ridge (L2): α = {optimal_ridge_alpha}")
print(f"  Lasso (L1): α = {optimal_lasso_alpha}")


--------------------------------------------------------------------------------
PART 1: TRAINING MODELS WITH OPTIMAL HYPERPARAMETERS
--------------------------------------------------------------------------------

Using optimal alphas:
  Ridge (L2): α = 10
  Lasso (L1): α = 0.01


In [None]:
# Train baseline model (no regularization)
baseline_model = LinearRegression()
baseline_model.fit(x_train_scaled, y_train_scaled)

# Train Ridge model (L2 regularization)
ridge_model = Ridge(alpha=optimal_ridge_alpha)
ridge_model.fit(x_train_scaled, y_train_scaled)

# Train Lasso model (L1 regularization)
lasso_model = Lasso(alpha=optimal_lasso_alpha, max_iter=10000)
lasso_model.fit(x_train_scaled, y_train_scaled)

print("\n All models trained successfully!")


 All models trained successfully!


In [None]:
print("\n" + "-"*80)
print("PART 2: COEFFICIENT ANALYSIS")
print("-"*80)

# Create coefficient comparison dataframe
coef_comparison = pd.DataFrame({
    'Feature': feature_names,
    'Baseline': baseline_model.coef_,
    'Ridge (L2)': ridge_model.coef_,
    'Lasso (L1)': lasso_model.coef_
})

# Count zero coefficients
baseline_zeros = np.sum(np.abs(baseline_model.coef_) < 1e-10)
ridge_zeros = np.sum(np.abs(ridge_model.coef_) < 1e-10)
lasso_zeros = np.sum(lasso_model.coef_ == 0)

print(f"\nCoefficient Statistics:")
print(f"  Total features: {len(feature_names)}")
print(f"  Baseline - Zero coefficients: {baseline_zeros}")
print(f"  Ridge (L2) - Zero coefficients: {ridge_zeros}")
print(f"  Lasso (L1) - Zero coefficients: {lasso_zeros}")
print(f"\n  → Lasso eliminated {lasso_zeros} features (sparse solution)")
print(f"  → Ridge kept all {len(feature_names)} features (dense solution)")

# Show top features by absolute coefficient value
print("\n" + "="*80)
print("TOP 10 FEATURES BY ABSOLUTE COEFFICIENT VALUE")
print("="*80)

for model_name in ['Baseline', 'Ridge (L2)', 'Lasso (L1)']:
    print(f"\n{model_name}:")
    # Create absolute value column for sorting
    coef_comparison['abs_coef'] = coef_comparison[model_name].abs()
    top_features = coef_comparison.nlargest(10, 'abs_coef')
    print(top_features[['Feature', model_name]].to_string(index=False))



--------------------------------------------------------------------------------
PART 2: COEFFICIENT ANALYSIS
--------------------------------------------------------------------------------

Coefficient Statistics:
  Total features: 12
  Baseline - Zero coefficients: 11
  Ridge (L2) - Zero coefficients: 0
  Lasso (L1) - Zero coefficients: 11

  → Lasso eliminated 11 features (sparse solution)
  → Ridge kept all 12 features (dense solution)

TOP 10 FEATURES BY ABSOLUTE COEFFICIENT VALUE

Baseline:
                   Feature      Baseline
        median_house_value  1.000000e+00
                  latitude -8.187895e-16
                 longitude -8.133428e-16
                households -4.601354e-16
             median_income  2.841152e-16
    ocean_proximity_INLAND -2.836273e-16
               total_rooms  2.764716e-16
  ocean_proximity_NEAR BAY  2.099015e-16
        housing_median_age  1.669671e-16
ocean_proximity_NEAR OCEAN -1.014813e-16

Ridge (L2):
                 Feature  Ridge 

In [None]:
print("\n" + "-"*80)
print("PART 3: MODEL PERFORMANCE EVALUATION")
print("-"*80)

# Calculate predictions and MSE for all models
models = {
    'Baseline': baseline_model,
    'Ridge (L2)': ridge_model,
    'Lasso (L1)': lasso_model
}

results = []

for name, model in models.items():
    train_pred = model.predict(x_train_scaled)
    test_pred = model.predict(x_test_scaled)

    train_mse = mean_squared_error(y_train_scaled, train_pred)
    test_mse = mean_squared_error(y_test_scaled, test_pred)

    # Calculate difference (overfitting measure)
    overfit_gap = test_mse - train_mse
    overfit_pct = (overfit_gap / train_mse) * 100

    results.append({
        'Model': name,
        'Train MSE': train_mse,
        'Test MSE': test_mse,
        'Gap': overfit_gap,
        'Gap %': overfit_pct
    })

results_df = pd.DataFrame(results)
print("\nModel Performance Comparison:")
print(results_df.to_string(index=False))


--------------------------------------------------------------------------------
PART 3: MODEL PERFORMANCE EVALUATION
--------------------------------------------------------------------------------

Model Performance Comparison:
     Model    Train MSE     Test MSE           Gap     Gap %
  Baseline 4.751447e-31 4.529941e-31 -2.215066e-32 -4.661877
Ridge (L2) 1.330974e-06 1.306426e-06 -2.454813e-08 -1.844373
Lasso (L1) 1.000000e-04 9.797337e-05 -2.026632e-06 -2.026632


In [None]:
print("\n" + "-"*80)
print("PART 4: BIAS-VARIANCE TRADEOFF ANALYSIS")
print("-"*80)

print("\nKey Observations:")

# Analyze each model
for idx, row in results_df.iterrows():
    print(f"\n{row['Model']}:")
    print(f"  Training MSE: {row['Train MSE']:.4f}")
    print(f"  Test MSE:     {row['Test MSE']:.4f}")
    print(f"  Gap:          {row['Gap']:.4f} ({row['Gap %']:.2f}%)")

    if row['Gap %'] > 5:
        print(f"  → Shows signs of overfitting (high variance)")
    elif row['Gap %'] < -5:
        print(f"  → Shows signs of underfitting (high bias)")
    else:
        print(f"  → Well-balanced bias-variance tradeoff")

print("\n" + "="*80)
print("REGULARIZATION EFFECTS:")
print("="*80)
print("""
L1 (Lasso) Regularization:
  • Produces SPARSE solutions (sets some coefficients to zero)
  • Performs automatic feature selection
  • Reduces variance by eliminating irrelevant features
  • Useful when you suspect many features are irrelevant

L2 (Ridge) Regularization:
  • Produces DENSE solutions (shrinks all coefficients)
  • Keeps all features but reduces their magnitude
  • Reduces variance by penalizing large coefficients
  • Useful when all features may contribute to prediction

Bias-Variance Tradeoff:
  • Too little regularization (small α) → High variance, overfitting
  • Optimal regularization → Balanced bias and variance
  • Too much regularization (large α) → High bias, underfitting
""")


--------------------------------------------------------------------------------
PART 4: BIAS-VARIANCE TRADEOFF ANALYSIS
--------------------------------------------------------------------------------

Key Observations:

Baseline:
  Training MSE: 0.0000
  Test MSE:     0.0000
  Gap:          -0.0000 (-4.66%)
  → Well-balanced bias-variance tradeoff

Ridge (L2):
  Training MSE: 0.0000
  Test MSE:     0.0000
  Gap:          -0.0000 (-1.84%)
  → Well-balanced bias-variance tradeoff

Lasso (L1):
  Training MSE: 0.0001
  Test MSE:     0.0001
  Gap:          -0.0000 (-2.03%)
  → Well-balanced bias-variance tradeoff

REGULARIZATION EFFECTS:

L1 (Lasso) Regularization:
  • Produces SPARSE solutions (sets some coefficients to zero)
  • Performs automatic feature selection
  • Reduces variance by eliminating irrelevant features
  • Useful when you suspect many features are irrelevant

L2 (Ridge) Regularization:
  • Produces DENSE solutions (shrinks all coefficients)
  • Keeps all features but 

In [None]:
print("="*70)
print("PART 2: CLASSIFICATION tASK")
print("="*70)

PART 2: CLASSIFICATION tASK


In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
X,y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size =0.2, random_state=42)

In [None]:
print("="*70)
print("STEP 1")
print("="*70)

STEP 1


In [None]:
#TRAIN THE MODEL ON TRAINING SET
model = LogisticRegression(
    # penalty='none',
    solver='lbfgs',
    max_iter=5000
)

In [None]:
model.fit(X_train, y_train)

In [None]:
coefficient = model.coef_
print("Model Coefficients:")
print(coefficient)

Model Coefficients:
[[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]


In [None]:
# Predict on training and test data ----------
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [None]:
#compute accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print("\nTraining Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)


Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193


In [None]:
print("="*70)
print("STEP 2: HYPERPARAMETER TUNING WITH REGU;ARIZATION")
print("="*70)

STEP 2: HYPERPARAMETER TUNING WITH REGU;ARIZATION


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [None]:
# Define Logistic Regression model (with regularization) ----------
log_reg = LogisticRegression(
    solver='liblinear',
    max_iter=5000
)

In [None]:
#define hyperparameter grid
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2']
}

In [None]:
# Perform cross-validation using GridSearchCV
grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    cv=5,                # 5-fold cross-validation
    scoring='accuracy'
)


In [None]:
# Train GridSearchCV on the training set
grid_search.fit(X_train, y_train)


In [None]:
# Identify optimal hyperparameters
print("Best Hyperparameters:")
print(grid_search.best_params_)

Best Hyperparameters:
{'C': 100, 'penalty': 'l1'}


In [None]:
# Retrieve the best model
best_model = grid_search.best_estimator_

In [None]:
# Evaluate the tuned model on the test set
y_test_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy after Hyperparameter Tuning:", test_accuracy)

Test Accuracy after Hyperparameter Tuning: 0.9824561403508771


In [None]:
print("="*70)
print("STEP 3: REGULARIZATION EXPERIMENT")
print("="*70)

STEP 3: REGULARIZATION EXPERIMENT


In [None]:
# Use optimal hyperparameters found in Step 2
best_C = grid_search.best_params_['C']

In [None]:
# =====================================================
# L1 Regularization (Lasso-like)
# =====================================================

# ---------- Train Logistic Regression with L1 ----------
l1_model = LogisticRegression(
    penalty='l1',
    C=best_C,
    solver='liblinear',
    max_iter=5000
)
l1_model.fit(X_train, y_train)

In [None]:
# Evaluate L1 model
l1_train_acc = accuracy_score(y_train, l1_model.predict(X_train))
l1_test_acc = accuracy_score(y_test, l1_model.predict(X_test))

print("L1 Training Accuracy:", l1_train_acc)
print("L1 Test Accuracy:", l1_test_acc)


L1 Training Accuracy: 0.989010989010989
L1 Test Accuracy: 0.9824561403508771


In [None]:
#  Observe L1 coefficients
l1_coefficients = l1_model.coef_
l1_zero_coeffs = np.sum(l1_coefficients == 0)

print("Number of zero coefficients (L1):", l1_zero_coeffs)

Number of zero coefficients (L1): 9


In [None]:
# =====================================================
# L2 Regularization (Ridge-like)
# =====================================================

# ---------- Train Logistic Regression with L2 ----------
l2_model = LogisticRegression(
    penalty='l2',
    C=best_C,
    solver='liblinear',
    max_iter=5000
)

l2_model.fit(X_train, y_train)

In [None]:
# Evaluate L2 model
l2_train_acc = accuracy_score(y_train, l2_model.predict(X_train))
l2_test_acc = accuracy_score(y_test, l2_model.predict(X_test))

print("L2 Training Accuracy:", l2_train_acc)
print("L2 Test Accuracy:", l2_test_acc)

L2 Training Accuracy: 0.9692307692307692
L2 Test Accuracy: 0.956140350877193


In [None]:
# Observe L2 coefficients
l2_coefficients = l2_model.coef_
l2_zero_coeffs = np.sum(l2_coefficients == 0)

print("Number of zero coefficients (L2):", l2_zero_coeffs)

Number of zero coefficients (L2): 0
