# Lasso Regression  ( L1 Regularization )

---


IMPORTING DATA SCIENCE LIBRARIES

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


IMPORT MACHINE LEARNING LIBRARIES AND CLASSES

In [None]:
from sklearn.model_selection import train_test_split               #for splitting the data into test and training data
from sklearn.compose import ColumnTransformer                       #for transforming the columns
from sklearn.impute import SimpleImputer                             #for imputing the missing values
from sklearn.preprocessing import OneHotEncoder                      #one hot encoding
from sklearn.preprocessing import MinMaxScaler                        #standard scaling

from sklearn.datasets import load_diabetes

from sklearn.datasets import make_regression

import plotly.express as px
import plotly.graph_objects as go


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score                 # for accuracy score
from sklearn.model_selection import cross_val_score        # for cross validation score

from sklearn.linear_model import LinearRegression           # Import the LinearRegression class
from sklearn.metrics import mean_squared_error, r2_score    # to find out the error functions
from sklearn.preprocessing import PolynomialFeatures , StandardScaler   # for the polunomial features

from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge   # ridge Regression
from sklearn.linear_model import Lasso   # lasso Regression
from sklearn.linear_model import ElasticNet   # elasticNet Regression
from mlxtend.plotting import plot_linear_regression    # MLX bs
from sklearn.linear_model import SGDRegressor

In [None]:
!pip install mlx

# 1. How are coefficients affected?


In [None]:
# Load the scikit-learn diabetes dataset and gather the features and the target
# into a single DataFrame (feature columns first, `target` as the last column).
data = load_diabetes()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(target=data.target)
df.head()

In [None]:
# Hold out 20% of the diabetes rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='target'),
    df['target'],
    test_size=0.2,
    random_state=2,
)

 Performing Lasso Regression

In [None]:
# Fit one model per regularization strength and record its coefficients and test R^2.
coef = []        # coef[k]: fitted coefficients (as a list) for the k-th alpha
r2_scores = []   # r2_scores[k]: R^2 on the held-out test split for the k-th alpha

for alpha in [0, 0.1, 1, 10]:
    # alpha = 0 is plain least squares. scikit-learn advises against Lasso(alpha=0)
    # for numerical reasons, so LinearRegression is used for that baseline.
    reg = LinearRegression() if alpha == 0 else Lasso(alpha=alpha)
    reg.fit(X_train, y_train)

    coef.append(reg.coef_.tolist())
    y_pred = reg.predict(X_test)
    r2_scores.append(r2_score(y_test, y_pred))

Plotting a graph for the Lasso Regression

In [None]:
# Draw the coefficients for every alpha in ONE 2x2 figure so the panels can be compared.
# (Previously each panel opened its own figure and slot 224 was used twice.)
plot_alphas = [0, 0.1, 1, 10]   # must match the alphas used when `coef` / `r2_scores` were filled

# A shared y-axis makes the shrinkage of the coefficients visible across panels.
fig, axes = plt.subplots(2, 2, figsize=(14, 9), sharey=True)

for ax, alpha, weights, score in zip(axes.flat, plot_alphas, coef, r2_scores):
    ax.bar(data.feature_names, weights)
    ax.set_title('Alpha = {} , r2_score = {}'.format(alpha, round(score, 2)))
    ax.set_xlabel('feature')
    ax.set_ylabel('coefficient')

fig.tight_layout()
plt.show()

When we increase the value of alpha too much (here, alpha = 10), Lasso shrinks the weights of all the features to 0. This leads to under-fitting.

# 2. Higher Coefficients are Affected More

In [None]:
# Sweep the regularization strength and keep the fitted coefficients of each model.
# This is the Lasso notebook, so the sweep uses Lasso, as the other sections do
# (it previously fitted Ridge here).
alphas = [0, 0.001, 0.01, 0.1, 1, 10, 100, 1000]   # the different alpha values
coefs = []                                         # coefs[k]: coefficient list for alphas[k]

for alpha in alphas:
    # alpha = 0 is ordinary least squares; scikit-learn advises against Lasso(alpha=0)
    # for numerical reasons, so LinearRegression is used for that case.
    reg = LinearRegression() if alpha == 0 else Lasso(alpha=alpha)
    reg.fit(X_train, y_train)
    coefs.append(reg.coef_.tolist())

In [None]:
# Tabulate the sweep: one row per alpha, one column per feature.
input_array = np.asarray(coefs)

coef_df = pd.DataFrame(
    data=input_array,
    index=alphas,
    columns=data.feature_names,
).assign(alpha=alphas)

# Show the table keyed by alpha; the result is displayed, not stored back into coef_df.
coef_df.set_index(keys='alpha')

# 3. Impact of Lambda on Bias and Variance

In [None]:
# Synthetic one-feature regression problem: a quadratic trend plus Gaussian noise.
rng = np.random.default_rng(42)   # fixed seed so the data and the plots derived from it are reproducible

m = 100                           # number of samples
X = 5 * rng.random((m, 1))        # features drawn uniformly from [0, 5), shape (m, 1)
y = 0.7 + X**2 - X*2 + 3 + rng.standard_normal((m, 1))   # targets, shape (m, 1)

plt.scatter(X, y)
plt.xlabel('X')
plt.ylabel('y')
plt.show()

In [None]:
# Split the synthetic data 80/20. This rebinds X_train / X_test / y_train / y_test,
# which held the diabetes split earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=2,
)

In [None]:
from mlxtend.evaluate import bias_variance_decomp

# Decompose the expected test MSE into bias and variance for a range of alpha values.
alphas = np.linspace(0, 30, 100)

loss = []       # average expected loss per alpha
bias = []       # average bias per alpha
variance = []   # average variance per alpha

# bias_variance_decomp expects 1-D target arrays (shape [n_samples]). With column
# vectors of shape (n, 1), the 1-D predictions broadcast against the targets into an
# (n, n) matrix and the reported loss and bias are silently wrong. Flatten once here.
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

for alpha in alphas:
    # alpha = 0 is ordinary least squares; scikit-learn advises against Lasso(alpha=0)
    # for numerical reasons, so LinearRegression is used for that case.
    reg = LinearRegression() if alpha == 0 else Lasso(alpha=alpha)

    avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        reg, X_train, y_train_1d, X_test, y_test_1d, loss='mse', random_seed=123)

    loss.append(avg_expected_loss)
    bias.append(avg_bias)
    variance.append(avg_var)

In [None]:
# Plot the three components of the decomposition against alpha on shared axes.
fig, ax = plt.subplots()
ax.plot(alphas, loss, label='Loss')
ax.plot(alphas, bias, label='Bias')
ax.plot(alphas, variance, label='Variance')
ax.set_xlabel('alpha')
ax.set_ylabel('mean squared error')
ax.set_title('Loss, bias and variance vs. alpha')
ax.legend()
plt.show()

# 4. Effect of Regularization on LOSS FUNCTION

In [None]:
# make_regression and LinearRegression are already imported in the import cell at the
# top of the notebook, so they are not re-imported here.

# One-feature synthetic regression data; the fixed random_state makes it repeatable.
# Note: this rebinds `y`, which held the quadratic targets of the previous section.
x, y = make_regression(n_samples=100, n_features=1, n_informative=1, n_targets=1,
                       noise=20, random_state=13)

plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# Ordinary least-squares baseline; its slope and intercept are printed for reference.
# NOTE(review): the printed intercept is presumably the source of the 2.29 constant
# hard-coded in cal_loss / predict below — confirm against the printed value.
lin_reg = LinearRegression()
lin_reg.fit(x, y)
print(lin_reg.coef_)
print(lin_reg.intercept_)

In [None]:
def cal_loss(m, alpha, x_data=None, y_data=None, intercept=-2.29):
    """Return the L1-penalised squared-error loss of the line ``m * x + intercept``.

    The value computed is ``sum((y - (m * x + intercept)) ** 2) + alpha * |m|``.

    Parameters
    ----------
    m : float
        Slope of the line being evaluated.
    alpha : float
        Weight of the L1 penalty applied to ``m``.
    x_data : array-like, optional
        Feature values; flattened to 1-D before use. When omitted, the
        notebook-level ``x`` is used.
    y_data : array-like, optional
        Target values; flattened to 1-D before use. When omitted, the
        notebook-level ``y`` is used.
    intercept : float, default -2.29
        Fixed intercept of the line. The default keeps the value that was
        previously hard-coded in this function.

    Returns
    -------
    float
        Sum of squared residuals plus the L1 penalty.
    """
    features = np.ravel(x if x_data is None else x_data)
    targets = np.ravel(y if y_data is None else y_data)
    # Subtracting the intercept keeps the arithmetic identical to the earlier
    # `y - m*x + 2.29` form when the default intercept is used.
    residuals = targets - m * features - intercept
    return np.sum(residuals ** 2) + alpha * abs(m)

In [None]:
def predict(m, x_data=None, intercept=-2.29):
    """Return the predictions ``m * x + intercept`` of a line with slope ``m``.

    Parameters
    ----------
    m : float
        Slope of the line.
    x_data : array-like, optional
        Feature values to predict for. When omitted, the notebook-level ``x``
        is used.
    intercept : float, default -2.29
        Fixed intercept of the line. The default keeps the value that was
        previously hard-coded in this function.

    Returns
    -------
    numpy.ndarray
        Predictions with the same shape as the feature array.
    """
    features = x if x_data is None else np.asarray(x_data)
    return m * features + intercept

In [None]:
# Plot the penalised loss as a function of the slope m, one curve per alpha.
m = np.linspace(-45, 100, 100)   # candidate slopes; rebinds `m` used earlier in the notebook
plt.figure(figsize=(12, 12))

for penalty in [0, 10, 100, 500, 1000, 2500]:
    loss = [cal_loss(slope, penalty) for slope in m]
    plt.plot(m, loss, label=f'alpha={penalty}')

plt.legend()
plt.xlabel('m')      # the x-axis is the slope being swept; alpha only distinguishes the curves
plt.ylabel('loss')
plt.show()