# Regularization (Ridge and Lasso)

In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split 

from sklearn.linear_model import LinearRegression,LogisticRegression, LogisticRegressionCV
from sklearn.linear_model import Ridge, RidgeCV, Lasso, LassoCV, ElasticNet, ElasticNetCV
from sklearn.model_selection import GridSearchCV

import statsmodels.api as sm

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer
from sklearn.metrics import roc_curve

In [None]:
import sklearn.metrics as metrics
def regression_results(y_true, y_pred):
    """Print a summary of common regression metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted target values, aligned element-wise with ``y_true``.

    Returns
    -------
    None
        Every metric is written to standard output, rounded to 4 decimals.
    """
    explained_variance = metrics.explained_variance_score(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    median_ae = metrics.median_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)

    # NOTE(review): mean_squared_log_error was disabled in the original version;
    # presumably because it is rejected for negative values -- confirm before
    # re-adding it.
    print('explained_variance: ', round(explained_variance,4))
    print('r2: ', round(r2,4))
    print('MAE: ', round(mae,4))
    # The median absolute error used to be computed and then silently dropped.
    print('MedAE: ', round(median_ae,4))
    print('MSE: ', round(mse,4))
    # RMSE is derived from MSE so both figures always agree.
    print('RMSE: ', round(np.sqrt(mse),4))

## Linear Regression

In [None]:
# Data Structure
# Load housing_hw.csv and separate the MEDV target column from the predictors.
df = pd.read_csv('housing_hw.csv')
X = df.drop(columns=['MEDV'])
y = df['MEDV']
# Hold out 20% of the rows for testing. The seed is fixed (same value as the
# bank split further down) so the fitted coefficients are reproducible between
# reruns instead of changing with every shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# Fit an unpenalized baseline plus three penalized linear models on the same
# training split so their coefficients can be compared side by side.
model0 = LinearRegression().fit(X=X_train, y=y_train)

# alpha is the penalty strength (lambda).
modelR = Ridge(alpha=1).fit(X=X_train, y=y_train)
modelL = Lasso(alpha=1).fit(X=X_train, y=y_train)

# l1_ratio = 1 --> Lasso, l1_ratio = 0 --> Ridge; 0.5 mixes both penalties.
modelE = ElasticNet(
    alpha=1,
    l1_ratio=0.5,
).fit(X=X_train, y=y_train)

In [None]:
# Print the coefficient vectors, one per line, in the order:
# LinearRegression, Ridge, Lasso, ElasticNet.
print(*(fitted.coef_ for fitted in (model0, modelR, modelL, modelE)), sep='\n')

## Multicollinearity

In [None]:
# Data Structure
# Load multi.csv and separate the Target column from the predictors.
df = pd.read_csv('multi.csv')
X = df.drop(columns=['Target'])
y = df['Target']
# Hold out 20% of the rows for testing. The seed is fixed (same value as the
# bank split further down) so the OLS / Ridge comparison below is made on an
# identical, reproducible split on every rerun.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [None]:
# Unpenalized least-squares fit on the training split, used as the baseline.
modelM0 = LinearRegression().fit(X=X_train, y=y_train)

In [None]:
# Inspect the coefficients estimated by the unpenalized linear regression.
modelM0.coef_

In [None]:
# Report the baseline model's metrics on the held-out test rows.
regression_results(y_true=y_test, y_pred=modelM0.predict(X_test))

In [None]:
# Ridge regression with a fixed penalty strength of 1 on the training split.
modelMR = Ridge(alpha=1).fit(X=X_train, y=y_train)

In [None]:
# Report the fixed-alpha ridge model's metrics on the held-out test rows.
regression_results(y_true=y_test, y_pred=modelMR.predict(X_test))

In [None]:
# Search 100 evenly spaced alpha values between 0.001 and 10 with 10-fold
# cross-validation, scoring each candidate by negative mean squared error.
parameters = [{'alpha': np.linspace(start=0.001, stop=10, num=100)}]
modelMRCV = GridSearchCV(
    estimator=modelMR,
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=10,
).fit(X=X_train, y=y_train)
# Show the alpha that achieved the best cross-validated score.
modelMRCV.best_params_

In [None]:
# Let RidgeCV choose alpha itself from 100 evenly spaced values in [0.001, 10].
# This rebinds modelMRCV to the fitted RidgeCV estimator.
modelMRCV = RidgeCV(
    alphas=np.linspace(start=0.001, stop=10, num=100),
).fit(X=X_train, y=y_train)

In [None]:
# Inspect the coefficients of the cross-validated ridge model.
modelMRCV.coef_

In [None]:
# Report the cross-validated ridge model's metrics on the held-out test rows.
regression_results(y_true=y_test, y_pred=modelMRCV.predict(X_test))

## Logistic Regression + Ridge/Lasso

In [None]:
# Load UniversalBank.csv and store Education with pandas' categorical dtype.
# NOTE(review): the categorical dtype by itself does not create dummy columns;
# confirm whether one-hot encoding was intended before modelling.
bank = pd.read_csv('UniversalBank.csv').astype({'Education': 'category'})

In [None]:
# Predictors: every column except the two identifier-like fields and the target.
X = bank.drop(columns=['ID','ZIPCode','PersonalLoan'])

In [None]:
# Target: the PersonalLoan column, stored as a categorical series.
y = bank['PersonalLoan'].astype('category')

In [None]:
# Seeded 80/20 split; stratifying on y keeps the class proportions the same in
# the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

In [None]:
# Logistic regression with scikit-learn's default settings, fitted on the
# training split.
logistic = LogisticRegression().fit(X=X_train, y=y_train)

In [None]:
# Predicted class labels for the held-out test rows.
y_pred = logistic.predict(X=X_test)

In [None]:
# Confusion matrix as a labelled table: rows are the true classes, columns the
# predicted classes.
confmat = pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred),
    index=['True[0]','True[1]'],
    columns=['Predict[0]', 'Predict[1]'],
)
confmat

In [None]:
# Print a heading followed by the per-class precision / recall / F1 report.
print(
    'Classification Report',
    classification_report(y_true=y_test, y_pred=y_pred),
    sep='\n',
)

In [None]:
## C = 1/lambda, i.e. big C = small lambda (weaker regularization)
# Tune C with 5-fold cross-validation on the TRAINING split only. Fitting on the
# full X, y would let the held-out test rows influence both the selected C and
# the final coefficients, so the test-set confusion matrix and classification
# report computed afterwards would be optimistically biased.
logisticCV = LogisticRegressionCV(Cs=np.linspace(0.1,10000,1000), cv=5,scoring='accuracy',penalty='l2').fit(X_train,y_train) ## default = l2 - ridge

In [None]:
# Show the C value selected by the cross-validated logistic regression.
logisticCV.C_

In [None]:
# Predicted class labels for the held-out test rows from the tuned model.
y_pred = logisticCV.predict(X=X_test)

In [None]:
# Confusion matrix as a labelled table: rows are the true classes, columns the
# predicted classes.
confmat = pd.DataFrame(
    data=confusion_matrix(y_true=y_test, y_pred=y_pred),
    index=['True[0]','True[1]'],
    columns=['Predict[0]', 'Predict[1]'],
)
confmat

In [None]:
# Print a heading followed by the per-class precision / recall / F1 report.
print(
    'Classification Report',
    classification_report(y_true=y_test, y_pred=y_pred),
    sep='\n',
)