## Ridge Regression Tutorial

In [1]:
#Import Statments
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import seaborn as sns

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import roc_auc_score, mean_absolute_error, mean_squared_error,r2_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.datasets import make_regression

In [2]:
#Make Datasets

X,y = make_regression(n_samples=100,n_features=4,noise=1, random_state=315,effective_rank=2)

df = pd.DataFrame(X,columns=["a","b","c","d"])
df["e"] = y
print(df)


           a         b         c         d          e
0   0.145678 -0.108113  0.046811 -0.061339  11.591929
1  -0.049735  0.059540  0.017027 -0.018199  -2.799965
2   0.025508 -0.055344 -0.113126  0.071376  -6.423020
3  -0.034384 -0.017960 -0.098345 -0.028433 -11.930368
4  -0.031457 -0.039835  0.051812  0.073474   4.186652
..       ...       ...       ...       ...        ...
95 -0.030515  0.088784 -0.124643  0.029954 -12.156835
96 -0.031327  0.037406 -0.064682 -0.000899  -7.083206
97  0.146420 -0.030952 -0.093596  0.035321   2.709943
98  0.051122  0.057847  0.012086 -0.064073   4.092283
99 -0.166038  0.078918 -0.036810 -0.105878 -17.381664

[100 rows x 5 columns]


### Split the Data into train and test data.

In [3]:
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=315)

### Scale the feature data

In [4]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)



### Create Regression Model

In [5]:
ridge = Ridge()
ridge.fit(X_train,y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


### Score the model

In [6]:
print(ridge.score(X_train,y_train))
print(ridge.score(X_test,y_test))

0.9897349051751464
0.9066625487300933


### Create prediction

In [7]:
y_pred = ridge.predict(X_test)

mae = mean_absolute_error(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
r2 = r2_score(y_test,y_pred)

print(f'Mean ABS Error - {mae}')
print(f'Mean SQR Error - {mse}')
print(f'R2 Score - {r2}')


Mean ABS Error - 2.323397492628678
Mean SQR Error - 8.340112051012444
R2 Score - 0.9066625487300933


### Hyper Parameter Tuning

In [None]:
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
}

ridge_cv = GridSearchCV(ridge, param_grid, cv=3, n_jobs=-1)
ridge_cv.fit(X_train,y_train)

0,1,2
,estimator,Ridge()
,param_grid,"{'alpha': [0.0001, 0.001, ...]}"
,scoring,
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,alpha,0.1
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


### Cross Validation Prediction

In [13]:
y_pred2 = ridge_cv.predict(X_test)

mae = mean_absolute_error(y_test,y_pred2)
mse = mean_squared_error(y_test,y_pred2)
r2 = r2_score(y_test,y_pred2)

print(f'Mean ABS Error - {mae}')
print(f'Mean SQR Error - {mse}')
print(f'R2 Score - {r2}')

Mean ABS Error - 2.3076800540657967
Mean SQR Error - 8.069274066575122
R2 Score - 0.9096936023921829


In [15]:
ridge_cv.best_estimator_
print(ridge_cv.best_estimator_.coef_)
print(ridge_cv.best_estimator_.intercept_)

[6.63466767 1.02227748 6.59925944 2.49354893]
-1.101485718877789


### Plot Features

In [None]:
rows = 2
cols = 2
feature_x = X
feature_y = X[:,1:]

fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(10,8))
for i in range(rows):
    for j in range(cols):
        ax = axs[i,j]


In [None]:
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

arr1 = arr[:,1:]


SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='? (3402186469.py, line 8)