In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# NOTE(review): this silences every warning for the rest of the session, including
# deprecation/future warnings emitted by the libraries imported above.
warnings.filterwarnings('ignore')

In [None]:
# Load the "mpg" example dataset through seaborn's dataset loader.
# NOTE(review): load_dataset may need network access on first use - confirm.
data = sns.load_dataset("mpg")

In [None]:
# Remove the "name" column before modelling (presumably a free-text label - every
# remaining column is later fed to the regressors as a feature).
# Reassigning instead of mutating in place, together with errors="ignore", keeps this
# cell idempotent: re-running it after the column is gone no longer raises KeyError.
data = data.drop(columns="name", errors="ignore")

In [None]:
# Column dtypes and non-null counts; use data.head(2) for a quick row preview instead.
data.info()

In [None]:
# horsepower has some null values: look at its distribution (and outliers) before
# deciding how to impute them.
# The series is passed by keyword because the meaning of seaborn's first positional
# argument changed between releases (x in <=0.11, data in >=0.12), which silently
# changes the orientation of the plot.
fig, ax = plt.subplots()
sns.boxplot(x=data["horsepower"], ax=ax)
ax.set(title="Horsepower distribution", xlabel="horsepower")
plt.show()

In [None]:
# No outlier treatment has been done, so the missing horsepower values are replaced
# with the median, which is less sensitive to outliers than the mean.
# The filled column is assigned back explicitly: calling fillna(inplace=True) on
# data["horsepower"] is chained in-place assignment, which pandas 2.x deprecates and
# which leaves `data` unchanged once Copy-on-Write is enabled.
data["horsepower"] = data["horsepower"].fillna(data["horsepower"].median())

In [None]:
# List the distinct values of "origin" to see which categories have to be encoded.
# To inspect every column instead:
# for i in data.columns:
#   print(f"{i} : {data[i].unique()}")

data["origin"].unique()

In [None]:
# data encoding
data["origin"] = data["origin"].map({"usa":1,"japan":2,"europe":3})

In [None]:
# Separate the dependent variable (mpg) from the independent variables
# (every other column of the frame).
y = data["mpg"]
X = data.drop(columns="mpg")

In [None]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [None]:
# Report the shape of each piece produced by the split.
for label, part in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"{label} : {part.shape}")

In [None]:
# Baseline model: plain linear regression with default settings (no regularisation term).
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model

In [None]:
# Train the baseline model on the training split only.
model.fit(X_train,y_train)

In [None]:
marks = [34,56,7,64,89]

# Side note on enumerate(): it yields (position, value) pairs, so there is no need to
# maintain a manual counter next to the loop. Every mark is printed, and the element
# at position 2 gets an extra line.
for index, i in enumerate(marks):
    print(i)
    if index != 2:
        continue
    print(f"Index with enumerate {i}")


In [None]:
# Model coefficients: one fitted weight per predictor column, rounded to 4 decimals.
for col_name, weight in zip(X_train.columns, model.coef_):
    print(f"{col_name} : {round(weight, 4)}")

# Observation (author's reading of the output): the coefficients are relatively small,
# so a change in a single independent variable shifts the prediction only slightly.
# This is sometimes described as a "smoother" model.
#
# Features with coefficients close to zero might not be contributing to the model.
# NOTE(review): the features are not scaled, so coefficient magnitudes are not
# directly comparable across columns - confirm before dropping any feature.

In [None]:
from sklearn.metrics import r2_score

# Predict mpg for the held-out rows with the linear model fitted above.
# NOTE: `model` is rebound to an ElasticNet further down, so this cell must run before that one.
y_pred = model.predict(X_test)

In [None]:
# R2 of the baseline predictions on the test split.
print(f"R2 Score : {r2_score(y_test,y_pred)}")

In [None]:
# Cross-validation with hyperparameter tuning
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import Lasso, Ridge,ElasticNet

## **LASSO**

### **GridSearchCV with Lasso**

In [None]:
# Define the estimator: a Lasso with default settings; its alpha is tuned by the search below.
lasso = Lasso()
lasso

In [None]:
# GridSearchCV expects a dictionary with parameter names as keys and lists of the
# parameter settings to try as values.
param_grid = {"alpha":[0.001,0.01,0.1,1,10,100]}
param_grid

In [None]:
# Exhaustive search over every alpha in param_grid, scored by R2 with 5-fold cross-validation.
grid_search = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    cv=5,
    scoring="r2",
    verbose=2,
)
grid_search

In [None]:
# Run the grid search on the training split.
grid_search.fit(X_train,y_train)

In [None]:
# Parameter setting that achieved the best cross-validated score.
grid_search.best_params_

In [None]:
# Best cross-validated score; the search was configured with scoring="r2",
# so this is an R2 value, not a classification accuracy.
grid_search.best_score_

In [None]:
# Estimator configured with the best parameter setting found by the search.
grid_search.best_estimator_

In [None]:
# Coefficients of the best Lasso found by the grid search, one per predictor column.
for col_name, weight in zip(X_train.columns, grid_search.best_estimator_.coef_):
    print(f"{col_name} : {round(weight, 4)}")

In [None]:
# Predict the held-out rows through the fitted search object.
y_pred_grid = grid_search.predict(X_test)

In [None]:
# R2 of the tuned Lasso on the test split (a regression score, not an accuracy).
r2_score(y_test,y_pred_grid)

### **RandomizedSearchCV with Lasso**

In [None]:
# Randomized search CV for Lasso: start again from a Lasso with default settings.
lasso = Lasso()
lasso

In [None]:
# Candidate alpha values from which the randomized search samples.
para_dist = {"alpha":[0.001,0.01,0.1,1,10,100]}
para_dist

In [None]:
# Randomized search: n_iter=3 draws three of the candidate alpha values in para_dist
# and evaluates each with 5-fold cross-validation, scored by R2.
# random_state fixes which candidates are drawn; without it the selected alphas (and
# therefore the best model) can change from one run of the notebook to the next.
random_search = RandomizedSearchCV(
    lasso,
    param_distributions=para_dist,
    n_iter=3,
    cv=5,
    scoring="r2",
    verbose=2,
    random_state=1,
)
random_search

In [None]:
# Run the randomized search on the training split.
random_search.fit(X_train,y_train)

In [None]:
# Coefficients of the best Lasso found by the randomized search, one per predictor column.
for col_x, weight in zip(X_train.columns, random_search.best_estimator_.coef_):
    print(f"{col_x} : {round(weight, 4)}")

In [None]:
# Best cross-validated R2 among the sampled candidates.
random_search.best_score_

In [None]:
# Parameter setting of the best sampled candidate.
random_search.best_params_

In [None]:
# Predict the held-out rows through the fitted randomized search.
y_pred_random = random_search.predict(X_test)

In [None]:
# R2 of the randomized-search Lasso on the test split.
r2_score(y_test,y_pred_random)

## **Ridge**

### **GridSearchCV with Ridge**

In [None]:
# Ridge estimator with default settings, plus the candidate alpha values for its search.
ridge = Ridge()
para_grid = {"alpha":[0.001,0.01,0.1,1,10,100]}
para_grid

In [None]:
# Grid search over the Ridge alpha candidates, scored by R2 with 5-fold cross-validation.
# - Uses para_grid (defined for Ridge) rather than param_grid, which only existed
#   because it leaked from the Lasso section.
# - Wraps a fresh Ridge() instead of the `ridge` variable: since the result is stored
#   back into `ridge`, re-running this cell would otherwise nest a search inside a search.
ridge = GridSearchCV(Ridge(),para_grid,cv=5,scoring="r2",verbose=2)
ridge

In [None]:
# Fit the Ridge search on the training split.
ridge.fit(X_train,y_train)

# Best estimator selected by the search.
best_ridge = ridge.best_estimator_
print(best_ridge)

# R2 of that estimator on the test split.
y_pred_ridge = best_ridge.predict(X_test)
print(r2_score(y_test,y_pred_ridge))

### **RandomizedSearchCV with Ridge**

In [None]:
# Randomized search for Ridge: n_iter=2 draws two of the candidate alpha values and
# evaluates each with 5-fold cross-validation, scored by R2.
ridge = Ridge()
para_dist = {"alpha":[0.001,0.01,0.1,1,10,100]}

# The search now samples from para_dist, the dictionary defined in this cell; the
# previous code passed param_grid, a leftover variable from the Lasso section.
# random_state fixes which candidates are drawn so that reruns give the same result.
random_search = RandomizedSearchCV(
    ridge,
    param_distributions=para_dist,
    n_iter=2,
    cv=5,
    scoring="r2",
    verbose=2,
    random_state=1,
)
random_search

In [None]:
# Fit the randomized Ridge search on the training split.
random_search.fit(X_train,y_train)

# Best estimator among the sampled candidates.
best_random_ridge = random_search.best_estimator_
print(best_random_ridge)

# R2 of that estimator on the test split.
y_pred_random_search = best_random_ridge.predict(X_test)
print(r2_score(y_test,y_pred_random_search))

## **Elastic Net**

### **GridSearchCV with Elastic Net**

In [None]:
# Elastic Net estimator with default settings.
# NOTE(review): this rebinds `model`, which earlier in the notebook held the fitted
# LinearRegression; cells that use the baseline must not be re-run after this one.
model = ElasticNet()
model

In [None]:
# Candidate grid for Elastic Net: every alpha is combined with every l1_ratio.
para_grid = dict(
    alpha=[0.001, 0.01, 0.1, 1, 10, 100],
    l1_ratio=[0.1, 0.4, 0.9],
)

# Exhaustive search over that grid, scored by R2 with 5-fold cross-validation.
model_elastic_net = GridSearchCV(
    estimator=model,
    param_grid=para_grid,
    cv=5,
    scoring="r2",
    verbose=2,
)
model_elastic_net

In [None]:
# Run the Elastic Net grid search on the training split.
model_elastic_net.fit(X_train,y_train)

In [None]:
# Summarise the fitted search and score its best estimator on the test split.
best_elastic_net = model_elastic_net.best_estimator_
y_pred_elasticnet = best_elastic_net.predict(X_test)

print(
    f"  Best Estimator : {best_elastic_net}",
    f"  Best Score : {model_elastic_net.best_score_}",
    f"  Best Params : {model_elastic_net.best_params_}",
    f" r2 Scoer : {r2_score(y_test,y_pred_elasticnet)}",
    sep="\n",
)

### **RandomizedSearchCV with Elastic Net**

In [None]:
# Fresh Elastic Net estimator (default settings) for the randomized search.
model = ElasticNet()
model

In [None]:
# Candidate values for Elastic Net; the randomized search samples combinations of them.
para_dist = {
               "alpha":[0.001,0.01,0.1,1,10,100],
               "l1_ratio":[0.1,0.4,0.9]
            }

# n_iter=3 evaluates three sampled (alpha, l1_ratio) combinations with 5-fold
# cross-validation, scored by R2.
# random_state fixes which combinations are drawn; without it the chosen candidates
# (and the reported best model) can differ on every run.
model_elastic_net = RandomizedSearchCV(
    model,
    param_distributions=para_dist,
    n_iter=3,
    cv=5,
    scoring="r2",
    verbose=2,
    random_state=1,
)
model_elastic_net

In [None]:
# Run the randomized Elastic Net search on the training split.
model_elastic_net.fit(X_train,y_train)

In [None]:
# Summarise the fitted randomized search and score its best estimator on the test split.
best_elastic_net = model_elastic_net.best_estimator_
y_pred_elasticnet = best_elastic_net.predict(X_test)

print(
    f"  Best Estimator : {best_elastic_net}",
    f"  Best Score : {model_elastic_net.best_score_}",
    f"  Best Params : {model_elastic_net.best_params_}",
    f" r2 Scoer : {r2_score(y_test,y_pred_elasticnet)}",
    sep="\n",
)