In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')

In [1]:
# Load the advertising dataset. The path points at the Kaggle input directory,
# so it has to be changed when the notebook is run anywhere else.
df=pd.read_csv("/kaggle/input/tvradionewspaperadvertising/Advertising.csv")

In [1]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

In [1]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [1]:
# Preview the first five rows.
df.head(n=5)

In [1]:
# Total number of missing cells across every column of the frame.
df.isna().sum().sum()

## EDA

In [1]:
# Pairwise scatter plots with a fitted regression line for every column pair.
sns.pairplot(data=df, kind="reg")

In [1]:
# One regression plot per advertising channel against Sales, side by side.
fig, ax = plt.subplots(1, 3, figsize=(25, 5))
for axis, feature in zip(ax, ['TV', 'Radio', 'Newspaper']):
    # x/y must be passed by keyword: recent seaborn releases accept only
    # `data` positionally, and older ones emit a FutureWarning for the
    # positional form.
    sns.regplot(x=feature, y='Sales', data=df, ax=axis)

## Model

In [1]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV

In [1]:
# Target: kept as a one-column DataFrame (not a Series).
Y = df[['Sales']]
# Features: every remaining column of the frame.
# NOTE(review): any index/ID column present in the CSV would also end up here — confirm.
X = df.drop(columns='Sales')

In [1]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=32,
)

In [1]:
# Accumulator for the baseline scores: one independent list per output column.
results = {column: [] for column in ('Model', 'rmse', 'r2score')}

In [1]:
def metrics(y_true, y_pred):
    """Print and return the RMSE and R2 score of a set of predictions.

    Parameters
    ----------
    y_true : ground-truth target values, passed unchanged to
        ``mean_squared_error`` and ``r2_score``.
    y_pred : predicted target values, same handling as ``y_true``.

    Returns
    -------
    (rmse, r2) : the root mean squared error and the R2 score rounded to
        three decimals.
    """
    # Return the same quantity that is printed. Previously the un-rooted MSE
    # was returned while callers stored it under "rmse".
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = np.round(r2_score(y_true, y_pred), 3)
    print(f'Root Mean squared error: {rmse}')
    print(f'R2 score: {r2}')
    return rmse, r2

In [1]:
def dict_append(dict_, model, error, r2):
    """Append one model's scores to a results dictionary, in place.

    Parameters
    ----------
    dict_ : dict with list values under the keys 'Model', 'rmse' and 'r2score'.
    model : any object; its class name is recorded as the model label.
    error : value appended to the 'rmse' list.
    r2 : value appended to the 'r2score' list.

    Returns
    -------
    The same ``dict_`` object that was passed in, after the append.
    """
    dict_['Model'].append(type(model).__name__)
    dict_['rmse'].append(error)
    dict_['r2score'].append(r2)
    # Return the argument itself; the original returned the builtin `dict` type.
    return dict_

In [1]:
# Baseline 1: ordinary least squares with default settings, scored on the test split.
lr = LinearRegression().fit(x_train, y_train)
y_test_pred = lr.predict(x_test)
lr_rmse, lr_r2 = metrics(y_true=y_test, y_pred=y_test_pred)
dict_append(dict_=results, model=lr, error=lr_rmse, r2=lr_r2)

In [1]:
# Baseline 2: Ridge regression with default hyperparameters, scored on the test split.
rg = Ridge().fit(x_train, y_train)
y_test_pred = rg.predict(x_test)
rg_rmse, rg_r2 = metrics(y_true=y_test, y_pred=y_test_pred)
dict_append(dict_=results, model=rg, error=rg_rmse, r2=rg_r2)

In [1]:
# Baseline 3: Lasso regression with default hyperparameters, scored on the test split.
ls = Lasso().fit(x_train, y_train)
y_test_pred = ls.predict(x_test)
ls_rmse, ls_r2 = metrics(y_true=y_test, y_pred=y_test_pred)
dict_append(dict_=results, model=ls, error=ls_rmse, r2=ls_r2)

In [1]:
# Baseline 4: ElasticNet with default hyperparameters, scored on the test split.
el = ElasticNet().fit(x_train, y_train)
y_test_pred = el.predict(x_test)
el_rmse, el_r2 = metrics(y_true=y_test, y_pred=y_test_pred)
dict_append(dict_=results, model=el, error=el_rmse, r2=el_r2)

In [1]:
# Turn the accumulated baseline scores into a table and display it.
result_frame = pd.DataFrame(data=results)
result_frame

## Model Tuning

In [1]:
# Fresh accumulators for the tuned models: error, R2 score and winning parameters.
tuning_rmse, tuning_score, tuning_params = [], [], []

In [1]:
def gridsearch(model, params, l1=None, l2=None, l3=None, x=None, y=None):
    """Tune ``model`` with a 10-fold grid search and score it on a held-out split.

    The data is split 80/20 with ``random_state=32``, ``GridSearchCV`` is
    fitted on the training part, and the search object's predictions on the
    test part are scored with ``metrics``. The scores and the best parameter
    combination are appended to the given lists and the best parameters are
    printed.

    Parameters
    ----------
    model : estimator instance handed to ``GridSearchCV``.
    params : parameter grid handed to ``GridSearchCV``.
    l1 : list receiving the error value; defaults to the notebook-level
        ``tuning_rmse``.
    l2 : list receiving the R2 score; defaults to ``tuning_score``.
    l3 : list receiving ``best_params_``; defaults to ``tuning_params``.
    x : feature data; defaults to the notebook-level ``X``.
    y : target data; defaults to the notebook-level ``Y``.

    Returns
    -------
    The two values returned by ``metrics`` for the test split.
    """
    # Look the notebook-level defaults up at call time. Binding them in the
    # signature froze whichever objects existed when this cell was executed,
    # so re-running the cell that resets the tuning lists (or rebuilds X / Y)
    # left the function working on stale objects.
    l1 = tuning_rmse if l1 is None else l1
    l2 = tuning_score if l2 is None else l2
    l3 = tuning_params if l3 is None else l3
    x = X if x is None else x
    y = Y if y is None else y

    X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=32)

    # Train scores are not requested: the search object never leaves this
    # function, so they would be computed and then thrown away.
    m = GridSearchCV(model, params, cv=10)
    m.fit(X_train, Y_train)

    Y_test_pred = m.predict(X_test)
    rmse, r2 = metrics(Y_test, Y_test_pred)

    l1.append(rmse)
    l2.append(r2)
    l3.append(m.best_params_)
    print(m.best_params_)
    return rmse, r2

In [1]:
# Grid for LinearRegression: fit with and without an intercept term.
linear_parameters = dict(fit_intercept=[True, False])

In [1]:
# Tune LinearRegression; the scores are also appended to the tuning lists.
lr_rmse_, lr_r2_ = gridsearch(model=LinearRegression(), params=linear_parameters)

In [1]:
# Grid for Ridge: regularisation strengths crossed with three solvers.
# The candidate order is kept as written because it determines which
# combination wins when several tie on the cross-validation score.
ridge_parameters = dict(
    alpha=[0.1, 0.01, 0.005, 0.05, 0.001, 0.2, 0.3, 0.5, 0.6, 0.8, 0.9, 1, 2, 3, 4],
    solver=['auto', 'svd', 'cholesky'],
)

In [1]:
# Tune Ridge; the scores are also appended to the tuning lists.
rg_rmse_, rg_r2_ = gridsearch(model=Ridge(), params=ridge_parameters)

In [1]:
# Grid for Lasso: coordinate selection rule crossed with regularisation strengths.
lasso_parameters = dict(
    selection=['cyclic', 'random'],
    alpha=[0.1, 0.01, 0.005, 0.05, 0.001, 0.2, 0.3, 0.5, 0.6, 0.8, 0.9, 1, 2, 3, 4],
)

In [1]:
# Tune Lasso. The grid includes selection='random', which draws from a random
# number generator, so the estimator is seeded (same seed as the data split)
# to make the chosen parameters and scores repeatable across runs.
ls_rmse_, ls_r2_ = gridsearch(Lasso(random_state=32), lasso_parameters)

In [1]:
# Grid for ElasticNet: regularisation strengths crossed with the coordinate selection rule.
elastic_net_parameters = dict(
    alpha=[0.1, 0.01, 0.005, 0.05, 0.001, 0.2, 0.3, 0.5, 0.6, 0.8, 0.9, 1, 2, 3, 4],
    selection=['cyclic', 'random'],
)

In [1]:
# Tune ElasticNet. The grid includes selection='random', which draws from a
# random number generator, so the estimator is seeded (same seed as the data
# split) to make the chosen parameters and scores repeatable across runs.
el_rmse_, el_r2_ = gridsearch(ElasticNet(random_state=32), elastic_net_parameters)

In [1]:
# Summary table of the tuned models. The labels are hard-coded in the order
# the four grid searches are run above, so each tuning list must hold exactly
# four entries in that same order.
tuning_columns = {
    'Model': ['Linear', 'Ridge', 'Lasso', 'ElasticNet'],
    'RMSE': tuning_rmse,
    'Score': tuning_score,
    'Best Params': tuning_params,
}
tuning_frame = pd.DataFrame(tuning_columns)

In [1]:
# Display the tuned-model summary table.
tuning_frame