In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns",100)
pd.set_option("display.max_rows",100)

In [33]:
from scipy import stats
# pyplot is the plotting interface conventionally aliased as `plt`;
# the top-level `matplotlib` package does not expose plot()/figure()/show().
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
import seaborn as sns

In [55]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge,Lasso
from sklearn.linear_model import ElasticNet
from sklearn.tree import ExtraTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor

In [21]:
from sklearn.model_selection import train_test_split

In [22]:
from sklearn.linear_model import LinearRegression

In [53]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [7]:
# Load the advertising dataset; the relative path is resolved against the
# current working directory of the kernel.
df = pd.read_csv('advertising.csv')

In [8]:
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [9]:
df.isnull().sum()

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB


In [11]:
df.describe()

Unnamed: 0,TV,Radio,Newspaper,Sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [12]:
df.corr()

Unnamed: 0,TV,Radio,Newspaper,Sales
TV,1.0,0.054809,0.056648,0.782224
Radio,0.054809,1.0,0.354104,0.576223
Newspaper,0.056648,0.354104,1.0,0.228299
Sales,0.782224,0.576223,0.228299,1.0


In [14]:
# Rank every column by the magnitude (sign ignored) of its correlation with Sales.
(
    df.corr()['Sales']
    .abs()
    .sort_values(ascending=False)
)

Sales        1.000000
TV           0.782224
Radio        0.576223
Newspaper    0.228299
Name: Sales, dtype: float64

In [17]:
# Separate the predictor columns (everything except Sales) from the target.
x = df.drop(columns=['Sales'])
y = df['Sales']

In [18]:
x

Unnamed: 0,TV,Radio,Newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


In [19]:
y

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: Sales, Length: 200, dtype: float64

In [20]:
x.shape

(200, 3)

### Modelling

In [36]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=13,
)

In [37]:
x_train

Unnamed: 0,TV,Radio,Newspaper
125,87.2,11.8,25.9
68,237.4,27.5,11.0
69,216.8,43.9,27.2
108,13.1,0.4,25.6
131,265.2,2.9,43.0
...,...,...,...
98,289.7,42.3,51.2
16,67.8,36.6,114.0
74,213.4,24.6,13.1
176,248.4,30.2,20.3


In [73]:
# Candidate regressors, all with default hyper-parameters.
# The two tree-based estimators accept a `random_state`; pinning it makes the
# comparison table repeatable after a kernel restart (13 matches the seed
# already used for the train/test split).
L=LinearRegression()
R=Ridge()
Lass=Lasso()
E=ElasticNet()
ExTree=ExtraTreeRegressor(random_state=13)
GBR=GradientBoostingRegressor(random_state=13)
KN=KNeighborsRegressor()

In [74]:
# Models to compare and their display names, paired by position:
# algo_names[i] is the label used for algos[i] in the results table.
algos = [
    L,
    R,
    Lass,
    E,
    ExTree,
    GBR,
    KN,
]
algo_names = [
    'LinearRegression',
    'Ridge',
    'Lasso',
    'ElasticNet',
    'ExtraTreeRegressor',
    'GradientBoostingRegressor',
    'KNeighborsRegressor',
]


In [75]:
def regression_funct(x, y, x_eval=None, y_eval=None):
    """Fit every model in ``algos`` on ``(x, y)`` and rank them by R².

    Parameters
    ----------
    x, y
        Training features and target, passed unchanged to each model's
        ``fit`` method.
    x_eval, y_eval
        Optional features and target used for scoring. When omitted, the
        notebook-level ``x_test`` / ``y_test`` are used, so the call
        ``regression_funct(x_train, y_train)`` scores on the held-out split.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``algo_names`` with columns ``R_Squared``,
        ``RMSE`` and ``MAE``, sorted by ``R_Squared`` in descending order.

    Notes
    -----
    The estimators in ``algos`` are fitted in place, so after this function
    returns they can be used directly for prediction (e.g. ``L.predict``).
    """
    # Fall back to the notebook's held-out split when no evaluation set is given.
    if x_eval is None:
        x_eval = x_test
    if y_eval is None:
        y_eval = y_test

    metric_rows = []
    for item in algos:
        item.fit(x, y)
        # Predict once and reuse the result for all three metrics.
        predictions = item.predict(x_eval)
        metric_rows.append({
            'R_Squared': r2_score(y_eval, predictions),
            # RMSE is the square root of the mean squared error.
            'RMSE': mean_squared_error(y_eval, predictions) ** .5,
            'MAE': mean_absolute_error(y_eval, predictions),
        })

    # Rows are in the same order as `algos`, which is paired by position with `algo_names`.
    result = pd.DataFrame(
        metric_rows,
        index=algo_names,
        columns=['R_Squared', 'RMSE', 'MAE'],
    )

    return result.sort_values('R_Squared', ascending=False)

In [76]:
# Fit each model in `algos` and show its test-split metrics, best R² first.
regression_funct(x_train,y_train)

Unnamed: 0,R_Squared,RMSE,MAE
GradientBoostingRegressor,0.97981,0.780874,0.611246
ExtraTreeRegressor,0.963114,1.055462,0.765
LinearRegression,0.930859,1.445033,1.180093
Ridge,0.930858,1.445044,1.180097
ElasticNet,0.930518,1.448591,1.179088
Lasso,0.930026,1.45371,1.182754
KNeighborsRegressor,0.904469,1.698561,1.1925


In [43]:
# Three hypothetical rows: a value of 50 for exactly one channel and 0 for the other two.
data = dict(
    TV=[50, 0, 0],
    Radio=[0, 50, 0],
    Newspaper=[0, 0, 50],
)

In [45]:
# Rebind `data` from the plain dict to a DataFrame whose columns are the dict keys.
data = pd.DataFrame(data)

In [46]:
data

Unnamed: 0,TV,Radio,Newspaper
0,50,0,0
1,0,50,0
2,0,0,50


In [47]:
# `L` is the LinearRegression instance that regression_funct fitted in place
# (it is one of the entries in `algos`), so regression_funct must have been
# run before this cell.
new_data = L.predict(data)

In [48]:
new_data

array([ 5.29436311, 12.26947376,  2.86000775])

In [49]:
# One fitted coefficient per predictor column, largest first.
coef = (
    pd.Series(L.coef_, index=x.columns, name='coef')
    .to_frame()
    .sort_values(by='coef', ascending=False)
)

In [51]:
coef

Unnamed: 0,coef
Radio,0.18546
TV,0.045958
Newspaper,-0.002729


In [52]:
L.intercept_

2.9964631326587767

##### Sales formula = 2.996 + (0.185 * radio) + (0.046 * tv) + (-0.003 * newspaper)