# Coklu Dogrusal Regresyon

In [20]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
data_file = "Advertising.csv"

In [4]:
df = pd.read_csv(data_file, usecols=[1, 2, 3, 4])
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [7]:
X = df.drop("sales", axis=1)
y = df["sales"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3232)
print("X_train shape : ", X_train.shape)
print("X_test shape : ", X_test.shape)
print("y_train shape : ", y_train.shape)
print("y_test shape : ", y_test.shape)

X_train shape :  (160, 3)
X_test shape :  (40, 3)
y_train shape :  (160,)
y_test shape :  (40,)


In [11]:
linearModel = sm.OLS(y_train, X_train).fit()
linearModel.summary()

0,1,2,3
Dep. Variable:,sales,R-squared (uncentered):,0.982
Model:,OLS,Adj. R-squared (uncentered):,0.982
Method:,Least Squares,F-statistic:,2900.0
Date:,"Sat, 14 Jan 2023",Prob (F-statistic):,3.2999999999999997e-137
Time:,20:02:46,Log-Likelihood:,-338.77
No. Observations:,160,AIC:,683.5
Df Residuals:,157,BIC:,692.8
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
TV,0.0537,0.001,36.384,0.000,0.051,0.057
radio,0.2193,0.011,20.128,0.000,0.198,0.241
newspaper,0.0201,0.008,2.625,0.010,0.005,0.035

0,1,2,3
Omnibus:,8.087,Durbin-Watson:,1.758
Prob(Omnibus):,0.018,Jarque-Bera (JB):,10.702
Skew:,-0.307,Prob(JB):,0.00474
Kurtosis:,4.108,Cond. No.,13.3


In [16]:
model = LinearRegression().fit(X_train, y_train)
print(model.intercept_)
print(model.coef_)

2.8389201554149377
[0.04571651 0.1889423  0.00273089]


# Tahmin

In [22]:
# Egitim hatasi
rmse = np.sqrt(mean_squared_error(y_train, model.predict(X_train)))
rmse

1.6864328873283867

In [23]:
# Test hatasi
rmse = np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
rmse

1.6066477988312238

# Model Tuning

In [24]:
cross_val_score(model, X_train, y_train, cv=10, scoring="r2").mean()

0.8857650691357432

In [25]:
cross_val_score(model, X_test, y_test, cv=10, scoring="r2").mean()

0.7177353805621934

In [26]:
model.score(X_train, y_train)

0.8985941668381002

In [27]:
np.sqrt(-cross_val_score(model, X_train, y_train, cv=10, scoring="neg_mean_squared_error")).mean()

1.7137685346396236

In [28]:
np.sqrt(-cross_val_score(model, X_test, y_test, cv=10, scoring="neg_mean_squared_error")).mean()

1.7036875690798876