In [1]:
import pandas as pd 
import numpy as np 
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor , Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, accuracy_score

In [2]:
df = pd.read_csv("C:\\Users\\sanja\\OneDrive\\Pictures\\All working files\\data\\advertising (2).csv")
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [3]:
X = df.drop(columns=['Sales'], axis=1)
y = df['Sales']

In [9]:
X_train , X_test , y_train , y_test = train_test_split(X,y,test_size=0.15,random_state=2)

### linear regression without Standardization

In [12]:
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
lr_y_predicted = lr_model.predict(X_test)
print("R2 score of linear regression",r2_score(y_test,lr_y_predicted))

R2 score of linear regression 0.8242568507905111


### linear regression with standardization

In [20]:
scaler = StandardScaler()
new_X = scaler.fit_transform(X=X,y=y)
X_train , X_test , y_train , y_test = train_test_split(new_X,y,test_size=0.15,random_state=2)
lr_model1 = LinearRegression()
lr_model1.fit(X_train, y_train)
lr_y_predicted = lr_model1.predict(X_test)
print("R2 score of linear regression with std",r2_score(y_test,lr_y_predicted))

R2 score of linear regression with std 0.8242568507905113


In [21]:
X.corr()

Unnamed: 0,TV,Radio,Newspaper
TV,1.0,0.054809,0.056648
Radio,0.054809,1.0,0.354104
Newspaper,0.056648,0.354104,1.0


### linear regression after PCA

In [14]:
pca = PCA(n_components=3)
new_X = pca.fit_transform(X=X)
X_train , X_test , y_train , y_test = train_test_split(new_X,y,test_size=0.15,random_state=2)
lr_model2 = LinearRegression()
lr_model2.fit(X_train, y_train)
lr_y_predicted = lr_model2.predict(X_test)
print("R2 score of linear regression after applying pca",r2_score(y_test,lr_y_predicted))

R2 score of linear regression after applying pca 0.8242568507905113


### linear regression with polynomial

In [30]:
poly = PolynomialFeatures(degree=3,include_bias=False)
new_X = poly.fit_transform(X=X)
X_train , X_test , y_train , y_test = train_test_split(new_X,y,test_size=0.15,random_state=2)
lr_model3 = LinearRegression()
lr_model3.fit(X_train, y_train)
lr_y_predicted = lr_model3.predict(X_test)
print("R2 score of linear regression after applying polynomial",r2_score(y_test,lr_y_predicted))

R2 score of linear regression after applying polynomial 0.8582244883680423


### stochastic gradient decent

In [37]:
scaler = StandardScaler()
new_X = scaler.fit_transform(X=X)
X_train , X_test , y_train , y_test = train_test_split(new_X,y,test_size=0.15,random_state=2)
model4 = SGDRegressor(max_iter=1200, eta0=0.05)
model4.fit(X_train, y_train)
lr_y_predicted = model4.predict(X_test)
print("R2 score of  stochastic gradient decent",r2_score(y_test,lr_y_predicted))

R2 score of  stochastic gradient decent 0.8260895019880816


### Ridge

In [11]:
X_train , X_test, y_train, y_test = train_test_split(X,y,test_size=0.15, random_state=2)

R = Ridge(alpha=0.1)
R.fit(X_train,y_train)
y_pred = R.predict(X_test)
r2_score(y_test,y_pred)

0.8242569514642958

### stochastic Gradient with Ridge

In [97]:
X_train , X_test, y_train, y_test = train_test_split(X,y,test_size=0.15, random_state=2)

sgd = SGDRegressor(max_iter=5000, alpha=3, penalty='l2', eta0=0.0001)
sgd.fit(X_train,y_train)
y_pred = sgd.predict(X_test)
r2_score(y_test,y_pred)

0.6112045696041581

### Lasso

In [102]:
X_train , X_test, y_train, y_test = train_test_split(X,y,test_size=0.15, random_state=2)
L = Lasso(alpha=5)
L.fit(X_train,y_train)
y_pred = L.predict(X_test)
r2_score(y_test,y_pred)

0.8300627759812244

### stochastic with lasso

In [104]:
X_train , X_test, y_train, y_test = train_test_split(X,y,test_size=0.15, random_state=2)

sgd = SGDRegressor(max_iter=5000, alpha=3, penalty='l1', eta0=0.0001)
sgd.fit(X_train,y_train)
y_pred = sgd.predict(X_test)
r2_score(y_test,y_pred)

0.6042187139278696

In [106]:
X_train , X_test, y_train, y_test = train_test_split(X,y,test_size=0.15, random_state=2)
E = ElasticNet(alpha=1,l1_ratio=0.8)
E.fit(X_train,y_train)
y_pred = E.predict(X_test)
r2_score(y_test,y_pred)

0.8262647509470965

In [108]:
df = pd.read_csv("D:\\CAMPUS_x\\week_27 Regularization\\task regularization\\winequalityN - winequalityN.csv")
df

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,white,7.0,0.270,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,white,6.3,0.300,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,white,8.1,0.280,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,white,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,white,7.2,0.230,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6492,red,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
6493,red,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,,11.2,6
6494,red,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
6495,red,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [109]:
df1 = pd.read_csv("C:\\Users\\sanja\\OneDrive\\Pictures\\All working files\\data\\HousingData (1).csv")
df1

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [114]:
df1.isna().sum()

CRIM       20
ZN         20
INDUS      20
CHAS       20
NOX         0
RM          0
AGE        20
DIS         0
RAD         0
TAX         0
PTRATIO     0
B           0
LSTAT      20
MEDV        0
dtype: int64

In [115]:
df1.dropna(inplace=True)

In [117]:
df1.shape

(394, 14)

In [118]:
X = df1.drop(columns=['MEDV'],axis=1)
y = df1['MEDV']

In [119]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)
lm = LinearRegression()
lm.fit(X_train, y_train)
y_predicted = lm.predict(X_test)
r2_score(y_test,y_predicted)

0.7605719989452941

In [138]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)
R = Ridge(alpha=25)
R.fit(X_train, y_train)
y_predicted = R.predict(X_test)
r2_score(y_test,y_predicted)

0.8015101440657348

In [144]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)
L = Lasso(alpha=0.7)
L.fit(X_train, y_train)
y_predicted = L.predict(X_test)
r2_score(y_test,y_predicted)

0.7955390370207996

In [152]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)
E = ElasticNet(alpha=0.8, l1_ratio=0.9)
E.fit(X_train, y_train)
y_predicted = E.predict(X_test)
r2_score(y_test,y_predicted)

0.7902203900121931

In [211]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=2)
S = SGDRegressor(max_iter=9000, eta0=0.0000001,learning_rate='constant')
S.fit(X_train, y_train)
y_predicted = S.predict(X_test)
r2_score(y_test,y_predicted)

0.177362749643158