In [1]:
import numpy as np
import sklearn
from sklearn import metrics, datasets
from sklearn.preprocessing import StandardScaler

In [2]:
X,y = datasets.fetch_california_housing(return_X_y=True)
print(X.shape)

(20640, 8)


In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)


In [4]:
print(X_train.shape)

print(X_test.shape)

(16512, 8)
(4128, 8)


In [5]:
X_train_temp = np.ones((X_train.shape[0], X_train.shape[1]+1))
X_test_temp = np.ones((X_test.shape[0], X_test.shape[1]+1))

X_train_temp[:,1:] = X_train
X_test_temp[:,1:] = X_test

In [6]:
scaler=StandardScaler()
scaler.fit(X_train_temp[:,1:])
X_train_temp[:,1:] = scaler.transform(X_train_temp[:,1:])
X_test_temp[:,1:] = scaler.transform(X_test_temp[:,1:])

# Normal equation with Regularization¶

In [7]:
def reg_regression(X, y, lambda1):
    XTX = np.dot(X.T, X)
    regularized_term = lambda1 * np.identity(XTX.shape[0])
    XTX_regularized = XTX + regularized_term
    XTy = np.dot(X.T, y)
    theta = np.dot(np.linalg.inv(XTX_regularized), XTy)
    return theta

lambda1 = 0.01  

theta = reg_regression(X_train, y_train,lambda1)


In [8]:
predictions=np.dot(theta,X_test.T)
print("MAE:", metrics.mean_absolute_error(y_true=y_test,y_pred=predictions))
print("MSE:", metrics.mean_squared_error(y_true=y_test,y_pred=predictions))

MAE: 0.5815014111291446
MSE: 0.6403102513068875


In [11]:
scaler=StandardScaler()
scaler.fit(X_train[:,1:])
X_train[:,1:]=scaler.transform(X_train[:,1:])
X_test[:,1:]=scaler.transform(X_test[:,1:])

# Gradient Decent with Regularization

In [12]:
theta=np.random.uniform(0,1,size=(X_train.shape[1]))
print(theta)

[0.43543595 0.38737221 0.36274455 0.67675662 0.48899633 0.07193932
 0.52951084 0.02431039]


In [13]:
niterate=1000
alpha=0.001
m=X_train.shape[0]
n=X_train.shape[1]
lam=0.001

for i in range(niterate):
    update=np.zeros(n)
    ypred = np.dot(X_train,theta)
    diff = ypred - y_train
    for j in range(n):
        update[j]=np.sum(diff*(X_train.T)[j])
    theta = theta*(1-alpha*(lam/m)) - (1/m)*(alpha*update)
print(theta)
    

[ 0.52393379  0.3906791  -0.12571052  0.25890474  0.26521712 -0.02422569
  0.22526008  0.17153461]


In [14]:
predictions=np.dot(X_test,theta)
print("MAE:", metrics.mean_absolute_error(y_true=y_test,y_pred=predictions))
print("MSE:", metrics.mean_squared_error(y_true=y_test,y_pred=predictions))

MAE: 0.6472028214665737
MSE: 0.7925280045917967


# BMI


In [15]:
import numpy as np
import random

random.seed(50)

weight=np.random.uniform(40,150,5000)
height=np.random.uniform(100,150,5000)

In [16]:
X=np.array([weight,height])
X=X.reshape(5000,2)

y=height/weight*weight
print(X)

[[108.92907334 108.45814798]
 [ 44.61821399  44.74101174]
 [ 71.99523944  41.78658981]
 ...
 [101.83302501 146.59208944]
 [126.18778344 131.47374898]
 [116.82704376 102.88692261]]


In [17]:
X_train_temp1=X[0:4000,:]
X_train = np.ones((X_train_temp1.shape[0],X_train_temp1.shape[1]+1))
X_train[:,1:]=X_train_temp1
print(X_train.shape)

(4000, 3)


In [18]:
X_test_temp1=X[4000:,:]
X_test = np.ones((X_test_temp1.shape[0],X_test_temp1.shape[1]+1))  
X_test[:,1:]=X_test_temp1

y_train=y[:4000]
y_test=y[4000:]

In [19]:
def reg_regression(X, y, lambda1):
    XTX = np.dot(X.T, X)
    regularized_term = lambda1 * np.identity(XTX.shape[0])
    XTX_regularized = XTX + regularized_term
    XTy = np.dot(X.T, y)
    theta = np.dot(np.linalg.inv(XTX_regularized), XTy)
    return theta

lambda1 = 0.01  

theta = reg_regression(X_train, y_train,lambda1)

    

In [20]:
predictions=np.dot(X_test,theta)
print("MAE:", metrics.mean_absolute_error(y_true=y_test,y_pred=predictions))
print("MSE:", metrics.mean_squared_error(y_true=y_test,y_pred=predictions))

MAE: 12.617501900621289
MSE: 212.52626701613428


# Lasso

In [45]:
import numpy as np
from sklearn import datasets,metrics
from sklearn.linear_model import Lasso,Ridge,SGDRegressor

In [46]:
X,y = datasets.fetch_california_housing(return_X_y=True)
print(X.shape)

(20640, 8)


In [47]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [48]:
alpha = 0.005
lr = Lasso(alpha=alpha)
lr.fit(X_train,y_train)

In [49]:
predictions = lr.predict(X_test)

In [50]:
print("MAE:",metrics.mean_absolute_error(y_test, predictions))
print("MSE:",metrics.mean_squared_error(y_test,predictions))

MAE: 0.5404741205283279
MSE: 0.5480452733619987


# Ridge

In [51]:
alpha = 0.005
rr = Ridge(alpha=alpha)
rr.fit(X_train,y_train)

In [52]:
predictions = rr.predict(X_test)

In [53]:
print("MAE:",metrics.mean_absolute_error(y_test, predictions))
print("MSE:",metrics.mean_squared_error(y_test,predictions))

MAE: 0.5379336218534003
MSE: 0.5446776747715351


# SGRegression

In [54]:
alpha = 0.05
sd = SGDRegressor(alpha=alpha, max_iter=1000, eta0=0.000001)
sd.fit(X_train,y_train)

In [55]:
predictions = sd.predict(X_test)

In [56]:
print("MAE:",metrics.mean_absolute_error(y_test, predictions))
print("MSE:",metrics.mean_squared_error(y_test,predictions))

MAE: 0.8559325991868714
MSE: 1.1845749868240945
