In [37]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

In [6]:
cols = ['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS',
        'RAD','TAX','PTRATIO','B','LSTAT','MEDV']

In [7]:
data = pd.read_csv("../ml_data/housing.data",
                   sep=' +', engine='python', header=None, 
                   names=cols)

In [8]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2


In [9]:
# transforming data - add column with '1' as first column
data['Ones'] = 1

In [10]:
data.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV,Ones
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296.0,15.3,396.9,4.98,24.0,1
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,396.9,9.14,21.6,1
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,392.83,4.03,34.7,1
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,394.63,2.94,33.4,1
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,396.9,5.33,36.2,1


In [12]:
X = data[['Ones']+cols[:-1]].values

In [15]:
y = data['MEDV'].values.reshape(-1,1)

In [16]:
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

In [20]:
scaler = StandardScaler()
scaler.fit(y)
y = scaler.transform(y)

In [22]:
weights = np.ones((1,X.shape[1])).T/1000

In [23]:
def predict(X, weights):
        predictions = np.dot(X,weights)
        return predictions.reshape(-1,1)

In [24]:
eta =   0.01
lmbda =   0.1
epochs = 100
N = X.shape[0] 

In [26]:
# initaiating hyperparameters
eta =   0.01
lmbda =   0.1
epochs = 100
N = X.shape[0] 

In [35]:
# learning model
for e in range(epochs):
    
    y_pred = predict(X, weights)
    error_pred = np.sum(np.square(y - y_pred)) + lmbda*np.sum(np.square(weights))    
    delta_weight = np.zeros(weights.shape[0]).reshape(-1,1)    
    
    for j in range(weights.shape[0]):
    
        lin_delta_weights = -2 * np.sum(np.dot(X[:,j], (y - y_pred))) / N
        
        if j ==0:
            weights[j] = weights[j] - eta * lin_delta_weights
        else:
            weights[j] = (1-2*eta*lmbda) * weights[j] - eta * lin_delta_weights
            
    #print(e, error_pred, np.square(weights).sum())

In [31]:
# creating models from scikit learn - just to compare the results
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X,y)

LinearRegression()

In [34]:
ridge = Ridge(alpha = lmbda)
ridge.fit(X,y)

Ridge(alpha=0.1)

In [38]:
# calculating R2
r2_my_ridge = r2_score(y, predict(X, weights))
r2_lr = r2_score(y, lr.predict(X))
r2_ridge = r2_score(y, ridge.predict(X))
 

In [39]:
# showing coefficients
print(weights.reshape(1,-1))
print(lr.coef_)
print(ridge.coef_)

[[ 0.001      -0.07173447  0.0627135  -0.04376036  0.08274894 -0.10459682
   0.32154671 -0.01848944 -0.20567141  0.0841941  -0.05955971 -0.18982937
   0.09076159 -0.34674316]]
[[ 0.         -0.10101708  0.1177152   0.0153352   0.07419883 -0.22384803
   0.29105647  0.00211864 -0.33783635  0.28974905 -0.22603168 -0.22427123
   0.09243223 -0.40744693]]
[[ 0.         -0.10092515  0.11754744  0.0150717   0.07423681 -0.22354225
   0.29115152  0.00204844 -0.33754341  0.28895963 -0.22528933 -0.2241805
   0.09242769 -0.40730458]]


In [40]:
# printing results
print('R2\t','My Ridge\t', r2_my_ridge, 
      '\tLINREG\t', r2_lr, '\tRIDGE\t', r2_ridge)
print('W \t','My Ridge\t', np.square(weights).sum(), 
      '\tLINREG\t', np.square(lr.coef_).sum(), 
      '\tRIDGE\t', np.square(ridge.coef_).sum())

R2	 My Ridge	 0.7270238874362798 	LINREG	 0.7406426641094094 	RIDGE	 0.7406425368463014
W 	 My Ridge	 0.34995722805300017 	LINREG	 0.6386607850662823 	RIDGE	 0.637371378870239
