In [1]:
import numpy as np

In [2]:
from sklearn.linear_model import LinearRegression

In [3]:
from sklearn.datasets import load_boston

In [4]:
boston_obj = load_boston() ## return python dictionary-like object (of Bunch class)

In [5]:
type(boston_obj)

sklearn.utils.Bunch

In [6]:
type(boston_obj.data)

numpy.ndarray

In [7]:
boston_feature_values = boston_obj.data
boston_target_values = boston_obj.target

In [8]:
# print(boston_obj.DESCR)

In [9]:
dir(boston_obj)

['DESCR', 'data', 'feature_names', 'target']

In [10]:
boston_feature_values.shape

(506, 13)

In [11]:
boston_target_values.shape

(506,)

In [12]:
from sklearn.preprocessing import StandardScaler

std_scaler = StandardScaler()

std_scaler = std_scaler.fit(boston_feature_values)
boston_feature_values = std_scaler.transform(boston_feature_values)

In [116]:
def my_sgdRegressor(X, y, initial_weights, initial_learning_rate):
    n = X.shape[0]
    d = X.shape[1]
#     weights = initial_weights ### weights: (d X 1)
    weights = np.zeros((d, 1))
    learning_rate = initial_learning_rate
    
    iterations = 0
    while(True):
        random_row_index = np.random.randint(0, n, size = 1)
        
        random_row = X[random_row_index].reshape(d, 1) ### X[r_r_i]: a row vector (1 X d), reshaped to d X 1
#         print(random_row_index, end = ' ')
        y_pred = np.dot(weights.T, random_row) ### a real number (returned value by dot is 1 X 1 matrix)
        
        error = y[random_row_index] - y_pred ### for current randomly chosen row, error: n X 1 matrix
        
        grad = -2*error*random_row ### for current randomly chosen row; grad: d X 1
        
        preupdation_weights = weights
#         print('grad shape' + str(grad.shape))
#         print('weights.shape' + str(weights.shape))
        weights = weights - learning_rate * grad
        
        change_in_weights = weights - preupdation_weights
        distance_bw_weights = np.sqrt(np.sum(change_in_weights**2))
#         print(change_in_weights)
        if(iterations > 1 and distance_bw_weights < 0.001):
            break
               
#         learning_rate /= 2
        iterations += 1
        print('>iteration=%d, lrate=%.3f, error=%.3f' % (iterations, learning_rate, error**2))
    
    print(iterations)
    return weights

In [14]:
ones = np.ones(shape = (boston_feature_values.shape[0], 1))
X = np.concatenate([ones, boston_feature_values], axis = 1)

In [25]:
X[:2]

array([[ 1.        , -0.41771335,  0.28482986, -1.2879095 , -0.27259857,
        -0.14421743,  0.41367189, -0.12001342,  0.1402136 , -0.98284286,
        -0.66660821, -1.45900038,  0.44105193, -1.0755623 ],
       [ 1.        , -0.41526932, -0.48772236, -0.59338101, -0.27259857,
        -0.74026221,  0.19427445,  0.36716642,  0.55715988, -0.8678825 ,
        -0.98732948, -0.30309415,  0.44105193, -0.49243937]])

In [15]:
n = X.shape[0]
d = X.shape[1]

In [16]:
print(n, d,sep = '\n')

506
14


In [17]:
y = boston_target_values.reshape(n, 1)

In [18]:
initial_weights = np.random.normal(size = (d, 1)) ### initial_weights is d X 1 matrix

In [19]:
initial_weights.shape

(14, 1)

In [117]:
learned_weights = my_sgdRegressor(X, y, initial_weights, 0.001)

>iteration=1, lrate=0.001, error=841.000
>iteration=2, lrate=0.001, error=486.303
>iteration=3, lrate=0.001, error=65.596
>iteration=4, lrate=0.001, error=619.800
>iteration=5, lrate=0.001, error=298.748
>iteration=6, lrate=0.001, error=482.134
>iteration=7, lrate=0.001, error=573.713
>iteration=8, lrate=0.001, error=969.119
>iteration=9, lrate=0.001, error=1029.293
>iteration=10, lrate=0.001, error=399.551
>iteration=11, lrate=0.001, error=241.100
>iteration=12, lrate=0.001, error=418.767
>iteration=13, lrate=0.001, error=266.763
>iteration=14, lrate=0.001, error=374.666
>iteration=15, lrate=0.001, error=175.041
>iteration=16, lrate=0.001, error=569.715
>iteration=17, lrate=0.001, error=2526.875
>iteration=18, lrate=0.001, error=592.579
>iteration=19, lrate=0.001, error=254.602
>iteration=20, lrate=0.001, error=1950.971
>iteration=21, lrate=0.001, error=261.251
>iteration=22, lrate=0.001, error=580.544
>iteration=23, lrate=0.001, error=306.822
>iteration=24, lrate=0.001, error=530.193

In [118]:
print('Intercept: ' + str(learned_weights[0]))
print('Weights: ' + str(learned_weights.ravel()[1:]))

Intercept: [19.01783194]
Weights: [-0.43795744  0.06869281 -0.75439562  1.23784664 -1.03347834  3.13439618
 -0.146568   -1.24090478  0.6796527  -0.37264719 -1.29988125  1.03889565
 -2.80891923]


In [119]:
y_pred = np.dot(X, learned_weights)

In [120]:
mse = (1/n) * np.sum((y-y_pred)**2)

In [121]:
mse

36.87078611349265