In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from IPython.display import display, HTML

In [2]:
# Load the Boston housing data from CMU StatLib.
# The first 22 lines of the file are skipped, and each record is spread over
# two consecutive physical lines, which is why even and odd rows are sliced
# separately below.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# Raw string: "\s" inside a normal string literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

# Even rows hold the leading feature columns; the first two values of each odd
# row are the remaining features and the third value is the target (MEDV).
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
feature_names = ['CRIM','ZN','INDUS','CHAS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']
target = raw_df.values[1::2, 2]

# Assemble features and target into a single DataFrame.
boston = pd.DataFrame(data, columns=feature_names)
boston['MEDV']=target

boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# Split predictors from the response; y is kept as an (n_samples, 1) column.
X = boston.drop('MEDV', axis=1)
y = boston[['MEDV']].to_numpy()

# Standardise every feature column: subtract its mean, divide by its std.
X = X.sub(X.mean()).div(X.std())

In [4]:
# Prepend a column of ones (the bias term) to the standardised features.
# Note: this rebinds X, so running the cell twice adds a second ones column.
X = np.hstack([np.ones((len(X), 1)), X])

n, d = X.shape
print(f'No of samples: {n} and No of features: {d}')

No of samples: 506 and No of features: 14


In [5]:
def intialize_betas(X,y):
    """Return a reproducible random starting vector of coefficients.

    Parameters
    ----------
    X : array-like with a ``shape`` attribute, (n_samples, n_features)
        Design matrix; only its number of columns is used.
    y : any
        Unused; kept so the call signature stays unchanged.

    Returns
    -------
    numpy.ndarray of shape (n_features, 1)
        Standard-normal draws generated after seeding NumPy's global RNG
        with 0, so every call returns the same values for a given width.
    """
    # Size the vector from X itself rather than from a notebook-level global,
    # so the function works regardless of which cells have been run.
    n_features = X.shape[1]
    np.random.seed(0)  # NOTE: resets NumPy's global random state
    betas = np.random.randn(n_features, 1)
    return betas

# Starting point for the optimiser (one coefficient per column of X).
betas = intialize_betas(X, y)
print(betas)

[[ 1.76405235]
 [ 0.40015721]
 [ 0.97873798]
 [ 2.2408932 ]
 [ 1.86755799]
 [-0.97727788]
 [ 0.95008842]
 [-0.15135721]
 [-0.10321885]
 [ 0.4105985 ]
 [ 0.14404357]
 [ 1.45427351]
 [ 0.76103773]
 [ 0.12167502]]


In [23]:
def soft_threshold(rho,lm):
    """Soft-thresholding operator used in the lasso coordinate update.

    Returns ``rho - lm`` when ``rho > lm``, ``rho + lm`` when ``rho < -lm``,
    and ``0`` otherwise (i.e. values within ``[-lm, lm]`` are set to zero).
    """
    # The upper test is evaluated first, exactly as in the if/elif form.
    if rho > lm:
        return rho - lm
    return rho + lm if rho < -lm else 0


In [24]:
#Coordinate Descent Function

def coordinate_descent(X,y, no_of_iterations, lm, betas=None):
    """Fit lasso coefficients by cyclic coordinate descent.

    Each sweep visits every coordinate ``j`` in order and replaces it with the
    exact minimiser of ``0.5 * ||y - X @ beta||**2 + lm * ||beta||_1`` over
    that coordinate alone:
    ``beta_j = soft_threshold(x_j . r_j, lm) / (x_j . x_j)``, where ``r_j`` is
    the residual computed with ``beta_j`` set to zero.

    Every column is penalised, including column 0 (the bias column when X
    carries a leading column of ones).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Target values.
    no_of_iterations : int
        Number of full sweeps over all coordinates.
    lm : float
        L1 penalty strength passed to ``soft_threshold``.
    betas : array-like of n_features values, optional
        Starting coefficients. The input is copied and never modified.
        When omitted, the start comes from ``intialize_betas(X, y)``.

    Returns
    -------
    numpy.ndarray of shape (n_features, 1)
        Coefficients after ``no_of_iterations`` sweeps.
    """
    X = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1)

    if betas is None:
        betas = intialize_betas(X, y)
    # np.array copies, so neither the caller's array nor any notebook-level
    # variable is mutated while the sweeps run.
    coefs = np.array(betas, dtype=float).reshape(-1)

    # Squared column norms are constant across sweeps; compute them once.
    col_sq_norms = np.square(X).sum(axis=0)

    for _ in range(no_of_iterations):
        for j in range(coefs.shape[0]):
            if col_sq_norms[j] == 0:
                # An all-zero column cannot explain y; avoid a 0/0 -> NaN.
                coefs[j] = 0.0
                continue
            # Residual with feature j's own contribution removed.
            coefs[j] = 0.0
            partial_residual = targets - X @ coefs
            rho = X[:, j] @ partial_residual
            coefs[j] = soft_threshold(rho, lm) / col_sq_norms[j]

    return coefs.reshape(-1, 1)

In [25]:
# Render floats in DataFrame output with thousands separators and 2 decimals.
pd.set_option("display.float_format", "{:,.2f}".format)

In [26]:
# Run 1000 sweeps of coordinate descent with an L1 penalty of 10.
betas = coordinate_descent(X, y, no_of_iterations=1000, lm=10)

# Row labels for the coefficients: the bias term followed by the features.
index = [
    'BIAS', 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
    'TAX', 'PTRATIO', 'B', 'LSTAT',
]
pd.DataFrame(data=betas, index=index, columns=['Beta value']).transpose()

Unnamed: 0,BIAS,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
Beta value,22.51,-0.87,1.0,0.0,0.68,-1.93,2.7,0.0,-3.0,2.33,-1.76,-2.02,0.83,-3.73
