In [1]:
from sklearn.linear_model import LinearRegression
help(LinearRegression)

Help on class LinearRegression in module sklearn.linear_model._base:

class LinearRegression(sklearn.base.MultiOutputMixin, sklearn.base.RegressorMixin, LinearModel)
 |  LinearRegression(*, fit_intercept=True, normalize='deprecated', copy_X=True, n_jobs=None, positive=False)
 |  
 |  Ordinary least squares Linear Regression.
 |  
 |  LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
 |  to minimize the residual sum of squares between the observed targets in
 |  the dataset, and the targets predicted by the linear approximation.
 |  
 |  Parameters
 |  ----------
 |  fit_intercept : bool, default=True
 |      Whether to calculate the intercept for this model. If set
 |      to False, no intercept will be used in calculations
 |      (i.e. data is expected to be centered).
 |  
 |  normalize : bool, default=False
 |      This parameter is ignored when ``fit_intercept`` is set to False.
 |      If True, the regressors X will be normalized before regression by
 |    

In [2]:
dir(LinearRegression)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_feature_names',
 '_check_n_features',
 '_decision_function',
 '_estimator_type',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_set_intercept',
 '_validate_data',
 'fit',
 'get_params',
 'predict',
 'score',
 'set_params']

In [4]:
from sklearn.datasets import load_diabetes
data = load_diabetes()
X = data['data']
y = data['target']
print(X.shape, y.shape)

(442, 10) (442,)


In [10]:

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X,y)
#print("w:", lr.coef_)
#print("b", lr.intercept_)

y_hat= lr.predict(X[:10])
print(y_hat)
print(y[:10])

[206.11667725  68.07103297 176.88279035 166.91445843 128.46225834
 106.35191443  73.89134662 118.85423042 158.80889721 213.58462442]
[151.  75. 141. 206. 135.  97. 138.  63. 110. 310.]


In [13]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2,random_state=0)
print(X_train.shape, y_train.shape)

(353, 10) (353,)


In [22]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_hat = lr.predict(X_test)

print("predicted: ", y_hat[:10])
print("true",y_test[:10])

predicted:  [238.46949509 248.92812015 164.05732579 120.30774826 187.42483427
 259.04746969 113.55788482 188.07762807 149.49521726 236.00758247]
true [321. 215. 127.  64. 175. 275. 179. 232. 142.  99.]


In [17]:
#score
print("accuracy", lr.score(X_test, y_test))

accuracy 0.3322332173106183


In [27]:
import numpy as np
class LinearRegression_UA:
    def __init__(self, eta=0.01,epoch = 10000, error= 1e-4):
        self.eta = eta
        self.epoch = epoch
        self.error = error
    
    def fit(self,X,y): #data preprocessing
        self.n_samples = X.shape[0]
        dummy = np.ones((self.n_samples,1))
        self.X = np.concatenate((dummy,X), axis = 1)
        self.y = y
        self.n_features = self.X.shape[1]
        
        #gradient descent
        self.w = np.random.randn(self.n_features)
        step = 0
        while True and step < self.epoch:
            g = 1/self.n_samples* self.X.T@(self.X@self.w-self.y)
            w_old = self.w
            self.w = self.w - self.eta* g
            step +=1
            if abs(self.w-w_old).sum()<self.error:
                break
    def predict(self,X):
        return X@self.w[1:]+ self.w[0]
          
        

In [28]:
lr_UA = LinearRegression_UA()
lr_UA.fit(X_train, y_train)
print("b:",lr_UA.w[0])
print("w:",lr_UA.w[1:])

y_hat = lr_UA.predict(X_test)
print("predicted:",y_hat[:10])
print("true: ", y[:10])

b: 151.76287511308215
w: [  39.23417438   -7.19074906  177.50337429  115.60005526   34.75640872
   18.36162679 -110.03770111  103.92470217  160.18539438   92.48531252]
predicted: [183.9500454  188.07744433 159.37613336 134.2513435  150.162509
 182.95822009 128.31305398 173.72357365 134.14066813 166.84194396]
true:  [151.  75. 141. 206. 135.  97. 138.  63. 110. 310.]


In [None]:
from sklearn.linear_model import Ridge
rg = Ridge(alpha=0.1)

In [9]:
#code linear regression model
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

class L2Regression:
    def __init__(self, alpha =0.01, max_steps= 10000, etr= 0.01, error=1e-4):
        self.alpha = alpha
        self.max_steps=max_steps
        self.etr = etr
        self.error = error
        
        
    def fit(self,X,y):
        self.n_samples = X.shape[0]
        dummy = np.ones((self.n_samples,1))
        self.X = np.concatenate((dummy,X),axis=1)
        self.y = y
        self.n_features=self.X.shape[1]
        
        self.w=np.random.randn(self.n_features)
        for i in range(self.max_steps):
            #compute gradient descent
            g = (1/self.n_samples)*(self.X.T@(self.X@self.w-self.y)+self.alpha*self.w) #L2 ridge penalty
            w_old = self.w
            self.w=self.w - self.etr * g
            
            #check convergence
            if np.abs(self.w-w_old).sum() < self.error:
                break
            
    def predict(self,X):
        dummy = np.ones((X.shape[0],1))
        X = np.concatenate((dummy,X), axis =1)
        return X@self.w
    
    def score(self,X,y):
        y_pred=self.predict(X)
        return 1 - ((y-y_pred)**2).sum() / ((y-y.mean())**2).sum()
    
#diabetes data
diab = datasets.load_diabetes()
X = diab.data
y = diab.target
X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2,random_state=50)
model = L2Regression()
model.fit(X_train,y_train)
print("Training datasets->R-squared = ", model.score(X_train,y_train))
print("Testing datasets-> R-squared = ", model.score(X_test,y_test))

        


Training datasets->R-squared =  0.33203066310748175
Testing datasets-> R-squared =  0.3422080338505532
