In [238]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [239]:
X,y=make_regression(n_samples=100,n_features=2,n_informative=2,n_targets=1,noise=50)

In [240]:
df=pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'target':y})

In [241]:
df.head()

Unnamed: 0,feature1,feature2,target
0,-0.490433,-0.490779,-18.128594
1,-1.206368,-1.343147,-184.540045
2,0.358817,-0.7275,30.085658
3,1.103251,1.60002,102.223555
4,0.210535,0.109607,-39.701026


In [242]:
fig = px.scatter_3d(df,x='feature1',y='feature2',z='target')
fig.show()

In [243]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=3)

In [244]:
from sklearn.linear_model import LinearRegression

In [245]:
lr = LinearRegression()

In [246]:
lr.fit(X_train,y_train)

In [247]:
y_pred = lr.predict(X_test)

In [248]:
print("MAE",mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))
print("R2 score",r2_score(y_test,y_pred))

MAE 33.87722613077271
MSE 1737.9464002650552
R2 score 0.7903349721656002


In [249]:
x = np.linspace(-5, 5, 10)
y = np.linspace(-5, 5, 10)
xGrid, yGrid = np.meshgrid(y, x)
final = np.vstack((xGrid.ravel().reshape(1,100),yGrid.ravel().reshape(1,100))).T
z_final = lr.predict(final).reshape(10,10)
z = z_final

In [250]:
fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')
fig.add_trace(go.Surface(x = x, y = y, z =z ))
fig.show()

In [251]:
lr.coef_

array([78.87619087, 25.60072332])

In [252]:
lr.intercept_

-0.818535774211469

### Code from Scratch

In [253]:
import numpy as np
from sklearn.datasets import load_diabetes

In [254]:
X,y = load_diabetes(return_X_y=True)

In [255]:
X

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]])

In [256]:
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [257]:
X.shape

(442, 10)

In [258]:
y.shape

(442,)

#### Using Sklearn's Linear Regression

In [259]:
from sklearn.model_selection import train_test_split

In [260]:
X_train,y_train,X_test,y_test=train_test_split(X,y,test_size=0.2,random_state=3)

In [261]:
print(X_train.shape)
print(X_test.shape)

(353, 10)
(353,)


In [262]:
from sklearn.linear_model import LinearRegression

In [263]:
reg=LinearRegression()

#### Making our own LR

In [264]:
class MeraLR:
    
    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        
    def fit(self,X_train,y_train):
        X_train = np.insert(X_train,0,1,axis=1)
        
        # calcuate the coeffs
        betas = np.linalg.inv(np.dot(X_train.T,X_train)).dot(X_train.T).dot(y_train)
        self.intercept_ = betas[0]
        self.coef_ = betas[1:]
    
    def predict(self,X_test):
        y_pred = np.dot(X_test,self.coef_) + self.intercept_
        return y_pred
        

In [265]:
lr = MeraLR()

In [266]:
X_train.shape

(353, 10)

In [267]:
np.insert(X_train,0,1,axis=1).shape

(353, 11)