In [2]:
from sklearn.datasets import make_regression
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [3]:
# Synthetic regression problem: 100 samples, 2 features (both informative),
# a single target, and additive noise (noise=50).
# random_state pins the generator so that Restart & Run All rebuilds the exact
# same dataset; without it every run draws new data and the metrics,
# coefficients and plots further down cannot be reproduced.
X, y = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=50,
    random_state=42,
)

In [5]:
# Tabular view of the data for plotting: the two columns of X become
# feature1 / feature2, and the target vector is attached as a third column.
df = pd.DataFrame(X, columns=['feature1', 'feature2']).assign(target=y)

In [6]:
# Sanity check: (rows, columns) of the assembled frame.
df.shape

(100, 3)

In [7]:
# Preview the first five rows.
df.head()

Unnamed: 0,feature1,feature2,target
0,-1.162783,-0.107282,-160.68385
1,-0.605665,-0.998507,-110.161164
2,0.121242,1.067048,86.560556
3,0.249781,0.786416,1.245471
4,0.529272,-0.621052,-9.17061


In [8]:
# 3-D scatter of the raw data: the two features on the horizontal axes,
# the target on the vertical axis.
fig = px.scatter_3d(
    data_frame=df,
    x='feature1',
    y='feature2',
    z='target',
)
fig.show()

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [10]:
from sklearn.linear_model import LinearRegression

In [11]:
# Estimator created with no arguments, i.e. scikit-learn's default settings.
lr = LinearRegression()

In [12]:
# Fit on the training split only; the test split stays unseen until evaluation.
lr.fit(X_train,y_train)

0,1,2
,"fit_intercept  fit_intercept: bool, default=True Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered).",True
,"copy_X  copy_X: bool, default=True If True, X will be copied; else, it may be overwritten.",True
,"tol  tol: float, default=1e-6 The precision of the solution (`coef_`) is determined by `tol` which specifies a different convergence criterion for the `lsqr` solver. `tol` is set as `atol` and `btol` of :func:`scipy.sparse.linalg.lsqr` when fitting on sparse training data. This parameter has no effect when fitting on dense data. .. versionadded:: 1.7",1e-06
,"n_jobs  n_jobs: int, default=None The number of jobs to use for the computation. This will only provide speedup in case of sufficiently large problems, that is if firstly `n_targets > 1` and secondly `X` is sparse or if `positive` is set to `True`. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details.",
,"positive  positive: bool, default=False When set to ``True``, forces the coefficients to be positive. This option is only supported for dense arrays. For a comparison between a linear regression model with positive constraints on the regression coefficients and a linear regression without such constraints, see :ref:`sphx_glr_auto_examples_linear_model_plot_nnls.py`. .. versionadded:: 0.24",False


In [13]:
# Predictions for the held-out test features, used for the metrics below.
y_pred = lr.predict(X_test)

In [15]:
# Report the test-set error metrics, one per line, each computed from the
# true targets and the model's predictions.
for label, metric in (
    ("MAE =", mean_absolute_error),
    ("MSE =", mean_squared_error),
    ("R2 score =", r2_score),
):
    print(label, metric(y_test, y_pred))

MAE = 39.59253413911588
MSE = 2263.3492706193783
R2 score = 0.6185427184197361


In [21]:
# Build a regular grid over [-5, 5] x [-5, 5] in feature space and evaluate
# the fitted model on it, so the regression plane can be drawn as a surface.
#
# The axis vectors are deliberately NOT named `x` / `y`: `y` already holds the
# regression target returned by make_regression, and rebinding it here would
# silently break any later re-run of the DataFrame or train_test_split cells
# (a length-10 vector would be paired with a 100-row X).
# numpy is already imported as `np` in the notebook's import cell.
n_grid = 10  # points per axis -> n_grid * n_grid grid points in total
grid_x = np.linspace(-5, 5, n_grid)
grid_y = np.linspace(-5, 5, n_grid)

xGrid, yGrid = np.meshgrid(grid_x, grid_y)

# One row per grid point, columns ordered (feature1, feature2) to match the
# column order of the matrix the model was trained on.
final = np.column_stack((xGrid.ravel(), yGrid.ravel()))   # shape = (n_grid**2, 2)

# predict() yields one value per row; fold the flat result back into the grid
# layout so it lines up element-wise with xGrid / yGrid.
z_final = lr.predict(final).reshape(xGrid.shape)

In [23]:
# Raw observations as a 3-D scatter.
fig = px.scatter_3d(df, x='feature1', y='feature2', z='target')

# Model predictions over the grid, drawn as a semi-transparent surface so the
# points lying behind it remain visible.
plane = go.Surface(x=xGrid, y=yGrid, z=z_final, opacity=0.6)
fig.add_trace(plane)

fig.show()

In [19]:
# Fitted coefficient for each input column, in the same order as the columns of X.
lr.coef_

array([70.94828253, 35.18125419])

In [20]:
# Fitted intercept (bias) term of the model.
lr.intercept_

np.float64(-4.249739914179146)