In [89]:
import numpy as np
from numpy.random import rand
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = 'plotly_white'
pio.renderers.default = 'notebook'
import pandas as pd

In [74]:
X = rand(20,2)  # X = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 2], [2, 3]])
a = 1.3
b = 0.6
c = 3
y = np.dot(X, np.array([a, b])) + c + + 0.2 * rand(X.shape[0]) # y = a * x_0 + b * x_1 + c with added noise


In [78]:
fig = px.scatter_3d(x=X[:, 0], y=X[:, 1], z=y)
fig.update_layout(scene = dict(xaxis_title='x_0', yaxis_title='x_1', zaxis_title='y'))
fig.show()

In [87]:
reg = LinearRegression().fit(X, y)
print(f"{reg.score(X, y)=}") # this is R^2 = (Var(estimated plane) - Var(mean plane)) / (Var(mean plane))
print(f"{reg.coef_=}. Without noise it should be [{a}, {b}]")
print(f"{reg.intercept_=}. Without noise it should be {c}")

sample_point = np.array([[3, 5]])
print(f"{reg.predict(sample_point)[0]=}. We expect {a * sample_point[0][0] + b * sample_point[0][1] + c}")

reg.score(X, y)=0.9887142220723262
reg.coef_=array([1.36230016, 0.67079484]). Without noise it should be [1.3, 0.6]
reg.intercept_=3.034375834968137. Without noise it should be 3
reg.predict(sample_point)[0]=10.475250511266086. We expect 9.9


In [101]:
df = pd.DataFrame([
    {'taille':30, 'prix':1000},
    {'taille':31, 'prix':1500},
    {'taille':32, 'prix':1200},
    {'taille':36, 'prix':1000},
    {'taille':43, 'prix':2000},
    {'taille':40, 'prix':1800},
])
display(df.head(3))
fig = px.scatter(data_frame=df, x='taille', y='prix', labels=dict(taille="taille en m^2", prix="prix du loyer au m^2"))
fig.update_xaxes(range=[25, 45])
fig.update_yaxes(range=[0, 2500])
fig.show()

Unnamed: 0,taille,prix
0,30,1000
1,31,1500
2,32,1200


In [126]:
X = df['taille'].values.reshape(-1,1)
y = df['prix'].values.reshape(-1,1)
reg = LinearRegression().fit(X, y)
print(f"{reg.score(X, y)=}") # this is R^2 = (Var(estimated plane) - Var(mean plane)) / (Var(mean plane))
print(f"{reg.coef_=}")
print(f"{reg.intercept_=}")
#sample_point = np.array([[3, 5]])
#print(f"{reg.predict(sample_point)[0]=}. We expect {a * sample_point[0][0] + b * sample_point[0][1] + c}")

reg.score(X, y)=0.5929154286022065
reg.coef_=array([[61.48325359]])
reg.intercept_=array([-755.74162679])


In [123]:
reg.intercept_[0]

-755.7416267942583

In [127]:
x_min = df['taille'].min()
x_max = df['taille'].max()
print(x_min, x_max)
x_line = np.linspace(start=x_min, stop=x_max, num=10)
y_line = reg.coef_[0][0] * x_line + reg.intercept_[0]

30 43


In [128]:
fig_1 = px.scatter(data_frame=df, x='taille', y='prix', labels=dict(taille="taille en m^2", prix="prix du loyer au m^2"))
fig_1.update_xaxes(range=[25, 45])
fig_1.update_yaxes(range=[0, 2500])

fig_2 = px.line(x=x_line, y=y_line)
fig = go.Figure(data = fig_1.data + fig_2.data)
fig.show()

In [131]:
reg.predict([[36]])[0]

array([1457.65550239])