In [30]:
import polars as pl
import numpy as np
import altair as alt

# Step 1: Generate Data
# Synthetic sample: 100 points scattered around the line y = 2x + 1.
np.random.seed(0)  # fixed seed so every run draws the same numbers
x = 10 * np.random.rand(100)  # uniform draws scaled to the range [0, 10)
y = 1 + 2 * x + np.random.randn(100)  # true line plus standard-normal noise

# Wrap both arrays in a Polars DataFrame with columns "x" and "y"
df = pl.DataFrame(dict(x=x, y=y))

# Step 2: Implement Linear Regression


def linear_regression(df: "pl.DataFrame") -> "tuple[float, float]":
    """Fit a simple least-squares line ``y = m * x + b`` to the data.

    The slope is the sum of the products of the x- and y-deviations from
    their means, divided by the sum of the squared x-deviations. The
    intercept is then chosen so the line passes through the point of means.

    Args:
        df: Frame with numeric columns ``"x"`` and ``"y"``.

    Returns:
        ``(m, b)``: the slope and the intercept of the fitted line.

    Raises:
        ValueError: If ``df`` has fewer than two rows, or if the ``x``
            values have no spread; the slope is undefined in both cases.
    """
    if len(df) < 2:
        raise ValueError("linear_regression needs at least two data points")

    x_mean = df["x"].mean()
    y_mean = df["y"].mean()

    # Deviations from the means, shared by both sums below
    x_dev = df["x"] - x_mean
    y_dev = df["y"] - y_mean

    numerator = (x_dev * y_dev).sum()
    denominator = (x_dev ** 2).sum()

    # Without any spread in x the slope would be 0 / 0, i.e. undefined
    if denominator == 0:
        raise ValueError(
            "linear_regression needs at least two distinct x values"
        )

    m = numerator / denominator  # Slope
    b = y_mean - m * x_mean  # Intercept

    return m, b


# Fit the model: estimate slope and intercept from the generated data
m, b = linear_regression(df)

# Step 3: Make Predictions
# Add the fitted value m * x + b for every row as column "y_pred"
df = df.with_columns(
    (pl.col("x") * m + b).alias("y_pred")
)

# Step 4: Visualize Data with Altair
# Blue points: the (x, y) samples, with zooming and panning enabled
scatter = (
    alt.Chart(df)
    .mark_point(color="blue")
    .encode(x="x", y="y", tooltip=["x", "y"])
    .properties(title="Simple Linear Regression", width=500)
    .interactive()
)

# Red line: the fitted values from the "y_pred" column
line = (
    alt.Chart(df)
    .mark_line(color="red")
    .encode(x="x", y="y_pred", tooltip=["x", "y_pred"])
)

# Combine the scatter and line charts into one layered chart
chart = alt.layer(scatter, line)

# Show the table ordered by x, followed by the chart
display(df.sort("x"), chart)

# Output the coefficients
print(f"Slope (m): {m}", f"Intercept (b): {b}", sep="\n")

x,y,y_pred
f64,f64,f64
0.046955,1.111389,1.315764
0.187898,1.107793,1.596762
0.201075,0.712601,1.623034
0.202184,1.414868,1.625244
0.391878,1.120278,2.003436
…,…,…
9.636628,19.202503,20.434633
9.764595,20.460948,20.68976
9.767611,19.790467,20.695773
9.786183,22.455518,20.732801


Slope (m): 1.9936935021402027
Intercept (b): 1.2221510774472346


### Test predictions


In [31]:
# Spot check: evaluate the fitted line by hand at x = 0.201075, an x value
# taken from the sorted table above, to compare against its y_pred entry
m * 0.201075 + b

1.6230329983900758

In [35]:
# New x values 10..29 as floats, beyond the 0-10 range used for fitting
x = pl.Series(np.arange(10, 30), dtype=pl.Float64)

# For these points both "y" and "y_pred" hold the model's prediction, so the
# frame has the same three columns as df and can be stacked beneath it
dfx = pl.DataFrame({"x": x, "y": x * m + b}).with_columns(
    pl.col("y").alias("y_pred")
)

dfz = df.vstack(dfx)

# Blue points: original samples plus the predicted points for the new x values
scatter = (
    alt.Chart(dfz)
    .mark_point(color="blue")
    .encode(x="x", y="y", tooltip=["x", "y"])
    .properties(title="Simple Linear Regression", width=500)
    .interactive()
)

# Red line: fitted values across the whole extended x range
line = (
    alt.Chart(dfz)
    .mark_line(color="red")
    .encode(x="x", y="y_pred", tooltip=["x", "y_pred"])
)

# Combine the scatter and line charts
chart = alt.layer(scatter, line)
chart