### Linear Regression

We use ordinary least squares (OLS), fitted with its closed-form solution. Although the target here is binary (0/1), linear regression outputs continuous values, so we evaluate with mean squared error (MSE) and also threshold the predictions at 0.5 to report a classification accuracy.


In [1]:
import numpy as np
import pandas as pd

from rice_ml.supervised_learning import LinearRegression

# Load the preprocessed lesion table (relative path, resolved against the working directory).
df = pd.read_csv("../data/lesions_processed.csv")

# Predictor columns, in the order they appear in the design matrix X.
feature_cols = ["x_norm", "y_norm", "slice_norm", "r_xy", "experiment"]
X = df[feature_cols].to_numpy()

# Regression target: the "tampered" column (0/1) cast to float.
y = df["tampered"].to_numpy().astype(float)


In [2]:
# Reproducible 75/25 train/test split over shuffled row indices.
train_fraction = 0.75
rng = np.random.default_rng(42)

n_rows = len(y)
idx = np.arange(n_rows)
rng.shuffle(idx)  # shuffles the index array in place

# The first `split` shuffled indices form the training set; the rest are held out.
split = int(train_fraction * n_rows)
tr, te = np.split(idx, [split])

Xtr, ytr = X[tr], y[tr]
Xte, yte = X[te], y[te]


In [3]:
# Fit the linear regression on the training split.
lr = LinearRegression()
lr.fit(Xtr, ytr)

# Continuous predictions for the held-out rows.
pred = lr.predict(Xte)

# Mean squared error on the test split.
# NOTE: a bare `pred[:10]` previously sat above this line; only the final
# expression of a cell is rendered, so it never displayed anything. To preview
# predictions, make `pred[:10]` the last line of its own cell.
mse = np.mean((pred - yte) ** 2)
mse


np.float64(0.15932737822078352)

In [4]:
# Convert continuous predictions to hard 0/1 labels and score them.
threshold = 0.5
is_positive = pred >= threshold
pred_class = is_positive.astype(int)

# Accuracy = fraction of test rows whose thresholded label matches the true label.
true_class = yte.astype(int)
hits = pred_class == true_class
acc = np.mean(hits)
acc


np.float64(0.7906976744186046)