### Linear Data

In [None]:
import numpy as np
from LinearModel import LinearRegresionModel
from sklearn.preprocessing import StandardScaler

np.random.seed(42)  # pin NumPy's global RNG so the sample below is repeatable
m = 100  # sample count
X = 2 * np.random.rand(m, 1)  # (m, 1) column of inputs, uniform on [0, 2)
noise = np.random.randn(m, 1)  # (m, 1) column of standard-normal noise
y = 4 + 3 * X + noise  # targets: intercept 4, slope 3, plus noise

model = LinearRegresionModel(X, y)

# Fit via normal_eqn() and pseudoinverse(), printing each prediction at x = 2.
print(model.normal_eqn().predict([[2]]))
print(model.pseudoinverse().predict([[2]]))

# Fit via GD() with its default strategy, then "SGD", then "mBGD",
# printing the prediction at x = 2 after each fit.
for gd_options in (
    {"verbose": 0},
    {"strategy": "SGD", "verbose": 1},
    {"strategy": "mBGD", "verbose": 1},
):
    print(model.GD(**gd_options).predict([[2]]))

print(model.test())

In [None]:
from sklearn.linear_model import SGDRegressor

# Reference fit with scikit-learn's SGDRegressor on the same X, y.
sgd_settings = {
    "max_iter": 1000,
    "tol": 1e-5,
    "penalty": None,
    "eta0": 0.01,
    "n_iter_no_change": 100,
    "random_state": 42,
}
# fit() expects 1-D targets, hence y.ravel(); fit() returns the estimator itself.
sgd_reg = SGDRegressor(**sgd_settings).fit(X, y.ravel())
print(sgd_reg.intercept_, sgd_reg.coef_)
sgd_reg.predict([[2]])

In [1]:
import numpy as np
from LinearModel import LinearRegresionModel
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error

# Seed NumPy's global RNG so a rerun of this cell starts from the same state.
# The dataset and split below are already pinned by random_state; this covers
# any draws LinearRegresionModel may make from np.random (not visible here).
np.random.seed(42)

# Synthetic 5-feature regression problem: 5000 samples, noise=0.1.
X, y = make_regression(n_samples=5000, n_features=5, noise=0.1, random_state=42)
y = y.reshape(-1, 1)  # column vector, the target shape passed to LinearRegresionModel

# Hold out 20% of the rows for the final comparison.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Custom model, trained with the "mBGD" strategy in batches of 100.
# NOTE(review): the output saved with this cell shows this model's test MSE
# (~1.6e4) far above SGDRegressor's (~1e-2) — presumably GD stops before
# converging with these settings; confirm against LinearModel.
my_model = LinearRegresionModel(inputs=X_train, labels=y_train, validation_split=0.1)
my_model = my_model.GD(strategy="mBGD", verbose=0, batch_size=100, epsilon=1e-10, n_iter_no_change=5)

# Baseline: scikit-learn's SGDRegressor without a penalty term.
sgd_reg = SGDRegressor(max_iter=10000, tol=1e-5, penalty=None, eta0=0.01,
                       n_iter_no_change=100, random_state=42)
sgd_reg.fit(X_train, y_train.ravel())  # y.ravel() because fit() expects 1D targets

# Predict on the held-out rows; sklearn returns a 1-D array, so reshape it to
# a column to line up with y_test.
y_pred_my_model = my_model.predict(X_test)
y_pred_sgd_reg = sgd_reg.predict(X_test).reshape(-1, 1)

# Show the first ten predictions of each model next to the true targets.
print(y_pred_my_model[:10])
print(y_pred_sgd_reg[:10])
print(y_test[:10])

mse_my_model = mean_squared_error(y_test, y_pred_my_model)
mse_sgd_reg = mean_squared_error(y_test, y_pred_sgd_reg)

print(f"MSE for your model: {mse_my_model} {my_model}")
print(f"MSE for SGDRegressor: {mse_sgd_reg} Params: {sgd_reg.intercept_} {sgd_reg.coef_}")

# Lower test MSE wins.
if mse_my_model < mse_sgd_reg:
    print("Your model performed better!")
elif mse_my_model > mse_sgd_reg:
    print("SGDRegressor performed better!")
else:
    print("Both models have the same performance!")


Running mBGD Algo with a batch size of 100
[[26.95917251]
 [31.90402501]
 [32.37516758]
 [91.7457024 ]
 [20.73510245]
 [37.11734841]
 [16.59384586]
 [55.14220891]
 [47.56358478]
 [16.56033085]]
[[-54.04329655]
 [  9.42388657]
 [-67.50521571]
 [347.08962042]
 [  4.58183663]
 [132.69165233]
 [-44.03141039]
 [170.58075862]
 [  7.86806858]
 [-84.08175913]]
[[-54.33993682]
 [  9.43281056]
 [-67.47112052]
 [347.20694834]
 [  4.60517783]
 [132.49847765]
 [-44.03824716]
 [170.45790111]
 [  7.93564109]
 [-84.06213641]]
MSE for your model: 15858.42517605751 Parameters: Bias: -101.84, Params: [89.64, 25.983, 108.34, -24.828, 37.329]
MSE for SGDRegressor: 0.010219601978562807 Params: [0.00153687] [86.22204764 49.71792566 99.50530551 14.21605804 53.55142189]
SGDRegressor performed better!


In [None]:
def t(lr):
    """Return ``5 / (lr + 50)``.

    NOTE(review): the form 5 / (x + 50) resembles a ``t0 / (t + t1)``
    learning-rate schedule, so the argument may really be an iteration
    count rather than a learning rate — confirm. The parameter name is
    kept as ``lr`` so existing keyword calls keep working.

    Raises:
        ZeroDivisionError: if ``lr == -50``.
    """
    return 5 / (lr + 50)


t(0.01*10000)

### Non-linear Data

In [None]:
import numpy as np
from LinearModel import LinearRegresionModel
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

np.random.seed(42)  # pin NumPy's global RNG so the sample below is repeatable
m = 100  # sample count
X = 6 * np.random.rand(m, 1) - 3  # (m, 1) column of inputs, uniform on [-3, 3)
noise = np.random.randn(m, 1)  # (m, 1) column of standard-normal noise
y = 0.5 * X ** 2 + X + 2 + noise  # quadratic signal plus noise

# Degree-2 polynomial expansion of X, without the bias column (include_bias=False).
poly_features = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly_features.fit_transform(X)

# First raw sample next to its expanded row.
print("Sample Data:", X[0], X_poly[0])

# Scikit Learn Library: LinearRegression fitted on the expanded features
# (fit() returns the estimator itself).
lin_reg = LinearRegression().fit(X_poly, y)
print("Scikit Learn:\n", lin_reg.intercept_, lin_reg.coef_)

# Custom model on the same expanded features, constructed with lr=0.1.
model = LinearRegresionModel(X_poly, y, lr=0.1)
model.test()