In [1]:
import numpy as np
import pandas as pd

class LinearRegression:
    """Linear regression without an intercept term, fitted by batch gradient descent.

    Parameters
    ----------
    max_steps : int, default=100
        Upper bound on the number of gradient-descent iterations.
    step_size : float, default=0.1
        Learning rate that scales the gradient at every weight update.

    Attributes
    ----------
    weights : numpy.ndarray of shape (num_features,)
        Learned coefficients. Only available after ``fit`` has been called.
    """

    def __init__(self, max_steps=100, step_size=0.1):
        self.max_steps = max_steps
        self.step_size = step_size

    def sum_of_squared_errors(self, y, preds):
        """Return the loss: the sum of squared differences between ``preds`` and ``y``."""
        return np.sum((preds - y) ** 2)

    def fit(self, X, y):
        """Learn ``weights`` from the training data with gradient descent.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
            Training features.
        y : array-like of shape (num_samples,) or (num_samples, 1)
            Training targets; flattened to one dimension before use.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``y`` does not hold exactly
            one value per row of ``X``.

        Notes
        -----
        Iteration stops after ``max_steps`` updates, or earlier as soon as an
        update fails to lower the loss. In that case the last update is
        discarded, so ``weights`` always holds the best weights seen.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector cannot broadcast against the 1-D
        # predictions into an (n, n) matrix of residuals.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (num_samples, num_features), got shape {X.shape}"
            )
        num_samples = X.shape[0]
        if y.shape[0] != num_samples:
            raise ValueError(
                f"y holds {y.shape[0]} values but X has {num_samples} samples"
            )

        # Initialize our weights to zero
        self.weights = np.zeros(X.shape[1])

        # Predictions and loss for the initial weights
        preds = self.predict(X)
        current_loss = self.sum_of_squared_errors(y, preds)

        # Start running Gradient Descent
        for _ in range(self.max_steps):
            # Gradient of SSE / (2 * num_samples) with respect to the weights
            dw = np.dot(X.T, (preds - y)) * (1 / num_samples)

            # Remember the weights so a bad step can be undone
            previous_weights = self.weights.copy()
            self.weights -= dw * self.step_size

            # Recompute predictions and loss with the updated weights
            preds = self.predict(X)
            new_loss = self.sum_of_squared_errors(y, preds)

            # We want the loss to go down with each iteration. Written as
            # "not <" so that a NaN loss also stops the descent.
            if not new_loss < current_loss:
                # The step did not help: roll back to the better weights.
                self.weights = previous_weights
                break

            current_loss = new_loss

    def predict(self, X):
        """Return the predictions ``X @ weights``.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
            Samples to predict for.

        Returns
        -------
        numpy.ndarray of shape (num_samples,)
            Dot product of each sample with the learned weights. Requires
            ``fit`` to have been called first, since ``weights`` is set there.
        """
        return np.dot(X, self.weights)


Now let's try the model on an example dataset.

In [2]:
from sklearn.datasets import make_regression
# Generate a synthetic regression problem with a single feature.
# random_state pins the random draw so that Restart & Run All
# reproduces exactly the same data (and therefore the same results).
X, y = make_regression(n_samples=1000, n_features=1, noise=10, random_state=42)

# To see what the data looks like, put it in a DataFrame:
# 'X' is the feature and 'y' is the target.
df = pd.DataFrame(data=X, columns=['Feature'])
df['Target'] = y
df

Unnamed: 0,Feature,Target
0,0.157296,5.545396
1,-0.867824,-62.884123
2,0.178352,9.593019
3,-1.149629,-75.173153
4,-0.360598,-14.798415
...,...,...
995,-0.439090,-41.872479
996,0.168671,12.244196
997,0.192870,14.412658
998,1.768221,133.323938


Now we can split the data into train and test sets with scikit-learn.

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; the other 80% are used for training.
# The fixed random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the gradient-descent LinearRegression defined earlier in this notebook
# and train it on the training split.
model = LinearRegression(max_steps=1000, step_size=0.01)
model.fit(X_train, y_train)

Finally, we can use the fitted model to make predictions on the test set.

In [4]:
# Predict the targets for the held-out test features.
predictions = model.predict(X_test)

# Show the first predictions next to the true targets. Slicing plus zip pairs
# the values by position and simply stops early when the test set has fewer
# than 10 samples, instead of raising an IndexError.
print("Predicted vs Actual values:")
for predicted, actual in zip(predictions[:10], y_test[:10]):
    print(f"Predicted: {predicted:.2f}, Actual: {actual:.2f}")

Predicted vs Actual values:
Predicted: 29.93, Actual: 32.00
Predicted: 96.39, Actual: 106.15
Predicted: 32.22, Actual: 25.64
Predicted: -4.97, Actual: -12.57
Predicted: 70.32, Actual: 83.30
Predicted: 58.86, Actual: 70.95
Predicted: -68.46, Actual: -61.78
Predicted: 24.79, Actual: 9.72
Predicted: 42.46, Actual: 22.74
Predicted: 11.09, Actual: 0.42


At this point, you can try to improve the model. Thank you!