In [1]:
import pandas as pd
import numpy as np
class MyLineReg():
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    n_iter : int, default 100
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float, default 0.1
        Multiplier applied to the gradient on every step.
    weights : optional, default None
        Stored as given. ``fit`` always re-initialises the weights to ones,
        so a value passed here is only visible until ``fit`` is called.
    metric : {'mae', 'mse', 'rmse', 'mape', 'r2'} or None, default None
        Additional quality metric that is reported in the training log and
        kept as the final score. ``None`` turns metric tracking off.
    """

    def __init__(self,n_iter = 100,learning_rate = 0.1,weights = None,metric = None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        # Attribute name (including its spelling) is kept for compatibility.
        # Defined here so get_best_score() works before fit() is called.
        self.best_metrc = None

    @staticmethod
    def mae(y,y_pred):
        """Mean absolute error."""
        return np.mean(abs(y - y_pred))

    @staticmethod
    def mse(y,y_pred):
        """Mean squared error."""
        return np.mean((y-y_pred)**2)

    @staticmethod
    def rmse(y,y_pred):
        """Root of the mean squared error."""
        return (np.mean((y-y_pred)**2))**0.5

    @staticmethod
    def mape(y,y_pred):
        """Mean absolute percentage error, in percent (divides by ``y``)."""
        return 100/len(y_pred)*np.sum(abs((y-y_pred)/y))

    @staticmethod
    def r2(y,y_pred):
        """Coefficient of determination."""
        return (1 - np.sum((y - y_pred)**2) / np.sum((y - np.mean(y))**2))

    @staticmethod
    def _add_intercept(X):
        """Return a copy of ``X`` with a leading column named '0' holding 1."""
        design = X.copy()
        design.insert(0,'0',1)
        return design

    def _resolve_metric(self):
        """Return the function for ``self.metric``, or None if no metric is set.

        Raises
        ------
        KeyError
            If ``self.metric`` is not None and is not a known metric name.
        """
        if self.metric is None:
            return None
        dict_func = {'mae': MyLineReg.mae,
        'mse': MyLineReg.mse,
        'rmse': MyLineReg.rmse,
        'mape': MyLineReg.mape,
        'r2': MyLineReg.r2}
        return dict_func[self.metric]

    def fit(self,X,y,verbose=False):
        """Fit the weights by gradient descent on the MSE loss.

        Parameters
        ----------
        X : DataFrame
            Feature table; it must support ``copy``, ``insert`` and ``dot``.
            The caller's object is not modified.
        y : array-like
            Target values, one per row of ``X``.
        verbose : bool, default False
            When truthy, print the starting loss and then the loss (plus the
            selected metric, if any) on every 100th iteration.

        Raises
        ------
        KeyError
            If ``self.metric`` is set to an unknown metric name.
        """
        select_func = self._resolve_metric()
        design = self._add_intercept(X)
        n_rows = design.shape[0]
        self.weights = np.ones(design.shape[1])
        if verbose:
            predict = design.dot(self.weights)
            print(f'start | loss: {1/n_rows*np.sum((predict-y)**2)}')
        for i in range(self.n_iter):
            predict = design.dot(self.weights)
            error = (predict - y)
            # Gradient of mean((Xw - y)^2) with respect to w.
            grad = 2/n_rows * error.dot(design)
            self.weights -= self.learning_rate * grad
            if verbose and i%100 == 0:
                # Loss and metric describe the weights *before* this step,
                # because `predict` was computed ahead of the update.
                Mse = np.mean(error**2)
                if select_func is None:
                    print(f'{i} | loss: {Mse}')
                else:
                    print(f'{i} | loss: {Mse}|{self.metric}:{select_func(y,predict)}')
        if select_func is None:
            self.best_metrc = None
        else:
            # Final score is recomputed with the weights after the last step.
            self.best_metrc = select_func(y,design.dot(self.weights))

    def predict(self,X):
        """Return predictions for ``X`` using the current weights."""
        return self._add_intercept(X).dot(self.weights)

    def get_coef(self):
        """Return the feature weights, i.e. all weights after the intercept."""
        return self.weights[1:]

    def get_best_score(self):
        """Return the metric value stored by the last ``fit`` (None if no metric)."""
        return self.best_metrc

    def __str__(self):
        return f"MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate},weights ={self.weights},metric = {self.metric}"







In [2]:
# Check the __str__ representation of a model built with default settings.
print(MyLineReg())

MyLineReg class: n_iter=100, learning_rate=0.1,weights =None,metric = None


In [11]:
# Model used below: 1000 gradient steps; 'mae' is only the reported metric,
# the quantity being minimised inside fit() is still the MSE.
linea =MyLineReg(n_iter = 1000, learning_rate = 0.1,metric = 'mae')

In [12]:
# Create a simple synthetic dataset: a DataFrame with two features (the target is built separately below)
np.random.seed(0) # For reproducibility of the results
X = pd.DataFrame({
    'feature1': np.random.rand(100),
    'feature2': np.random.rand(100)
})

# Build the target as a linear combination of the features plus some noise
y = 2 * X['feature1'] + 3 * X['feature2'] + np.random.randn(100) * 0.5

X.head()

Unnamed: 0,feature1,feature2
0,0.548814,0.677817
1,0.715189,0.270008
2,0.602763,0.735194
3,0.544883,0.962189
4,0.423655,0.248753


In [13]:
# Train with logging enabled: the loss and the MAE are printed every 100 iterations.
linea.fit(X,y,verbose=True)

start | loss: 0.7392383477312309
0 | loss: 0.7392383477312309|mae:0.7140082750081017
100 | loss: 0.23783767664021208|mae:0.3944809563432544
200 | loss: 0.21206691850102985|mae:0.37267823893141405
300 | loss: 0.2084942903546603|mae:0.3726465310037086
400 | loss: 0.2079881360927682|mae:0.372865517563604
500 | loss: 0.20791607208520746|mae:0.37305394081588694
600 | loss: 0.20790580063772737|mae:0.37312419056725815
700 | loss: 0.20790433626671792|mae:0.3731505566803224
800 | loss: 0.20790412748416742|mae:0.37316048393055745
900 | loss: 0.2079040977166567|mae:0.37316422733929433


In [14]:
# Learned feature weights (intercept excluded); the data was generated with 2 and 3.
linea.get_coef()

feature1    1.724118
feature2    2.870500
dtype: float64

In [15]:
# In-sample predictions on the same features the model was trained on.
y_pred = linea.predict(X)

In [16]:
# Selected metric (MAE here) evaluated with the final weights after training.
linea.get_best_score()

0.37316563992692037