In [23]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt

In [29]:
class RegOptimizer():
    """Base class for regression models trained by batch gradient descent.

    Subclasses provide the loss gradient through ``grad_func`` and the
    prediction rule through ``predict``; this class owns the descent loop.

    Parameters
    ----------
    alpha : float
        Step size multiplied with the gradient on every update.
    n_iters : int
        Maximum number of gradient steps performed by ``fit``.
    tol : float, optional
        Descent stops early once the largest absolute gradient component
        is below this value. Defaults to 0.01.
    verbose : bool, optional
        When true (the default), print the iteration index, the gradient
        and the updated parameters after every step.
    """

    # Start point used by ``fit`` when the caller does not pass one.
    DEFAULT_START_THETA = (2.0, 0.0)

    def __init__(self, alpha, n_iters, tol=0.01, verbose=True):
        self.theta = None  # fitted parameters; stays None until ``fit`` runs
        self._alpha = alpha
        self._n_iters = n_iters
        self._tol = tol
        self._verbose = verbose

    def gradient_step(self, theta, theta_grad):
        """Return ``theta`` moved against ``theta_grad`` by the step size ``alpha``."""
        return theta - self._alpha * theta_grad

    def grad_func(self, X, y, theta):
        """Return the loss gradient at ``theta``; must be overridden by subclasses."""
        raise NotImplementedError()

    def optimize(self, X, y, start_theta, n_iters):
        """Run up to ``n_iters`` gradient steps starting from ``start_theta``.

        Parameters
        ----------
        X, y
            Training data, passed unchanged to ``grad_func``.
        start_theta : array-like
            Initial parameter vector. It is copied, so the caller's object
            is never modified.
        n_iters : int
            Maximum number of steps.

        Returns
        -------
        numpy.ndarray
            The parameter vector after the last performed step.
        """
        # Copy as float so the caller's array is untouched and the result has
        # a float dtype even when no step is performed.
        theta = np.array(start_theta, dtype=float)

        for i in range(n_iters):
            theta_grad = self.grad_func(X, y, theta)
            theta = self.gradient_step(theta, theta_grad)
            if self._verbose:
                print(i, theta_grad, theta)
            # The test uses the gradient computed *before* this step, so the
            # update for the current iteration has already been applied.
            if np.abs(theta_grad).max() < self._tol:
                break

        return theta

    def fit(self, X, y, start_theta=None):
        """Fit the parameters to ``(X, y)`` and store them in ``self.theta``.

        Parameters
        ----------
        X : numpy.ndarray
            Two-dimensional design matrix; ``X.shape[1]`` is the number of
            parameters to fit.
        y : numpy.ndarray
            Target values, passed on to ``grad_func``.
        start_theta : array-like, optional
            Initial parameter vector with one entry per column of ``X``.
            Defaults to ``DEFAULT_START_THETA``.

        Raises
        ------
        ValueError
            If the start point does not have exactly ``X.shape[1]`` entries.
        """
        m = X.shape[1]
        if start_theta is None:
            start_theta = self.DEFAULT_START_THETA
        start_theta = np.asarray(start_theta, dtype=float)

        # Fail early with a readable message instead of a shape error raised
        # from inside the subclass's gradient computation.
        if start_theta.shape != (m,):
            raise ValueError(
                f'start_theta has shape {start_theta.shape}, but X has {m} '
                f'column(s); pass start_theta with {m} value(s)'
            )

        self.theta = self.optimize(X, y, start_theta, self._n_iters)

    def predict(self, X):
        """Return predictions for ``X``; must be overridden by subclasses."""
        raise NotImplementedError()

In [30]:
class LinReg(RegOptimizer):
    """Linear model ``y ≈ X·theta`` trained with the inherited descent loop."""

    def grad_func(self, X, y, theta):
        """Return ``(1/n) · Xᵀ(X·theta − y)``, where ``n`` is the row count of ``X``.

        This is the gradient of ``(1/(2n)) · ||X·theta − y||²`` with respect
        to ``theta``.
        """
        residuals = X.dot(theta) - y
        n_samples = X.shape[0]
        return (1. / n_samples) * X.T.dot(residuals)

    def predict(self, X):
        """Return ``X·theta`` for the fitted parameters.

        Raises ``Exception`` when called before ``self.theta`` has been set.
        """
        if self.theta is not None:
            return X.dot(self.theta)

        raise Exception('You should train the model first')

In [31]:
def print_regression_metrics(y_true, y_pred):
    """Print the MSE and RMSE between ``y_true`` and ``y_pred``, two decimals each.

    Nothing is returned; the values are only written to stdout.
    """
    mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
    rmse = np.sqrt(mse)  # RMSE is the square root of the MSE
    print(f'MSE = {mse:.2f}, RMSE = {rmse:.2f}')

In [32]:
# Toy training set: two samples with two columns each (first column is all ones).
X = np.array([[1, 2], [1, 1]], dtype=np.double)
y = np.array([2, 5], dtype=np.double)

In [33]:
# Train on the toy data with step size 1/6 and at most 1000 gradient steps.
linreg_crit = LinReg(alpha=1/6, n_iters=1000)
linreg_crit.fit(X, y)
y_pred = linreg_crit.predict(X)

# Compute the MSE and RMSE errors on the training data
print_regression_metrics(y, y_pred)

0 [-1.5 -1.5] [ 2.25  0.25]
1 [-0.875 -0.5  ] [ 2.39583333  0.33333333]
2 [-0.60416667 -0.07291667] [ 2.49652778  0.34548611]
3 [-0.48524306  0.10850694] [ 2.57740162  0.32740162]
4 [-0.43149595  0.18460648] [ 2.64931761  0.29663387]
5 [-0.40573158  0.2155611 ] [ 2.71693954  0.26070702]
6 [-0.39199992  0.22717687] [ 2.78227286  0.22284421]
7 [-0.38346082  0.23051982] [ 2.846183    0.18442424]
8 [-0.37718064  0.2303351 ] [ 2.90904644  0.14603506]
9 [-0.37190097  0.2286573 ] [ 2.97102993  0.10792551]
10 [-0.3670818   0.22635867] [ 3.03221024  0.07019906]
11 [-0.36249117  0.22381301] [ 3.09262543  0.03289689]
12 [-0.35802923  0.22118038] [ 3.15229697 -0.0039665 ]
13 [-0.35365279  0.2185292 ] [ 3.2112391  -0.04038804]
14 [-0.34934295  0.21588856] [ 3.26946293 -0.07636946]
15 [-0.34509127  0.21327073] [ 3.32697814 -0.11191458]
16 [-0.34089374  0.21068074] [ 3.38379376 -0.14702804]
17 [-0.3367483   0.20812054] [ 3.43991848 -0.1817148 ]
18 [-0.33265372  0.20559072] [ 3.49536076 -0.21597992]
1

Задание 3.4.5

In [41]:
# Coefficient of determination computed by hand: R^2 = 1 - SS_res / SS_tot
y = np.array([2, 3, -1, 4])
y_pred = np.array([1, 3, 2, 5])
ssres = ((y - y_pred) ** 2).sum()    # residual sum of squares
sstot = ((y - y.mean()) ** 2).sum()  # total sum of squares around the mean of y
r2 = 1 - ssres / sstot
r2

0.2142857142857143