In [1]:
from boosting import GBRegressor
from tree import TreeRegressor
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from utils import save_model, open_model
from sklearn.metrics import mean_squared_error as mse


In [6]:
# Work with the first 10,000 rows of the California housing dataset.
data = fetch_california_housing()
x, y = data.data[:10000], data.target[:10000]

# Hold out 20% of the rows, then cut that hold-out in half to obtain separate
# test and validation parts (80/10/10 overall). Both splits use seed 1337.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=1337, test_size=0.2
)
x_test, x_valid, y_test, y_valid = train_test_split(
    x_test, y_test, random_state=1337, test_size=0.5
)

In [23]:
# Boosting ensemble: GBRegressor (from `boosting`) built on the TreeRegressor
# class (from `tree`), with depth-3 base models and 100 estimators.
# NOTE(review): subsample=0.3 is passed together with randomization=False --
# confirm whether subsample has any effect when randomization is disabled.
model = GBRegressor(
    TreeRegressor,
    base_model_params={'max_depth': 3},
    n_estimators=100,
    learning_rate=0.1,
    subsample=0.3,
    randomization=False,
    random_seed=None,
    use_best_model=True,
)

In [None]:
class GradientBoosting():
    """Gradient boosting for regression with squared-error loss.

    The ensemble starts from the mean of the training targets and then adds
    ``n_estimators`` base models, each fitted to the current residuals
    ``y - prediction`` (the negative gradient of the squared error) and scaled
    by ``learning_rate``.

    Parameters
    ----------
    n_estimators : int
        Number of boosting iterations (base models).
    learning_rate : float
        Multiplier applied to every base model's prediction.
    max_depth : int
        Passed to the base model constructor as ``max_depth``.
    random_state, n_samples, min_size :
        Stored on the instance; not used by the code in this class.
    base_tree : str
        Name of the base learner. Only ``'Tree'`` (``TreeRegressor``) is
        implemented here; any other value makes ``fit`` raise ``ValueError``.

    Attributes
    ----------
    trees_ : list
        Fitted base models, in boosting order.
    loss_by_iter : list
        Training MSE after every iteration except the first one.
    loss_by_iter_test : list
        Created empty; never filled by this class.
    init_value_ : float or None
        Mean of the training targets; ``None`` until ``fit`` is called.
    """

    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3,
                 random_state=17, n_samples=15, min_size=5, base_tree='Bagging'):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        # Previously accepted but silently dropped; kept for introspection.
        self.random_state = random_state
        self.min_size = min_size
        self.n_samples = n_samples
        self.base_tree = base_tree
        self.loss_by_iter = []
        self.loss_by_iter_test = []
        self.trees_ = []
        self.init_value_ = None

    def initialization(self, y):
        """Return the starting prediction: the mean of ``y`` repeated per row.

        Defined as a method rather than a lambda stored on the instance,
        because an instance holding a lambda attribute cannot be pickled.
        """
        y = np.asarray(y, dtype=float).reshape(-1)
        return np.mean(y) * np.ones([y.shape[0]])

    @staticmethod
    def _progress(iterable):
        """Wrap ``iterable`` in a tqdm progress bar when tqdm is installed."""
        try:
            from tqdm.auto import tqdm
        except ImportError:
            # No tqdm available: iterate silently instead of failing.
            return iterable
        return tqdm(iterable)

    def _make_base_model(self):
        """Build a fresh, unfitted base model according to ``base_tree``."""
        if self.base_tree == 'Tree':
            return TreeRegressor(max_depth=self.max_depth)
        raise ValueError(
            "Unsupported base_tree %r: only 'Tree' is implemented" % (self.base_tree,)
        )

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y`` and return ``self``.

        Any state left over from a previous ``fit`` call is discarded.

        Raises
        ------
        ValueError
            If ``base_tree`` is not supported, if the training set is empty,
            or if ``X`` and ``y`` have a different number of rows.
        """
        # Fail early with a clear message instead of an UnboundLocalError
        # from inside the boosting loop.
        if self.base_tree != 'Tree':
            self._make_base_model()

        n_rows = X.shape[0]
        # Flatten so that a column-vector target cannot broadcast into an
        # (n, n) residual matrix.
        targets = np.asarray(y, dtype=float).reshape(-1)
        if n_rows == 0:
            raise ValueError("Cannot fit on an empty training set")
        if targets.shape[0] != n_rows:
            raise ValueError(
                "X and y have inconsistent lengths: %d != %d" % (n_rows, targets.shape[0])
            )

        self.X = X
        self.y = y
        # Reset fitted state so that calling fit twice does not stack ensembles.
        self.trees_ = []
        self.loss_by_iter = []
        self.init_value_ = float(np.mean(targets))

        prediction = np.full(n_rows, self.init_value_)

        for t in self._progress(range(self.n_estimators)):
            # Negative gradient of the squared error. It is used on every
            # iteration, the first included, because the running prediction
            # already contains the initial mean.
            resid = targets - prediction

            tree = self._make_base_model()
            tree.fit(X, resid)

            step = np.asarray(tree.predict(X), dtype=float).reshape([n_rows])
            self.trees_.append(tree)
            prediction += self.learning_rate * step

            # The loss after the very first iteration is not recorded (as in
            # the original code), so the list has n_estimators - 1 entries.
            if t > 0:
                self.loss_by_iter.append(mse(targets, prediction))

        return self

    def predict(self, X):
        """Predict targets for ``X`` with the fitted ensemble.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        init_value = getattr(self, 'init_value_', None)
        if init_value is None:
            raise RuntimeError("GradientBoosting.predict called before fit")

        n_rows = X.shape[0]
        # Start from the mean of the training targets ...
        pred = np.full(n_rows, init_value)
        # ... then add the scaled contribution of every tree. The slice keeps
        # the old behaviour of honouring a reduced n_estimators without
        # indexing past the trees that actually exist.
        for tree in self.trees_[:self.n_estimators]:
            contribution = np.asarray(tree.predict(X), dtype=float).reshape([n_rows])
            pred += self.learning_rate * contribution

        return pred