In [1]:
%run homework_modules.ipynb

In [3]:
import torch
from torch.autograd import Variable
from sklearn.datasets import fetch_openml
import numpy as np
from scipy.special import erf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

## Оптимизатор SGD (Стохастический градиентный бустинг)

In [78]:
class SGD:
    def __init__(self, parameters, lr=0.01, momentum=0.9):
        self.parameters = parameters
        self.lr = lr
        self.momentum = momentum
        self.velocities = []
        
        for p in self.parameters:
            
            if isinstance(p, np.ndarray):
                self.velocities.append(np.zeros_like(p))
            elif isinstance(p, list):
                
                try:
                    self.velocities.append(np.zeros_like(np.array(p, dtype=float)))
                except:
                    
                    self.velocities.append([np.zeros_like(sub_p) for sub_p in p])
            else:
                
                self.velocities.append(np.zeros_like(np.array(p)))

    def step(self):
        for i, (param, grad) in enumerate(zip(self.parameters, self.get_gradients())):
            self.velocities[i] = self.momentum * self.velocities[i] + self.lr * grad
            param -= self.velocities[i]

    def get_gradients(self):
        grads = []
        for module in self.parameters:
            if isinstance(module, Module):
                grads.extend(module.getGradParameters())
            else:
                grads.append(module)
        return grads

## Шедулер для lr

In [26]:
class StepLR:
    def __init__(self, optimizer, step_size=30, gamma=0.1):
        self.optimizer = optimizer
        self.step_size = step_size
        self.gamma = gamma
        self.epoch = 0

    def step(self):
        self.epoch += 1
        if self.epoch % self.step_size == 0:
            self.optimizer.lr *= self.gamma

## Ранняя останова

In [27]:
class EarlyStopping:
    def __init__(self, patience=5, delta=0):
        self.patience = patience
        self.delta = delta
        self.best_loss = np.inf
        self.counter = 0
        
    def __call__(self, val_loss):
        if val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

## Модель для задачи мультирегрессии

In [21]:
data1 = fetch_openml(name = "satimage", version = 1, as_frame = True)
X_r = data1.data
y_r = data1.target

In [22]:
X_r.head()

Unnamed: 0,Aattr,Battr,Cattr,Dattr,Eattr,Fattr,A1attr,B2attr,C3attr,D4attr,...,C21attr,D22attr,E23attr,F24attr,A25attr,B26attr,C27attr,D28attr,E29attr,F30attr
0,0.117596,1.241362,1.184036,0.815302,-0.158561,1.256483,1.193546,0.818486,-0.141965,0.879481,...,1.251179,0.807707,-0.069968,1.21916,1.250463,0.597678,-0.054291,1.233342,1.262255,0.603258
1,-1.205362,-1.249654,-0.077532,0.444886,-0.895959,-0.447579,-0.78676,-0.554203,-0.364672,0.092157,...,-0.614884,-0.192752,-0.736996,-0.969292,-0.844805,-0.40003,-0.725852,-0.344432,-0.594534,-0.183967
2,0.779075,0.148811,0.042617,-0.24303,0.800057,0.164136,0.05337,-0.448612,0.154978,-0.345245,...,-0.915862,-0.877277,0.671174,-0.006373,-0.425752,-0.662584,0.691889,0.356801,-0.175259,-0.236449
3,1.146564,0.585831,0.342991,0.021553,0.947536,0.601074,0.353416,0.02655,1.788164,1.010702,...,0.528832,0.28115,1.412317,1.044084,0.532085,0.282612,1.438068,1.058033,0.842981,0.130923
4,-0.764376,-1.16225,-0.137607,0.180303,-0.969698,-1.146681,-0.126658,0.184937,-0.735851,-1.132569,...,-0.79547,-0.192752,-0.885225,-1.231906,-0.784941,-0.347519,-0.875088,-1.220973,-0.774223,-0.551339


In [41]:
X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(X_r, y_r, test_size=0.2)

#### Маленькая модель

In [76]:
def create_small_regr_model(input_size):
    model = Sequential()
    model.add(Linear(5144, 36))
    model.add(ReLU())
    model.add(Linear(5144, 36))
    return model

#### Средняя модель

In [46]:
def create_medium_regr_model(input_size):
    model = Sequential()
    model.add(Linear(13, 256))
    model.add(BatchNormalization())
    model.add(ReLU())
    model.add(Dropout(0.3))
    model.add(Linear(256, 1))
    return model

#### Большая модель

In [47]:
def create_large_regr_model(input_size):
    model = Sequential()
    model.add(Linear(13, 512))
    model.add(BatchNormalization())
    model.add(ELU())
    model.add(Dropout(0.5))
    model.add(Linear(512, 256))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(Linear(256, 1))
    return model

In [74]:
def train_regression(X_train, y_train, X_test, y_test, input_size):
    model = create_small_regr_model(input_size)
    criterion = MSECriterion()
    optimizer = SGD(model.getParameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
    early_stop = EarlyStopping(patience=10)
    
    train_loss, val_loss = [], []
    for epoch in range(5):
        # Обучение
        model.train()
        output = model.forward(X_train)
        loss = criterion.forward(output, y_train)
        grad = criterion.backward(output, y_train)
        model.backward(X_train, grad)
        optimizer.step()
        train_loss.append(loss)
        
        # Валидация
        model.evaluate()
        val_output = model.forward(X_test)
        val_loss.append(criterion.forward(val_output, y_test))
        
        # Шедулер и ранняя остановка
        scheduler.step()
        if early_stop(val_loss[-1]):
            print(f"Early stopping at epoch {epoch}")
            break
            
    plot_losses(train_loss, val_loss, "Regression Training")
    return model

In [67]:
def test_regression(model, X_test, y_test):
    model.evaluate()  # Переводим модель в режим оценки
    predictions = model.forward(X_test)
    mse = np.mean((predictions - y_test) ** 2)
    print(f"Test MSE: {mse:.4f}")
    return predictions

In [77]:
reg_model = train_regression(X_r_train, y_r_train, X_r_test, y_r_test, X_r_train.shape[1])
test_regression(reg_model, X_r_test, y_r_test)

ValueError: Dot product shape mismatch, (5144, 36) vs (5144, 36)