я сделал второе задание только для линейной регрессии, так как не особо умею работаать с классификацией

In [5]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PowerTransformer
from sklearn.model_selection import train_test_split

class CustomDataset(Dataset):
    def __init__(self, filepath, target_col='Price', test_size=0.2, random_state=42):
        # Загрузка данных
        df = pd.read_csv(filepath)
        
        # Определение типов колонок
        self.num_col = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
        self.cat_col = df.select_dtypes(include=['object', 'category']).columns.tolist()
        self.target_col = target_col
        
        # Удаление целевой из списков признаков
        if self.target_col in self.num_col:
            self.num_col.remove(self.target_col)
        if self.target_col in self.cat_col:
            self.cat_col.remove(self.target_col)
        
        # Предобработка
        df[self.num_col] = df[self.num_col].fillna(0)
        df[self.cat_col] = df[self.cat_col].fillna('other')
        df = df.drop_duplicates()
        
        # One-Hot Encoding
        self.ohe = OneHotEncoder(sparse_output=False)
        encoded_cat = self.ohe.fit_transform(df[self.cat_col])
        encoded_df = pd.DataFrame(encoded_cat, columns=self.ohe.get_feature_names_out(self.cat_col))
        
        # Создание финального датасета
        final_df = pd.concat([df[self.num_col], encoded_df, df[[self.target_col]]], axis=1)
        
        # Удаление константных колонок
        constant_cols = [col for col in final_df.columns if final_df[col].nunique() == 1]
        self.final_df = final_df.drop(columns=constant_cols)
        
        # Разделение данных
        X = self.final_df.drop(self.target_col, axis=1).values
        y = self.final_df[self.target_col].values
        
        # Масштабирование
        self.scaler = StandardScaler()
        X_scaled = self.scaler.fit_transform(X)
        self.power_trans = PowerTransformer()
        y_scaled = self.power_trans.fit_transform(y.reshape(-1, 1)).flatten()
        
        # Разделение на train/test
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled, y_scaled, test_size=test_size, random_state=random_state
        )
        
        # Преобразование в тензоры
        self.X_train = torch.tensor(X_train, dtype=torch.float32)
        self.y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
        self.X_test = torch.tensor(X_test, dtype=torch.float32)
        self.y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
        
    def get_train_data(self):
        return self.X_train, self.y_train
    
    def get_test_data(self):
        return self.X_test, self.y_test
    
    def inverse_transform_target(self, y):
        return self.power_trans.inverse_transform(y.numpy().reshape(-1, 1))
    
    def __len__(self):
        return len(self.X_train)
    
    def __getitem__(self, idx):
        return self.X_train[idx], self.y_train[idx]

class LinearRegressionManual:
    def __init__(self, in_features):
        self.w = torch.randn(in_features, 1, dtype=torch.float32, requires_grad=False)
        self.b = torch.zeros(1, dtype=torch.float32, requires_grad=False)
        self.l1_lambda = 0
        self.l2_lambda = 0

    def __call__(self, X):
        return X @ self.w + self.b

    def parameters(self):
        return [self.w, self.b]

    def zero_grad(self):
        self.dw = torch.zeros_like(self.w)
        self.db = torch.zeros_like(self.b)
    
    def l1_regularization(self):
        return torch.sum(torch.abs(self.w))

    def l2_regularization(self):
        return torch.sum(self.w ** 2)

    def backward(self, X, y, y_pred):
        n = X.shape[0]
        error = y_pred - y
        self.dw = (X.T @ error) / n
        self.db = error.mean(0)

        if self.l1_lambda > 0:
            self.dw += self.l1_lambda * torch.sign(self.w)
        if self.l2_lambda > 0:
            self.dw += 2 * self.l2_lambda * self.w
    
    def step(self, lr):
        self.w -= lr * self.dw
        self.b -= lr * self.db

    def set_l1_lambda(self, lambda_):
        self.l1_lambda = lambda_

    def set_l2_lambda(self, lambda_):
        self.l2_lambda = lambda_

    def save(self, path):
        torch.save({'w': self.w, 'b': self.b}, path)

    def load(self, path):
        state = torch.load(path)
        self.w = state['w']
        self.b = state['b']

def mse(y_pred, y_true):
    return torch.mean((y_pred - y_true) ** 2)

def r2_score(y_true, y_pred):
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)
    return 1 - ss_res / ss_tot

def log_epoch(epoch, loss, r2):
    print(f'Epoch {epoch:3d} | Loss: {loss:.4f} | R²: {r2:.4f}')

In [6]:
if __name__ == '__main__':
    # Инициализация и загрузка данных
    dataset = CustomDataset('sales_data.csv')
    X_train, y_train = dataset.get_train_data()
    X_test, y_test = dataset.get_test_data()
    
    # Создание DataLoader
    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=32, shuffle=True)
    test_loader = DataLoader(list(zip(X_test, y_test)), batch_size=32, shuffle=False)
    
    # Инициализация модели
    in_features = X_train.shape[1]
    model = LinearRegressionManual(in_features)
    #установка параметров l1 l2
    model.set_l1_lambda(0.01)
    model.set_l2_lambda(0.001)
    
    # Обучение
    lr = 0.01
    epochs = 100
    best_loss = float('inf')
    patience = 10
    no_improve = 0
    
    for epoch in range(1, epochs + 1):
        total_loss = 0
        
        for i, (batch_X, batch_y) in enumerate(train_loader):
            y_pred = model(batch_X)
            loss = mse(y_pred, batch_y)
            
            total_loss += loss.item()
            if model.l1_lambda > 0:
                total_loss += model.l1_lambda * model.l1_regularization().item()
            if model.l2_lambda > 0:
                total_loss += model.l2_lambda * model.l2_regularization().item()
            
            model.zero_grad()
            model.backward(batch_X, batch_y, y_pred)
            model.step(lr)
        
        # Оценка на тестовом наборе
        test_preds = []
        test_true = []
        
        for batch_X, batch_y in test_loader:
            y_pred = model(batch_X)
            test_preds.append(y_pred)
            test_true.append(batch_y)
        
        test_preds = torch.cat(test_preds)
        test_true = torch.cat(test_true)
        test_loss = mse(test_preds, test_true).item()
        r2_sc = r2_score(test_preds,test_true)
        
        avg_loss = total_loss / (i + 1)
        
        if epoch % 10 == 0:
            log_epoch(epoch, test_loss, r2_sc)
        
        # Ранняя остановка
        if (best_loss - avg_loss) > 0.001:
            best_loss = avg_loss
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print(f"\nРанняя остановка на эпохе {epoch}")
                break
    
    # Сохранение модели
    model.save('linreg_manual_price.pth')

Epoch  10 | Loss: 0.0658 | R²: 0.9294

Ранняя остановка на эпохе 18
