# Лекция 2

```
- Введение в Машинное обучение, ИСП РАН (Весна 2025)
- Инструкторы: Александр Безносиков и Ко
- Автор ноутбука: Дмитрий Береснев
```

---

Содержание

- Функции потерь
    - Графики функций
    - Графики производных

- От регрессии к классификации. Пороговые значения

---

In [112]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objs as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

warnings.filterwarnings("ignore")

## Утилиты

In [113]:
# Directory (relative to the current working directory) into which
# save_plt and save_plotly write the exported figures.
ASSETS_PATH = "./assets/"

In [114]:
def set_seed(seed: int = 420):
    """Seed NumPy's global random number generator.

    Args:
        seed: Value handed to ``np.random.seed``.
    """
    np.random.seed(seed=seed)

In [115]:
def save_plt(name: str):
    """Save the current matplotlib figure as two SVG files under ``ASSETS_PATH``.

    Writes ``<name>.svg`` and ``<name>_transparent.svg`` (the latter with a
    transparent background). The target directory is created if it is missing,
    so the function also works on a fresh checkout.

    Args:
        name: File name without extension, relative to ``ASSETS_PATH``.
    """
    base_path = os.path.join(ASSETS_PATH, name)
    # savefig does not create missing directories by itself.
    os.makedirs(os.path.dirname(base_path) or ".", exist_ok=True)

    # Options shared by both exports.
    common_options = {"bbox_inches": "tight", "pad_inches": 0.1, "format": "svg"}
    plt.savefig(f"{base_path}.svg", **common_options)
    plt.savefig(f"{base_path}_transparent.svg", transparent=True, **common_options)
    return None


def save_plotly(name: str, fig: go.Figure, svg: bool = True, html: bool = True):
    """Export a plotly figure to ``ASSETS_PATH`` as HTML and/or SVG.

    The target directory is created if it is missing.

    Args:
        name: File name without extension, relative to ``ASSETS_PATH``.
        fig: Figure to export.
        svg: When true, write ``<name>.svg`` via ``fig.write_image``.
        html: When true, write ``<name>.html`` via ``fig.write_html``.
    """
    if not (html or svg):
        return None

    base_path = os.path.join(ASSETS_PATH, name)
    # Neither write_html nor write_image is relied upon to create directories.
    os.makedirs(os.path.dirname(base_path) or ".", exist_ok=True)

    if html:
        fig.write_html(f"{base_path}.html")
    if svg:
        fig.write_image(f"{base_path}.svg", format="svg")
    return None

## Функции потерь



$$\begin{align*}

    \mathcal{L}_{\text{MSE}}(y, \hat{y}) &=  \frac{1}{2}  {(y - \hat{y})}^2 \\
    \mathcal{L}_{\text{MAE}}(y, \hat{y}) &=   \left| y - \hat{y} \right| \\
    \mathcal{L}_{\text{Huber}}(y, \hat{y}) &= \begin{cases}
        \frac{1}{2}   {(y - \hat{y} )}^2, & \text{if } |y - \hat{y}| \leq \delta, \\
          \delta \left( |y - \hat{y}| - \frac{1}{2} \delta  \right), & \text{otherwise}.
    \end{cases}\\
    \mathcal{L}_{\text{LogCosh}}(y, \hat{y}) &=   \log \left[ \cosh (y - \hat{y}) \right] \\
    \mathcal{L}_{\text{MAPE}}(y, \hat{y}) &= 100 \left| \frac{y - \hat{y}}{ y} \right|
\end{align*}
$$

### Графики функций

In [116]:
def mse_loss(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Element-wise squared-error loss with the 1/2 factor: ``(y - y_hat)^2 / 2``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        Per-element loss.
    """
    residual = y_true - y_pred
    return residual**2 / 2


def mae_loss(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Element-wise absolute-error loss: ``|y - y_hat|``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        Per-element loss.
    """
    residual = y_true - y_pred
    return np.absolute(residual)


def huber_loss(y_true: np.ndarray, y_pred: np.ndarray, delta: float = 1.35) -> np.ndarray:
    """Element-wise Huber loss.

    Quadratic (``r^2 / 2``) where ``|r| <= delta`` and linear
    (``delta * (|r| - delta / 2)``) elsewhere, with ``r = y_true - y_pred``.
    Scalars and broadcastable shapes are accepted.

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        delta: Residual magnitude at which the loss switches from quadratic to linear.

    Returns:
        Per-element loss as a float array with the broadcast shape of the inputs.
    """
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    abs_residual = np.abs(residual)
    quadratic_part = 0.5 * residual**2
    linear_part = delta * (abs_residual - 0.5 * delta)
    # np.where picks the branch per element, so no mask indexing is needed.
    return np.where(abs_residual <= delta, quadratic_part, linear_part)


def log_cosh_loss(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Element-wise log-cosh loss: ``log(cosh(y - y_hat))``.

    Evaluated as ``|r| + log1p(exp(-2|r|)) - log(2)``, which is algebraically
    the same as ``log(cosh(r))`` but does not overflow for large ``|r|``
    (``cosh`` itself exceeds the float64 range for ``|r|`` above roughly 710).

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        Per-element loss.
    """
    abs_residual = np.abs(y_true - y_pred)
    return abs_residual + np.log1p(np.exp(-2.0 * abs_residual)) - np.log(2.0)


def mape_loss(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Element-wise absolute percentage error: ``100 * |(y - y_hat) / y|``.

    Args:
        y_true: Target values; used as the divisor.
        y_pred: Predicted values.

    Returns:
        Per-element loss in percent.
    """
    relative_error = (y_true - y_pred) / y_true
    return 100 * np.abs(relative_error)

In [117]:
def plot_losses(
    y_true: float = 10,
    max_shift: float = 5,
    save_img: bool = False,
    seed: int = 420,
    plot_separate: bool = False,
):
    """Plot the MSE, MAE, Huber, LogCosh and MAPE losses against the residual.

    The residual ``y - y_hat`` runs over an evenly spaced grid on
    ``[-max_shift, max_shift]``. One combined figure is always shown; with
    ``plot_separate`` each loss is additionally shown on its own beforehand.

    Args:
        y_true: Constant target value used for every grid point.
        max_shift: Half-width of the residual grid.
        save_img: When true, each shown figure is also exported with ``save_plotly``.
        seed: Passed to ``set_seed``.
        plot_separate: When true, also show (and optionally export) one figure per loss.
    """
    set_seed(seed)

    # Residual grid; the exact point 0 is added when linspace did not produce it.
    shifts = np.linspace(-max_shift, max_shift, 50)
    if not np.any(shifts == 0):
        # The grid has an even number of points, so 0 belongs in the middle.
        shifts = np.insert(shifts, shifts.size // 2, 0)
    y_true_np = np.full(shifts.size, y_true)
    y_pred_np = y_true_np - shifts

    # Trace name -> loss function, in plotting order.
    loss_by_name = {
        "MSE": mse_loss,
        "MAE": mae_loss,
        "Huber": huber_loss,
        "LogCosh": log_cosh_loss,
        "MAPE": mape_loss,
    }

    fig = go.Figure()
    for trace_name, loss_fn in loss_by_name.items():
        fig.add_trace(
            go.Scatter(
                x=shifts,
                y=loss_fn(y_true_np, y_pred_np),
                mode="lines+markers",
                name=trace_name,
            )
        )

    combined_title = "Функции ошибки"
    fig.update_layout(
        title=combined_title,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        xaxis_title=r"$y - \hat{y}= y - \langle w, x \rangle$",
        yaxis_title="Ошибка",
        width=900,
        height=600,
        yaxis_range=[0, 5],
        margin={"l": 30, "r": 35, "t": 50, "b": 30},
    )

    if plot_separate:
        # Single-loss figures use an automatic y range.
        fig.update_layout(yaxis_range=None)
        for trace_name in loss_by_name:
            # Hide everything, then re-enable only the current trace.
            fig.update_traces(patch={"visible": False})
            fig.update_traces(patch={"visible": True}, selector={"name": trace_name})
            fig.update_layout(title=f"Функция ошибки {trace_name}")
            fig.show()

            if save_img:
                save_plotly(f"losses_{trace_name.lower()}", fig)

        # Restore the combined view.
        fig.update_traces(patch={"visible": True})
        fig.update_layout(title=combined_title, yaxis_range=[0, 5])

    fig.show()

    if save_img:
        save_plotly("losses", fig)


# Show each loss separately and all together; every shown figure is also
# exported through save_plotly (save_img=True).
plot_losses(save_img=True, plot_separate=True)

### Графики производных

In [118]:
def mse_loss_derivative(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Derivative of ``r^2 / 2`` with respect to the residual ``r = y_true - y_pred``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        The residual itself, element-wise.
    """
    residual = y_true - y_pred
    return residual


def mae_loss_derivative(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """(Sub)derivative of ``|r|`` with respect to the residual ``r = y_true - y_pred``.

    Returns the sign of the residual: ``-1``, ``0`` or ``+1``. At ``r == 0``,
    where ``|r|`` is not differentiable, the subgradient ``0`` is returned
    instead of the ``NaN`` that ``r / |r|`` would produce.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        Element-wise sign of the residual.
    """
    return np.sign(y_true - y_pred)


def huber_loss_derivative(
    y_true: np.ndarray, y_pred: np.ndarray, delta: float = 1.35
) -> np.ndarray:
    """Derivative of the Huber loss with respect to the residual ``r = y_true - y_pred``.

    Equals ``r`` where ``|r| <= delta`` and ``delta * sign(r)`` elsewhere.
    Scalars and broadcastable shapes are accepted.

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        delta: Residual magnitude at which the loss switches from quadratic to linear.

    Returns:
        Per-element derivative as a float array with the broadcast shape of the inputs.
    """
    residual = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    # Per-element branch selection; avoids mask indexing, which needs array inputs.
    return np.where(np.abs(residual) <= delta, residual, delta * np.sign(residual))


def log_cosh_loss_derivative(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Derivative of ``log(cosh(r))`` with respect to the residual ``r = y_true - y_pred``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        ``tanh(r)``, element-wise.
    """
    residual = y_true - y_pred
    return np.tanh(residual)


def mape_loss_derivative(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """Value of ``mae_loss_derivative`` divided element-wise by ``y_true``.

    NOTE(review): ``mape_loss`` in this file multiplies by 100 and takes the
    absolute value of the ratio; this derivative applies neither the factor
    100 nor ``|y_true|`` -- confirm that the omission is intended (e.g. for
    plot scaling).

    Args:
        y_true: Target values; used as the divisor.
        y_pred: Predicted values.

    Returns:
        Per-element result of the division.
    """
    mae_derivative = mae_loss_derivative(y_true, y_pred)
    return mae_derivative / y_true

In [119]:
def plot_loss_derivatives(
    y_true: float = 5,
    max_shift: float = 4,
    save_img: bool = False,
    seed: int = 420,
    plot_separate: bool = False,
):
    """Plot the derivatives of the MSE, MAE, Huber, LogCosh and MAPE losses.

    The residual ``y - y_hat`` runs over an evenly spaced grid on
    ``[-max_shift, max_shift]``. One combined figure is always shown; with
    ``plot_separate`` each derivative is additionally shown on its own beforehand.

    Args:
        y_true: Constant target value used for every grid point.
        max_shift: Half-width of the residual grid.
        save_img: When true, each shown figure is also exported with ``save_plotly``.
        seed: Passed to ``set_seed``.
        plot_separate: When true, also show (and optionally export) one figure
            per derivative.
    """
    set_seed(seed)

    # Residual grid; the exact point 0 is added when linspace did not produce it.
    shifts = np.linspace(-max_shift, max_shift, 50)
    if not np.any(shifts == 0):
        # The grid has an even number of points, so 0 belongs in the middle.
        shifts = np.insert(shifts, shifts.size // 2, 0)
    y_true_np = np.full(shifts.size, y_true)
    y_pred_np = y_true_np - shifts

    # Trace name -> derivative function, in plotting order.
    derivative_by_name = {
        "MSE": mse_loss_derivative,
        "MAE": mae_loss_derivative,
        "Huber": huber_loss_derivative,
        "LogCosh": log_cosh_loss_derivative,
        "MAPE": mape_loss_derivative,
    }

    fig = go.Figure()
    for trace_name, derivative_fn in derivative_by_name.items():
        fig.add_trace(
            go.Scatter(
                x=shifts,
                y=derivative_fn(y_true_np, y_pred_np),
                mode="lines+markers",
                name=trace_name,
            )
        )

    combined_title = "Производные функций ошибки"
    fig.update_layout(
        title=combined_title,
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        },
        xaxis_title=r"$y-\hat{y}= y - \langle w, x \rangle$",
        # The plotted quantity is the derivative of the loss, not the loss itself.
        yaxis_title="Производная ошибки",
        width=900,
        height=600,
        yaxis_range=[-2, 2],
        margin=dict(l=30, r=35, t=50, b=30),
    )

    if plot_separate:
        # Single-derivative figures use an automatic y range.
        fig.update_layout(yaxis_range=None)
        for trace_name in derivative_by_name:
            # Hide everything, then re-enable only the current trace.
            fig.update_traces(patch={"visible": False})
            fig.update_traces(patch={"visible": True}, selector={"name": trace_name})
            fig.update_layout(title=f"Производная функции ошибки {trace_name}")
            fig.show()

            if save_img:
                save_plotly(f"loss_derivatives_{trace_name.lower()}", fig)

        # Restore the combined view.
        fig.update_traces(patch={"visible": True})
        fig.update_layout(title=combined_title, yaxis_range=[-2, 2])

    fig.show()

    if save_img:
        save_plotly("loss_derivatives", fig)


# Show each loss derivative separately and all together; every shown figure is
# also exported through save_plotly (save_img=True).
plot_loss_derivatives(save_img=True, plot_separate=True)

## От регрессии к классификации. Пороговые значения

In [127]:
def find_nearest_idx(array: np.ndarray, value: float):
    """Return the index of the element of ``array`` closest to ``value``.

    On ties, the first such index is returned (``argmin`` semantics).

    Args:
        array: Values to search.
        value: Reference value.
    """
    distances = np.abs(array - value)
    return np.argmin(distances)


def create_plot_threshold(
    t: float,
    x: np.ndarray,
    y_true: np.ndarray,
    y_pred: np.ndarray,
):
    """Build a figure showing a regression output turned into classes by threshold ``t``.

    Points with ``y_pred > t`` are predicted as ``+1`` (blue), the rest as ``-1``
    (red). The marker shape encodes the true class: circles for ``y_true == 1``,
    squares otherwise. Following the legend, ``-1`` is "class A" and ``+1`` is
    "class B"; the hover names of the data traces use the same convention.

    Args:
        t: Decision threshold applied to ``y_pred``.
        x: Feature values (x-axis positions).
        y_true: True labels; compared against ``1`` to split the classes.
        y_pred: Regression outputs (y-axis positions).

    Returns:
        A ``go.Figure`` with the points, legend-only helper traces, the threshold
        line and an annotation with accuracy, recall, precision and F1.
    """
    y_pred_class = np.where(y_pred > t, 1, -1)

    # Metrics. zero_division=0 keeps the score at 0 when no sample is predicted
    # (or labelled) positive, without depending on a global warnings filter.
    acc = accuracy_score(y_true, y_pred_class)
    recall = recall_score(y_true, y_pred_class, zero_division=0)
    precision = precision_score(y_true, y_pred_class, zero_division=0)
    f1 = f1_score(y_true, y_pred_class, zero_division=0)

    fig = go.Figure()

    # Data points: (mask of true class, marker symbol, hover name).
    # The names follow the legend below: circles are class B, squares are class A.
    true_positive_class = y_true == 1
    point_groups = [
        (true_positive_class, "circle", "Класс B"),
        (~true_positive_class, "square", "Класс A"),
    ]
    for mask, symbol, hover_name in point_groups:
        fig.add_trace(
            go.Scatter(
                x=x[mask],
                y=y_pred[mask],
                mode="markers",
                marker=dict(
                    # Fill colour encodes the predicted class.
                    color=["blue" if label == 1 else "red" for label in y_pred_class[mask]],
                    symbol=symbol,
                    line=dict(width=1, color="DarkSlateGrey"),
                    size=8,
                ),
                showlegend=False,
                name=hover_name,
            )
        )

    # Legend-only traces (no data): marker shape -> true class.
    for legend_name, symbol in (("Класс A", "square-open"), ("Класс B", "circle-open")):
        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="markers",
                marker=dict(size=8, color="black", symbol=symbol),
                name=legend_name,
            )
        )

    # Legend-only traces (no data): colour -> predicted class.
    for legend_name, color in (
        ("Предсказанные как A", "red"),
        ("Предсказанные как B", "blue"),
    ):
        fig.add_trace(
            go.Scatter(
                x=[None],
                y=[None],
                mode="lines",
                line=dict(color=color),
                name=legend_name,
            )
        )

    # Horizontal threshold line spanning the data range.
    fig.add_trace(
        go.Scatter(
            x=[np.min(x), np.max(x)],
            y=[t, t],
            mode="lines",
            line=dict(dash="dash", color="black"),
            name=f"Порог t = {t:.2f}",
        )
    )

    # Layout and metric annotation.
    fig.update_layout(
        title="Бинарная классификация через регрессию",
        xaxis_title="x",
        yaxis_title="y",
        showlegend=True,
        annotations=[
            dict(
                x=0.9,
                y=1.1,
                xref="paper",
                yref="paper",
                text=f"Accuracy: {acc:.2f} | Recall: {recall:.2f} | Precision: {precision:.2f} | F1-Score: {f1:.2f}",
                showarrow=False,
            )
        ],
        width=900,
        height=600,
    )

    return fig


def plot_threshold(
    n: int = 100, desired_optimal_t: float = 0.0, save_img: bool = False, seed: int = 420
):
    """Show an interactive threshold demo: binary classification via linear regression.

    Synthetic data are generated, a ``LinearRegression`` is fitted on the first
    60% of the (shuffled) points, and a figure with a slider over candidate
    thresholds is shown. Each slider step is a frame built by
    ``create_plot_threshold``.

    Args:
        n: Number of generated points.
        desired_optimal_t: Shifts the class boundary of the generated labels
            (a point is labelled ``+1`` when its noisy ``x`` is at least
            ``desired_optimal_t * scale``); also selects the initially active
            slider position and is part of the export file name.
        save_img: When true, the figure is exported with ``save_plotly``.
        seed: Passed to ``set_seed``.
    """
    set_seed(seed)

    # Data generation. The order of the random calls determines the data set.
    scale = 5
    x = np.linspace(-scale, scale, n) + np.random.normal(0, 0.2, n)
    np.random.shuffle(x)
    # Noisy regression target.
    y = x / scale + np.random.normal(0, 1, n)
    # Class labels in {-1, +1} from a noisy copy of x.
    y_true = np.where(x + np.random.normal(0, 2, n) >= desired_optimal_t * scale, 1, -1)

    # Fit on the first 60% of the points, predict for all of them.
    train_size = int(0.6 * len(x))
    train_x, train_y = x[:train_size].reshape(-1, 1), y[:train_size]

    model = LinearRegression()
    model.fit(train_x, train_y)
    y_pred = model.predict(x.reshape(-1, 1))

    # Slider thresholds: denser around 0. np.concatenate is used because the
    # np.concat alias is only available in NumPy >= 2.0.
    t_values = np.concatenate(
        [
            np.linspace(-2, -0.5, 10, endpoint=False),
            np.linspace(-0.5, 0, 10, endpoint=False),
            np.linspace(0, 0.5, 10, endpoint=False),
            np.linspace(0.5, 2, 10),
        ]
    )

    # One animation frame per threshold; the frame name is what the slider steps reference.
    frames = []
    for t in t_values:
        t_fig = create_plot_threshold(t, x, y_true, y_pred)
        frames.append(go.Frame(data=t_fig.data, name=str(t), layout=t_fig.layout))

    # Start at the threshold closest to the requested one.
    t_start_idx = int(find_nearest_idx(t_values, desired_optimal_t))
    fig = create_plot_threshold(t_values[t_start_idx], x, y_true, y_pred)
    fig.update(frames=frames)

    slider_steps = [
        {
            "args": [
                [str(t)],
                {
                    "frame": {"duration": 0, "redraw": True},
                    "mode": "immediate",
                },
            ],
            "label": f"{t:.2f}",
            "method": "animate",
        }
        for t in t_values
    ]
    fig.update_layout(
        sliders=[
            {
                "active": t_start_idx,
                "currentvalue": {"prefix": "Порог t: "},
                "pad": {"t": 50},
                "steps": slider_steps,
            }
        ]
    )

    fig.show()
    if save_img:
        save_plotly(f"threshold_{desired_optimal_t}", fig)


# Threshold demo on 50 points with desired_optimal_t=0.0; the figure is also
# exported through save_plotly (save_img=True).
plot_threshold(n=50, desired_optimal_t=0.0, save_img=True)

In [128]:
# Same demo with the label boundary shifted up (desired_optimal_t=0.5).
plot_threshold(n=50, desired_optimal_t=0.5, save_img=True)

In [129]:
# Same demo with the label boundary shifted down (desired_optimal_t=-0.5).
plot_threshold(n=50, desired_optimal_t=-0.5, save_img=True)