# Лабораторная работа 4

## Нейронные сети

## Задание
1. Вам необходимо реализовать все объявленные методы в папке **mla**.
2. Вам необходимо реализовать класс NeuralNet используя методы из папки **mla** в ячейке 1.
3. Проверить работу на синтетических данных в ячейке 2.
4. Создать нейросеть и подобрать ее архитектуру и параметры для датасета из **вашего варианта** в ячейке 3.
5. Проверить работу и точность с помощью заранее объявленной строки в ячейке 3 - `assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.8`


In [1]:
from lab_4.mla.neuralnet import NeuralNet

In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(
    n_samples=750, n_features=10, n_informative=8, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=1111)

In [3]:
# Проверка работы
from mla.basic import Dense, Activation, Dropout
from mla.parameters import Parameters
from mla.constraints import MaxNorm
from mla.regularizers import L2
from mla.optimizers import Adadelta, Adam
from mla.utils import one_hot

from sklearn.metrics import roc_auc_score

y_train_onehot = one_hot(y_train)
y_test_onehot = one_hot(y_test)

model = NeuralNet(
    layers=[
        Dense(256, Parameters(init_name="uniform", regularizers={"W": L2(0.05)})),
        Activation("relu"),
        Dropout(0.5),
        Dense(128, Parameters(init_name="normal", constraints={"W": MaxNorm()})),
        Activation("relu"),
        Dense(2),
        Activation("softmax"),
    ],
    loss_name="categorical_crossentropy",
    optimizer=Adadelta(),
    metric_name="accuracy",
    batch_size=64,
    max_epochs=25,
)
model.fit(X_train, y_train_onehot)
predictions = model.predict(X_test)
assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95

  0%|          | 0/25 [00:00<?, ?it/s]

In [4]:
roc_auc_score(y_test_onehot[:, 0], predictions[:, 0])

1.0

In [20]:
import pandas as pd

# Загрузка данных
data = pd.read_csv('Diabetes Binary Classification.csv')
data

Unnamed: 0,Number of times pregnant,Plasma glucose concentration a 2 hours in an oral glucose tolerance test,Diastolic blood pressure (mm Hg),Triceps skin fold thickness (mm),2-Hour serum insulin (mu U/ml),Body mass index (weight in kg/(height in m)^2),Diabetes pedigree function,Age (years),Class variable (0 or 1)
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [21]:
# Разделение данных на признаки и целевую переменную
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Преобразование целевой переменной в one-hot представление
y_train_onehot = one_hot(y_train)
y_test_onehot = one_hot(y_test)

In [24]:
# Создание модели
model = NeuralNet(
    layers=[
        Dense(128, Parameters(init_name="uniform", regularizers={"W": L2(0.01)})),
        Activation("leakyrelu"),
        Dropout(0.5),
        Dense(64, Parameters(init_name="normal", constraints={"W": MaxNorm()})),
        Activation("leakyrelu"),
        Dense(2),
        Activation("softmax"),
    ],
    loss_name="categorical_crossentropy",
    optimizer=Adam(0.1),
    metric_name="accuracy",
    batch_size=64,
    max_epochs=50,
    verbose=True
)

# Обучение модели
model.fit(X_train, y_train_onehot)

# Предсказания
predictions = model.predict(X_test)

# Проверка метрики AUC-ROC
roc_auc = roc_auc_score(y_test_onehot[:, 0], predictions[:, 0])
print(f"AUC-ROC: {roc_auc:.4f}")

  0%|          | 0/50 [00:00<?, ?it/s]

AUC-ROC: 0.8108


In [25]:
# Утверждение, что модель достигает порога качества
assert roc_auc >= 0.8, "Model did not reach the expected ROC-AUC threshold."

In [19]:
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Импорт пользовательских классов и функций
from mla.basic import Dense, Activation, Dropout
from mla.parameters import Parameters
from mla.constraints import MaxNorm
from mla.regularizers import L2
from mla.optimizers import Adadelta, Adam
from mla.utils import one_hot
from mla.neuralnet import NeuralNet

# Загрузка данных
data = pd.read_csv('Diabetes Binary Classification.csv')

# Разделение данных на признаки и целевую переменную
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Преобразование целевой переменной в one-hot представление
y_train_onehot = one_hot(y_train)
y_test_onehot = one_hot(y_test)

# Гиперпараметры для перебора
param_grid = {
    "n_neurons": [128, 256, 512],
    "activation": ["relu", "leakyrelu"],
    "dropout": [0.2, 0.3, 0.5],
    "regularizer": [0.01, 0.05, 0.1],
    "optimizer": [Adadelta, Adam],
    "learning_rate": [0.001, 0.01, 0.1],
}

best_model = None
best_score = 0
best_params = {}

# Подбор гиперпараметров
for params in itertools.product(*param_grid.values()):
    n_neurons, activation, dropout, regularizer, optimizer, learning_rate = params

    # Создание модели
    model = NeuralNet(
        layers=[
            Dense(n_neurons, Parameters(init_name="uniform", regularizers={"W": L2(regularizer)})),
            Activation(activation),
            Dropout(dropout),
            Dense(int(n_neurons / 2), Parameters(init_name="normal", constraints={"W": MaxNorm()})),
            Activation(activation),
            Dense(2),
            Activation("softmax"),
        ],
        loss_name="categorical_crossentropy",
        optimizer=optimizer(learning_rate=learning_rate),
        metric_name="accuracy",
        batch_size=64,
        max_epochs=50,
        verbose=False,  # Уменьшаем вывод для экономии ресурсов
    )

    # Обучение модели
    model.fit(X_train, y_train_onehot)

    # Предсказания
    predictions = model.predict(X_test)

    # Оценка AUC-ROC
    roc_auc = roc_auc_score(y_test_onehot[:, 0], predictions[:, 0])

    print(f"Params: {params}, AUC-ROC: {roc_auc:.4f}")

    if roc_auc > best_score:
        best_score = roc_auc
        best_model = model
        best_params = {
            "n_neurons": n_neurons,
            "activation": activation,
            "dropout": dropout,
            "regularizer": regularizer,
            "optimizer": optimizer.__name__,
            "learning_rate": learning_rate,
        }

# Итоговый результат
print("Best Parameters:", best_params)
print("Best AUC-ROC:", best_score)


Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.001), AUC-ROC: 0.3964
Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.01), AUC-ROC: 0.5028
Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.1), AUC-ROC: 0.5416
Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adam'>, 0.001), AUC-ROC: 0.5651
Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adam'>, 0.01), AUC-ROC: 0.7473
Params: (128, 'relu', 0.2, 0.01, <class 'mla.optimizers.Adam'>, 0.1), AUC-ROC: 0.5000
Params: (128, 'relu', 0.2, 0.05, <class 'mla.optimizers.Adadelta'>, 0.001), AUC-ROC: 0.5822
Params: (128, 'relu', 0.2, 0.05, <class 'mla.optimizers.Adadelta'>, 0.01), AUC-ROC: 0.4057
Params: (128, 'relu', 0.2, 0.05, <class 'mla.optimizers.Adadelta'>, 0.1), AUC-ROC: 0.5741
Params: (128, 'relu', 0.2, 0.05, <class 'mla.optimizers.Adam'>, 0.001), AUC-ROC: 0.6626
Params: (128, 'relu', 0.2, 0.05, <class 'mla.optimizers.Adam'>, 0.01), AUC-ROC: 0.6918
Params: (128, 'rel

  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Params: (128, 'sigmoid', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.001), AUC-ROC: 0.5117


  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Params: (128, 'sigmoid', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.01), AUC-ROC: 0.5460


  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Params: (128, 'sigmoid', 0.2, 0.01, <class 'mla.optimizers.Adadelta'>, 0.1), AUC-ROC: 0.6507


  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Params: (128, 'sigmoid', 0.2, 0.01, <class 'mla.optimizers.Adam'>, 0.001), AUC-ROC: 0.7339


  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))


Params: (128, 'sigmoid', 0.2, 0.01, <class 'mla.optimizers.Adam'>, 0.01), AUC-ROC: 0.7313


  lambda ans, x, y : unbroadcast_f(y, lambda g: - g * x / y**2))
  return f_raw(*args, **kwargs)
  defvjp(anp.exp,    lambda ans, x : lambda g: ans * g)


ValueError: Input contains NaN.