# Практика 5

Номер варианта: 16 <br>
Последние две цифры билета: 72

In [134]:
import numpy as np
import pandas as pd
from keras.layers import Input, Dense, concatenate
from keras.models import Model, load_model
from keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping

Генерируем данные

In [135]:
# Build a reproducible synthetic dataset: every feature starts as uniform
# noise drawn from [0, 1).
np.random.seed(16)
num_samples = 1000
num_features = 7

data = np.random.rand(num_samples, num_features)

# Replace the third column with a noisy linear combination of the first two
# (0.5 * feature_1 + 0.3 * feature_2 + Gaussian noise with sigma = 0.05).
noise = np.random.normal(0, 0.05, num_samples)
data[:, 2] = 0.5 * data[:, 0] + 0.3 * data[:, 1] + noise

# Column labels are 1-based: feature_1 ... feature_7.
columns = [f"feature_{idx}" for idx in range(1, num_features + 1)]
data_df = pd.DataFrame(data, columns=columns)

# Keep a copy of the untouched dataset on disk.
data_df.to_csv("original_dataset.csv", index=False)

Выделяем третью колонку как целевую и убираем её из признаков ((номер зачетки = 72) % 7 + 1 = 3)

In [136]:
# feature_3 is the regression target; every remaining column is a model input.
target_column = "feature_3"
y = data_df[target_column]
X = data_df.drop(columns=[target_column])

Разбиваем данные и нормализуем

In [137]:

# Hold out 20% of the rows for the final evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=16,
)

# Min-max scale the inputs. The scaler is fitted on the training split only
# and then reused, unchanged, for the test split.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

Создаем единую модель

In [138]:
n_inputs = X_train_scaled.shape[1]

# Shared encoder: n_inputs -> 5 -> 3 (the 3-unit layer is the latent code).
input_layer = Input(shape=(n_inputs,), name="Input_Layer")
encoder_hidden = Dense(5, activation='relu', name="Encoder_Layer_1")(input_layer)
encoded = Dense(3, activation='relu', name="Encoded_Representation")(encoder_hidden)

# Reconstruction head: 3 -> 5 -> n_inputs with a sigmoid output.
decoder_hidden = Dense(5, activation='relu', name="Decoder_Layer_1")(encoded)
decoded = Dense(n_inputs, activation='sigmoid', name="Decoder_Output")(decoder_hidden)

# Regression head: a single linear unit fed directly by the latent code.
regression_output = Dense(1, activation='linear', name="Regression_Output")(encoded)

# One model, two outputs: [reconstruction, regression].
autoencoder_regressor = Model(
    inputs=input_layer,
    outputs=[decoded, regression_output],
    name="Autoencoder_Regressor",
)

Компилируем и обучаем модель

In [139]:
# Both heads are trained jointly with equally weighted MSE losses; MAE is
# reported per head.
output_names = ("Decoder_Output", "Regression_Output")
autoencoder_regressor.compile(
    optimizer=Adam(learning_rate=0.001),
    loss={head: "mse" for head in output_names},
    loss_weights={head: 1.0 for head in output_names},
    metrics={head: "mae" for head in output_names},
)

# Stop once val_loss has failed to improve by at least 0.001 for 10 epochs.
# restore_best_weights=False: the weights from the final epoch are kept.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=10,
    restore_best_weights=False,
)

# The decoder reconstructs its own (scaled) input; the regressor predicts y.
train_targets = {"Decoder_Output": X_train_scaled, "Regression_Output": y_train}

history = autoencoder_regressor.fit(
    X_train_scaled,
    train_targets,
    validation_split=0.2,
    epochs=150,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

Epoch 1/150
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - Decoder_Output_loss: 0.0856 - Decoder_Output_mae: 0.2542 - Regression_Output_loss: 0.1609 - Regression_Output_mae: 0.3555 - loss: 0.2466 - val_Decoder_Output_loss: 0.0816 - val_Decoder_Output_mae: 0.2441 - val_Regression_Output_loss: 0.1184 - val_Regression_Output_mae: 0.2988 - val_loss: 0.2000
Epoch 2/150
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - Decoder_Output_loss: 0.0834 - Decoder_Output_mae: 0.2499 - Regression_Output_loss: 0.1192 - Regression_Output_mae: 0.2984 - loss: 0.2026 - val_Decoder_Output_loss: 0.0816 - val_Decoder_Output_mae: 0.2441 - val_Regression_Output_loss: 0.0827 - val_Regression_Output_mae: 0.2396 - val_loss: 0.1643
Epoch 3/150
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - Decoder_Output_loss: 0.0842 - Decoder_Output_mae: 0.2506 - Regression_Output_loss: 0.0842 - Regression_Output_mae: 0.2424 - loss: 0.1685 - val_Dec

Разделяем модель на кодировщик, декодировщик и регрессионную модель

In [140]:
# Encoder: the trained graph from the notebook's input layer to the latent code.
encoder = Model(inputs=input_layer, outputs=encoded, name="Encoder")

# Take the latent width from the trained bottleneck tensor instead of repeating
# a literal, so the stand-alone heads cannot drift from the encoder definition.
latent_dim = encoded.shape[-1]
encoded_input = Input(shape=(latent_dim,), name="Encoded_Input")

# Decoder: re-apply the already trained decoder layers to the new latent input.
# The layers are fetched by name from the combined model, so weights are shared.
decoder_layer1 = autoencoder_regressor.get_layer("Decoder_Layer_1")(encoded_input)
decoder_output = autoencoder_regressor.get_layer("Decoder_Output")(decoder_layer1)
decoder = Model(inputs=encoded_input, outputs=decoder_output, name="Decoder")

# Regressor: the trained regression layer applied to the same latent input.
regressor_output = autoencoder_regressor.get_layer("Regression_Output")(encoded_input)
regression_model = Model(inputs=encoded_input, outputs=regressor_output, name="Regressor")


Получаем результаты работы моделей

In [141]:
# Encode the scaled test split once ...
encoded_data = encoder.predict(X_test_scaled, verbose=1)

# ... then feed the same latent codes to both heads (decoder first, then the
# regressor, matching the order of the progress bars in the output).
decoded_data, predicted_regression = (
    head.predict(encoded_data, verbose=1) for head in (decoder, regression_model)
)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


Сохраняем результаты работы моделей

In [142]:
# Latent codes produced by the encoder for the test split.
encoded_df = pd.DataFrame(
    encoded_data, columns=["encoded_1", "encoded_2", "encoded_3"]
)
encoded_df.to_csv("encoded_data.csv", index=False)

# Decoder reconstructions, labelled with the input feature names.
decoded_df = pd.DataFrame(decoded_data, columns=X.columns)
decoded_df.to_csv("decoded_data.csv", index=False)

# Held-out targets next to the regression head's predictions.
regression_df = pd.DataFrame(
    {"actual": y_test, "predicted": predicted_regression.flatten()}
)
regression_df.to_csv("regression_results.csv", index=False)

Сохраняем модели

In [143]:
# Persist the combined model and the three sub-models derived from it.
# The combined model is bound to `autoencoder_regressor`; no variable named
# `autoencoder` is defined in this notebook, so that name fails on a fresh kernel.
# NOTE(review): ".h5" is the legacy HDF5 format; recent Keras releases
# recommend the native ".keras" format. File names are left unchanged here.
autoencoder_regressor.save("autoencoder_model.h5")
encoder.save("encoder_model.h5")
decoder.save("decoder_model.h5")
regression_model.save("regression_model.h5")



Метрики для оценивания качества регрессионной модели

In [144]:
# Score the regression head on the held-out test split.
# The metric functions are imported in the notebook's top import cell.
# predict() returns a 2-D array with one column; flatten it once and reuse it.
y_pred = predicted_regression.flatten()

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Средняя абсолютная погрешность (MAE): {mae:.4f}")
print(f"Среднеквадратичная погрешность (MSE): {mse:.4f}")


Средняя абсолютная погрешность (MAE): 0.0405
Среднеквадратичная погрешность (MSE): 0.0027
