In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

# 1. Load the data
data = pd.read_excel("../datasets/2_class.xls")
# Every column except 'Class' is treated as an input feature
# (make sure 'Class' really is the target column).
features = data.columns.drop('Class')
num_features = len(features)

# 2. Normalise the feature columns; 'Class' is left out of the scaling
scaler = MinMaxScaler()
scaler.fit(data[features])
scaled_values = scaler.transform(data[features])

# 3. Создание скользящих окон для многомерных данных
def create_sequences(data, window_size=10):
    """Build every contiguous sliding window over the rows of ``data``.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_features)``. It is
            converted to a float array, so nested lists and DataFrames
            holding numeric values are accepted as well as ndarrays.
        window_size: Number of consecutive rows per window; must be >= 1.

    Returns:
        A new float array of shape ``(num_samples, window_size, n_features)``
        where ``num_samples = max(n_rows - window_size + 1, 0)``. Window
        ``i`` holds rows ``i`` through ``i + window_size - 1``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    values = np.asarray(data, dtype=float)
    # Clamp at zero so an input shorter than one window yields an empty
    # result instead of failing on a negative array dimension.
    num_samples = max(len(values) - window_size + 1, 0)
    # Row index of every element of every window: window start offsets run
    # down the first axis, in-window offsets along the second.
    row_idx = np.arange(num_samples)[:, None] + np.arange(window_size)[None, :]
    # Integer-array indexing gathers all windows in one step and returns a copy.
    return values[row_idx]

# Cut the scaled data into overlapping windows of `window_size` rows.
window_size = 10
X = create_sequences(scaled_values, window_size)
# Flatten each (window_size, num_features) window into a single row so it
# can be fed to the dense layers below.
X_reshaped = X.reshape((-1, window_size * num_features))

# 4. Autoencoder architecture
input_dim = window_size * num_features
encoding_dim = 32  # bottleneck width; tunable

input_layer = Input(shape=(input_dim,))
# Encoder half: input -> 128 -> encoding_dim
hidden_in = Dense(128, activation='relu')(input_layer)
encoder = Dense(encoding_dim, activation='relu')(hidden_in)
# Decoder half mirrors the encoder: encoding_dim -> 128 -> input
hidden_out = Dense(128, activation='relu')(encoder)
decoder = Dense(input_dim, activation='sigmoid')(hidden_out)

autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(loss='mse', optimizer=Adam(learning_rate=0.001))

# 5. Train the model to reproduce its own input
autoencoder.fit(
    x=X_reshaped,
    y=X_reshaped,
    batch_size=32,
    epochs=50,
    shuffle=True,
)

# 6. Reconstruction error of every window
reconstructions = autoencoder.predict(X_reshaped)
squared_error = np.power(X_reshaped - reconstructions, 2)
mse = squared_error.mean(axis=1)
# Windows whose error lies above the 0.95 quantile are treated as anomalous.
threshold = np.quantile(mse, 0.95)
anomalous_windows = mse > threshold

# 7. Mark the original rows covered by at least one anomalous window
anomaly_flags = np.zeros(len(data), dtype=int)
for window_start in np.flatnonzero(anomalous_windows):
    # Window k spans rows k .. k + window_size - 1; slicing clips at the end.
    anomaly_flags[window_start:window_start + window_size] = 1

# 8. Build the output dataset: the original 'Class' column followed by one
#    column per feature, each holding the same 0/1 anomaly flag per row.
flag_columns = pd.DataFrame(
    np.repeat(anomaly_flags[:, None], len(features), axis=1),
    index=data.index,
    columns=features,
)
new_data = pd.concat([data[['Class']], flag_columns], axis=1)

# Save the result
new_data.to_excel("../datasets/anomalies_dataset.xlsx", index=False)

Epoch 1/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0609   
Epoch 2/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0103  
Epoch 3/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0098 
Epoch 4/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0076  
Epoch 5/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0070  
Epoch 6/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0070  
Epoch 7/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0067  
Epoch 8/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0064  
Epoch 9/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0063  
Epoch 10/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss