In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so files stored there can be addressed
# by filesystem path (Colab-only: relies on the google.colab package).
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, LearningRateScheduler
from tensorflow.keras.metrics import Recall
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.initializers import HeUniform
from tensorflow.keras import regularizers
import os
import sklearn.metrics
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import seaborn as sns
import keras as ks


In [None]:
# Load the merged CSV from the mounted Drive. The 'ts' column becomes the index
# and is parsed as dates, so row order can be treated as time order downstream.
dataset = pd.read_csv('/content/drive/MyDrive/merged_data.csv', index_col='ts', parse_dates=True)

In [None]:
# Balanced class weights: each class is weighted by total / (2 * class_count),
# so the rarer class contributes proportionally more to the loss.
neg, pos = np.bincount(dataset['anomaly_label'])
total = pos + neg
weight_for_0 = (total / 2.0) * (1 / neg)
weight_for_1 = (total / 2.0) * (1 / pos)

# Manual tuning multipliers; 1.0 leaves the balanced weights unchanged.
adjusted_weight_for_0 = 1.0 * weight_for_0
adjusted_weight_for_1 = 1.0 * weight_for_1

class_weight = {
    0: adjusted_weight_for_0,
    1: adjusted_weight_for_1,
}

# Windowing / batching hyperparameters shared by the cells below.
time_steps = 50   # number of consecutive rows in one input sequence
batch_size = 128  # number of sequences per batch
output_size = 1

In [None]:
# Split data function
def split_data_chronologically(data, test_ratio=0.2, val_ratio=0.2, target_column='anomaly_label'):
    """Cut ``data`` into contiguous train / validation / test segments, in that order.

    No shuffling is done: the first rows become the training set, the following
    rows the validation set and the final rows the test set.

    Args:
        data: Object supporting ``len()`` and positional ``.iloc`` slicing
            (e.g. a pandas DataFrame), already ordered in time.
        test_ratio: Fraction of all rows placed in the trailing test segment.
        val_ratio: Fraction of all rows placed in the validation segment that
            sits directly before the test segment.
        target_column: Accepted for signature symmetry with ``scale_data``;
            not used by the split itself.

    Returns:
        Tuple ``(train_data, val_data, test_data)``.
    """
    n_rows = len(data)

    # Position where the test segment starts.
    test_start = int(n_rows * (1 - test_ratio))
    # val_ratio is relative to the whole dataset, so rescale it to the
    # non-test portion before locating the train/validation boundary.
    val_start = int(test_start * (1 - val_ratio / (1 - test_ratio)))

    return (
        data.iloc[:val_start],
        data.iloc[val_start:test_start],
        data.iloc[test_start:],
    )

# Scale data function
def scale_data(train_data, val_data, test_data, target_column='anomaly_label'):
    """Standardise the feature columns of each split and separate out the labels.

    The ``StandardScaler`` is fitted on the training features only and then
    applied unchanged to the validation and test features.

    Args:
        train_data: Training split, including ``target_column``.
        val_data: Validation split, including ``target_column``.
        test_data: Test split, including ``target_column``.
        target_column: Name of the label column; it is excluded from scaling.

    Returns:
        Tuple ``(train_scaled, train_labels, val_scaled, val_labels,
        test_scaled, test_labels)`` where the ``*_scaled`` items are the
        scaler's transformed feature arrays and the ``*_labels`` items are the
        raw label values.
    """
    # Statistics come from the training features only.
    scaler = StandardScaler().fit(train_data.drop(columns=[target_column]))

    outputs = []
    for split in (train_data, val_data, test_data):
        outputs.append(scaler.transform(split.drop(columns=[target_column])))
        outputs.append(split[target_column].values)

    return tuple(outputs)

# Create TensorFlow dataset function
def create_tf_dataset(features, labels, time_steps=100, batch_size=32, shuffle=False):
    """Build a batched dataset of sliding windows labelled with the *next* step's label.

    Window ``i`` covers ``features[i : i + time_steps]`` and is paired with
    ``labels[i + time_steps]``, i.e. the label of the row immediately after the
    window.

    The targets are produced by slicing rather than ``np.roll``: rolling wraps
    the first ``time_steps`` labels round to the end of the array, which pairs
    the final window with a label taken from the start of the series.

    Args:
        features: Array-like indexed by time step along its first axis.
        labels: One label per time step, aligned with ``features``.
        time_steps: Number of consecutive rows in each window.
        batch_size: Number of windows per batch.
        shuffle: Whether the windows are shuffled.

    Returns:
        The dataset returned by
        ``tf.keras.preprocessing.timeseries_dataset_from_array``.

    Raises:
        ValueError: If there are not more labels than ``time_steps``, so that
            no window has a following label.
    """
    labels = np.asarray(labels)
    if len(labels) <= time_steps:
        raise ValueError(
            f"Need more than time_steps={time_steps} rows to build a window "
            f"with a following label, got {len(labels)}."
        )

    # Drop the last feature row (it has no following label) and the first
    # `time_steps` labels (they have no full preceding window), so that
    # targets[i] is exactly the label that follows window i.
    return tf.keras.preprocessing.timeseries_dataset_from_array(
        data=features[:-1],
        targets=labels[time_steps:],
        sequence_length=time_steps,
        sequence_stride=1,
        shuffle=shuffle,
        batch_size=batch_size,
    )

# Define metrics
# Keras metric objects for a binary classifier: squared error, the four
# confusion-matrix counts, accuracy, precision/recall, and two AUC variants.
# NOTE(review): the model.compile call in this notebook passes its own metric
# list rather than METRICS — confirm whether this list is still needed.
METRICS = [
      ks.metrics.MeanSquaredError(name='MSE'),
      ks.metrics.TruePositives(name='tp'),
      ks.metrics.FalsePositives(name='fp'),
      ks.metrics.TrueNegatives(name='tn'),
      ks.metrics.FalseNegatives(name='fn'),
      ks.metrics.BinaryAccuracy(name='accuracy'),
      ks.metrics.Precision(name='precision'),
      ks.metrics.Recall(name='recall'),
      ks.metrics.AUC(name='auc'),  # default curve
      ks.metrics.AUC(name='prc', curve='PR'), # area under the precision-recall curve
]

In [None]:
# Chronological split, then standardise the features of every split.
train_data, val_data, test_data = split_data_chronologically(dataset)
(
    train_scaled, train_labels,
    val_scaled, val_labels,
    test_scaled, test_labels,
) = scale_data(train_data, val_data, test_data)

# Windowed, batched datasets for each split; none of them is shuffled.
train_dataset = create_tf_dataset(
    train_scaled, train_labels,
    time_steps=time_steps, batch_size=batch_size, shuffle=False,
)
val_dataset = create_tf_dataset(
    val_scaled, val_labels,
    time_steps=time_steps, batch_size=batch_size, shuffle=False,
)
test_dataset = create_tf_dataset(
    test_scaled, test_labels,
    time_steps=time_steps, batch_size=batch_size, shuffle=False,
)

# (window length, number of feature columns) for the model's first layer.
input_shape = (time_steps, train_scaled.shape[-1])

In [None]:
# Stacked LSTM binary classifier: five recurrent layers of decreasing width
# (500 -> 100 units), each followed by batch normalisation and 50% dropout,
# ending in a single sigmoid unit. All kernels carry an L2 penalty of 0.01.
model = Sequential()

model.add(LSTM(500, return_sequences=True, input_shape=input_shape,
               kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(LSTM(400, return_sequences=True, kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(LSTM(300, return_sequences=True, kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(LSTM(200, return_sequences=True, kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Last recurrent layer returns only its final output, not the full sequence.
model.add(LSTM(100, kernel_regularizer=l2(0.01)))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid', kernel_regularizer=l2(0.01)))

# Binary cross-entropy with Adam at a small learning rate (1e-4); tracks
# accuracy, recall and precision in that order.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Recall(), tf.keras.metrics.Precision()],
)

# Define learning rate scheduler
def lr_scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The rate is left unchanged for the first 5 epochs (indices 0-4) and is
    multiplied by ``exp(-0.1)`` on every later epoch.

    Args:
        epoch: Zero-based epoch index supplied by ``LearningRateScheduler``.
        lr: Learning rate currently in use.

    Returns:
        The new learning rate as a plain Python ``float``. A float is returned
        (rather than a tensor) because ``LearningRateScheduler`` expects the
        schedule function to produce a float.
    """
    if epoch < 5:
        return float(lr)
    return float(lr * np.exp(-0.1))

lr_callback = LearningRateScheduler(lr_scheduler)

# Stop once val_loss has not improved for 3 consecutive epochs and roll the
# model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train with early stopping and the learning-rate schedule actually attached.
# batch_size is not passed here: train_dataset is already batched, and fit()
# must not be given batch_size for dataset inputs.
history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    class_weight=class_weight,
    callbacks=[early_stopping, lr_callback],
)

# Score the model on the test split. evaluate() yields the loss followed by the
# compiled metrics in the order they were passed to compile().
loss, accuracy, recall, precision = model.evaluate(test_dataset)
for caption, value in (
    ("Test Loss:", loss),
    ("Test Accuracy:", accuracy),
    ("Test recall:", recall),
    ("Test precision:", precision),
):
    print(caption, value)

# Model outputs (sigmoid activations) for every test window.
predictions = model.predict(test_dataset)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50

In [None]:
from tensorflow.keras.models import load_model
import pickle

# Requires `model` and `history` from the training cell above.

# Write the trained model to disk in the .keras format.
model.save("your_model.keras")

# Pickle only the plain metrics dict (history.history), not the History object.
with open('history.pkl', 'wb') as history_file:
    pickle.dump(history.history, history_file)