In [1]:
import numpy as np
import pandas as pd
import os
import gdown
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import random 
import itertools
import mlflow
import mlflow.pytorch
from mlflow.models import infer_signature

# Send MLflow runs to the tracking server reachable over HTTP on localhost:5000
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

# ---- Step 0: Set up the environment ----
SEED = 42  # one seed shared by every random number generator used below
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Choose the compute device once; the CUDA generators are seeded only when a
# GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ---- Step 1: Prepare your data ----

def create_sequences(data, window_size):
    """Slice `data` into overlapping windows of `window_size` consecutive rows.

    Window ``i`` holds rows ``i .. i + window_size - 1``. Exactly
    ``len(data) - window_size`` windows are produced, so ``i + window_size`` is
    always a valid row index of `data` (the window that would end on the very
    last row is therefore not emitted).

    Args:
        data: Array-like whose first axis is the one being windowed.
        window_size: Number of consecutive rows per window; must be >= 1.

    Returns:
        np.ndarray of shape ``(n_windows, window_size, *data.shape[1:])``.
        When `data` has no more rows than `window_size`, the result is an
        empty array that still carries the trailing dimensions, so downstream
        code that expects this rank keeps working.

    Raises:
        ValueError: If `window_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    data = np.asarray(data)
    n_windows = len(data) - window_size
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the full rank instead.
        return np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)

    return np.stack([data[start:start + window_size] for start in range(n_windows)])

In [2]:
def _read_csv_dir(directory):
    """Read every regular file in `directory` as a ';'-separated CSV.

    Entries that are not regular files (for example a `.ipynb_checkpoints`
    sub-directory) are skipped, because `pd.read_csv` cannot open them.

    Returns:
        (names, frames): the file names in sorted order and the DataFrames
        parsed from them, in the same order.
    """
    names = sorted(
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )
    frames = [pd.read_csv(os.path.join(directory, name), sep=";") for name in names]
    return names, frames


train_file_names, train_files = _read_csv_dir("train/")
test_file_names, test_files = _read_csv_dir("test/")

# Concatenate all train files into one DataFrame
all_train_df = pd.concat(train_files, ignore_index=True)
# Single feature column, reshaped to (n_rows, 1) as the scaler expects 2-D input
values = all_train_df['Value1'].values.reshape(-1, 1)

# Normalize: the scaler is fitted on the training values only
scaler = MinMaxScaler()
values_scaled = scaler.fit_transform(values)

In [None]:
class LSTMAutoencoder(nn.Module):
    """Sequence autoencoder made of two single-layer, batch-first LSTMs.

    The encoder maps ``(batch, seq, n_features)`` to a final hidden state of
    size `latent_dim`. That state is copied to every time step and fed to the
    decoder, whose hidden size is `n_features`, so the reconstruction has the
    same shape as the input.

    Args:
        seq_len: Accepted for interface compatibility; not used by this class
            (the sequence length is read from the input in `forward`).
        n_features: Number of features per time step.
        latent_dim: Size of the encoder's hidden state.
    """

    def __init__(self, seq_len, n_features, latent_dim):
        super().__init__()
        # Encoder is built before the decoder so that parameter initialisation
        # draws from the RNG in a fixed order.
        self.encoder = nn.LSTM(n_features, latent_dim, batch_first=True)
        self.decoder = nn.LSTM(latent_dim, n_features, batch_first=True)

    def forward(self, x):
        """Reconstruct `x`; the output has the same shape as `x`."""
        n_steps = x.size(1)
        _, (final_hidden, _) = self.encoder(x)
        # final_hidden: (1, batch, latent_dim) -> (batch, n_steps, latent_dim),
        # i.e. the same latent vector repeated at every time step.
        decoder_input = final_hidden.transpose(0, 1).repeat(1, n_steps, 1)
        decoded, _ = self.decoder(decoder_input)
        return decoded

In [6]:
# Define your hyperparameter grid
window_sizes = [10, 25]
latent_dims = [8, 16]
learning_rates = [1e-3, 1e-4]
batch_sizes = [32, 64]
EPOCHS = 10

# Scaling does not depend on any hyperparameter, so fit it once for the whole
# grid instead of refitting inside every run.
values = all_train_df['Value1'].values.reshape(-1, 1)
scaler = MinMaxScaler()
values_scaled = scaler.fit_transform(values)

# One MLflow run per combination of the grid above.
for WINDOW_SIZE, latent_dim, learning_rate, BATCH_SIZE in itertools.product(
    window_sizes, latent_dims, learning_rates, batch_sizes
):
    with mlflow.start_run():
        # Log hyperparameters
        mlflow.log_params({
            "WINDOW_SIZE": WINDOW_SIZE,
            "latent_dim": latent_dim,
            "learning_rate": learning_rate,
            "BATCH_SIZE": BATCH_SIZE,
            "EPOCHS": EPOCHS,
        })

        # Prepare data: windows of WINDOW_SIZE consecutive scaled values
        sequences = create_sequences(values_scaled, WINDOW_SIZE)
        if len(sequences) == 0:
            raise ValueError(
                f"Training data is too short for WINDOW_SIZE={WINDOW_SIZE}"
            )
        sequences_tensor = torch.tensor(sequences, dtype=torch.float32).to(device)
        n_train = len(sequences_tensor)

        # Model, optimizer, loss
        model = LSTMAutoencoder(seq_len=WINDOW_SIZE, n_features=1, latent_dim=latent_dim).to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Training loop (sequential mini-batches, no shuffling)
        model.train()
        for epoch in range(EPOCHS):
            # Accumulate on-device and convert once per epoch; the logged value
            # is the sample-weighted mean over the epoch rather than the loss
            # of whichever batch happened to come last.
            epoch_loss = torch.zeros((), device=device)
            for i in range(0, n_train, BATCH_SIZE):
                batch = sequences_tensor[i:i+BATCH_SIZE]
                optimizer.zero_grad()
                output = model(batch)
                loss = criterion(output, batch)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.detach() * batch.size(0)
            mlflow.log_metric("train_loss", (epoch_loss / n_train).item(), step=epoch)

        # From here on the model is only used for inference.
        model.eval()

        # infer_signature works on NumPy data, so hand it detached CPU arrays
        # rather than (possibly CUDA, gradient-tracking) torch tensors.
        with torch.no_grad():
            example_input = sequences_tensor[:BATCH_SIZE]
            example_output = model(example_input)
        inference_signature = infer_signature(
            example_input.cpu().numpy(), example_output.cpu().numpy()
        )

        # Evaluation over every test file: one point per file whose flagged
        # row carries Labels == 1.
        correct = 0
        for test in test_files:
            test_values = test['Value1'].values.reshape(-1, 1)
            test_scaled = scaler.transform(test_values)
            test_sequences = create_sequences(test_scaled, WINDOW_SIZE)
            if len(test_sequences) == 0:
                # File too short to yield a single window: nothing can be
                # flagged, so it contributes no point.
                continue
            test_tensor = torch.tensor(test_sequences, dtype=torch.float32).to(device)

            with torch.no_grad():
                reconstructed = model(test_tensor)
                # Mean squared reconstruction error per window
                test_loss = torch.mean((reconstructed - test_tensor) ** 2, dim=(1, 2)).cpu().numpy()

            # 95th-percentile error; computed for reference, not used in the score
            threshold = np.percentile(test_loss, 95)
            # Window i covers rows i .. i+WINDOW_SIZE-1, so this is the row
            # right after the worst-reconstructed window.
            # NOTE(review): confirm this offset is the intended anomaly position.
            prediction_index = np.argmax(test_loss) + WINDOW_SIZE
            # prediction_index is a row position, so index positionally.
            score = int(test["Labels"].iloc[prediction_index] == 1)
            correct += score

        mlflow.log_metric("test_score", correct)
        mlflow.pytorch.log_model(model, "LSTM Autoencoder"
                                    , signature=inference_signature
                                    )



🏃 View run legendary-slug-260 at: http://127.0.0.1:5000/#/experiments/0/runs/f6a836b963a14b5194ed0fdc1d8c0d7e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run awesome-bee-891 at: http://127.0.0.1:5000/#/experiments/0/runs/f461ed9521f44d26babf6239da27ef4c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run kindly-grub-58 at: http://127.0.0.1:5000/#/experiments/0/runs/dd295fe0bfdb4e3fb31de5941000d014
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run defiant-shrimp-713 at: http://127.0.0.1:5000/#/experiments/0/runs/eba34396f197439c95e85a257f5eea96
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run youthful-yak-892 at: http://127.0.0.1:5000/#/experiments/0/runs/35be7b8715ca417697d4f687e02f94c6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run auspicious-fowl-257 at: http://127.0.0.1:5000/#/experiments/0/runs/c4026f29b64f4533bb0bbb9a714f3096
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run trusting-kite-809 at: http://127.0.0.1:5000/#/experiments/0/runs/98659a7d28164da6be3c3dcaea183f3c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run resilient-horse-497 at: http://127.0.0.1:5000/#/experiments/0/runs/d53a5a22ce7f4ee5b66c104a43a5bb32
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run inquisitive-hog-604 at: http://127.0.0.1:5000/#/experiments/0/runs/c8548c38504a4737926c42c99ce064ed
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run bustling-finch-99 at: http://127.0.0.1:5000/#/experiments/0/runs/cd3d0eaec954498ba00b8a8fb989a235
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run ambitious-ape-622 at: http://127.0.0.1:5000/#/experiments/0/runs/179da3a8c22c47188c265bc5274b7a67
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run stately-calf-313 at: http://127.0.0.1:5000/#/experiments/0/runs/c872d151f60840519eb3a423153f6589
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run debonair-hog-182 at: http://127.0.0.1:5000/#/experiments/0/runs/cb0cc43eddf340b4850e8f3cf0ab883b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run amusing-ape-572 at: http://127.0.0.1:5000/#/experiments/0/runs/3d27dbd2ac72486cb251291a336e2971
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run abundant-doe-126 at: http://127.0.0.1:5000/#/experiments/0/runs/23ada7bfc15747f29eb210b178532087
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0




🏃 View run persistent-worm-420 at: http://127.0.0.1:5000/#/experiments/0/runs/e72374dcc5af4f5c9f59724c2bc7c3ff
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


In [None]:
# End the currently active MLflow run, if there is one. This does not shut
# down MLflow or the tracking server; it only closes a run that was left open.
mlflow.end_run()