In [9]:
import torch
from pathlib import Path
from torch.utils.data import DataLoader, random_split
from anomaly_detection.main import setup_paths
from anomaly_detection.trainers.trainer import Trainer
from anomaly_detection.utils.tensors import *
from data.hdfs_series import HDFSEvents
from data.dataset import HDFSEventsDataset
from models.deeplog import DeepLog
from utils.torch import save_model_info
from loguru import logger

# Configure loguru logger.
# logger.add() registers a NEW sink on every call, so re-running this cell
# would otherwise make each message appear in debug.log once per run.
# Drop the sink registered by a previous run of this cell before adding it again.
_previous_debug_sink_id = globals().get("_debug_sink_id")
if _previous_debug_sink_id is not None:
    try:
        logger.remove(_previous_debug_sink_id)
    except ValueError:
        # The sink was already removed elsewhere (e.g. a bare logger.remove()).
        pass
_debug_sink_id = logger.add("debug.log", level="DEBUG")

# setup_paths() yields two values; main_repo is used further down as the base
# path for the data files.
save_folder, main_repo = setup_paths()

# Check device: use the GPU when CUDA is available, otherwise fall back to CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Model selection: `model_name` picks one entry of `configs`, exposed as `hp`.
model_name = "deeplog"
configs = dict(
    deeplog=dict(
        # Data handling (read by the split / windowing / dataloader code)
        train_val_split=0.8,
        window_size=10,
        # Not referenced in the cells visible here; presumably consumed by
        # the training loop -- TODO confirm.
        max_iters=1000,
        eval_interval=100,
        # Passed straight to the DeepLog constructor below
        input_size=1,
        hidden_size=64,
        num_layers=2,
        output_size=28,
        # Batch size for both dataloaders
        batch_size=16,
    ),
)
hp = configs[model_name]

# Build the network from the selected hyperparameters, then move it to `device`.
model = DeepLog(
    input_size=hp["input_size"],
    hidden_size=hp["hidden_size"],
    num_layers=hp["num_layers"],
    output_size=hp["output_size"],
)
model = model.to(device)

# Process dataset: load events from the HDFS training file (nrows=100
# presumably caps how many rows are read -- TODO confirm) and wrap them in a
# dataset parameterised by the configured window size.
events = HDFSEvents.from_text_file(main_repo / 'data/hdfs/hdfs_train', nrows=100)
dataset = HDFSEventsDataset(events, window_size=hp["window_size"])

# Dedicated, seeded RNG for the split and the training shuffle. Without it both
# draw from the unseeded global RNG, so train/val membership and batch order
# change on every run. Set hp["seed"] to override; 42 is used when it is absent.
data_seed = hp.get("seed", 42)
data_rng = torch.Generator().manual_seed(data_seed)

# Split dataset
total_size = len(dataset)
train_size = int(total_size * hp["train_val_split"])
# Validation takes the remainder so the two sizes always sum to total_size.
val_size = total_size - train_size
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=data_rng
)

# Create dataloaders: only the training loader shuffles, using the seeded RNG.
train_loader = DataLoader(
    train_dataset, batch_size=hp["batch_size"], shuffle=True, generator=data_rng
)
val_loader = DataLoader(val_dataset, batch_size=hp["batch_size"], shuffle=False)


In [13]:
# Fetch a single batch from the training loader; it unpacks into two tensors.
train_batches = iter(train_loader)
xs, xy = next(train_batches)
# Report shape and dtype of each tensor in the batch
for _batch_tensor in (xs, xy):
    pretty_print_tensor(_batch_tensor)




-------------------
Tensor Info:
Shape: torch.Size([16, 10])	Datatype: torch.int64
-------------------


-------------------
Tensor Info:
Shape: torch.Size([16])	Datatype: torch.int64
-------------------

