In [None]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch

from data_loader import DataLoader
from pipeline import get_default_config, plot_history
from pipeline import classify_shapelets_mts, train_mts
from pipeline import classify_shapelets_text, train_text

# Data Loading

In [None]:
# Single loader instance shared by the time-series and text cells below.
data_loader = DataLoader()
# Preload the MTS datasets.
# NOTE(review): presumably MTS = multivariate time series and the result is a
# collection of dataset identifiers, since each entry is later passed to
# load_mts_dataset() and interpolated into a plot filename -- confirm in
# data_loader.py.
mts_datasets = data_loader.get_mts_datasets()

# Time Series

In [None]:
# All tunable parameters live in the config dict, so ablation studies and
# other experiments only need to change entries here.
config = get_default_config()
config["stride"] = 5
for dataset in mts_datasets:
    print(dataset)
    X_train, y_train = data_loader.load_mts_dataset(dataset, split="train")
    # Skip ragged datasets (e.g. JapaneseVowels), for which the loader returns
    # a list. Checked before loading the test split so that skipped datasets
    # do not pay for a second, unused load.
    if isinstance(X_train, list):
        continue
    X_test, y_test = data_loader.load_mts_dataset(dataset, split="test")

    # Train the encoder on the training inputs only (labels are not passed),
    # then store the training curve as one PDF per dataset.
    history, encoder = train_mts(X_train, config, random_state=42, debug=False)
    plot_history(history, f"plots/encoder_training_{dataset}.pdf")

    # Run the shapelet classification on both splits with the trained encoder.
    # The return value is not captured here, so any results are presumably
    # reported by the function itself -- TODO confirm in pipeline.py.
    classify_shapelets_mts(X_train, y_train, X_test, y_test, config, encoder)

# Text

In [None]:
# Load the text dataset and carve out a fixed-size, stratified train/test
# subset (200 training and 100 test samples, seeded for reproducibility).
X, y = data_loader.load_text_dataset("data")
train_indices, test_indices = train_test_split(
    np.arange(len(X)),
    stratify=y,
    train_size=200,
    test_size=100,
    random_state=42,
)
# Index features and labels with the same index arrays so they stay aligned.
X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]
print(X_train.shape)
print(X_test.shape)

In [None]:
# Start the text run from the default config again instead of reusing the
# dict modified in the time-series cell (where "stride" was overridden).
# NOTE(review): assumes get_default_config() returns a new object on each
# call -- confirm in pipeline.py.
config = get_default_config()
# Train the text encoder on the training inputs only (labels are not passed).
# This rebinds `history` and `encoder` from the time-series cell. Note that
# debug=True here, whereas the time-series run uses debug=False.
history, encoder = train_text(X_train, config, random_state=42, debug=True)

In [None]:
# Persist the state dict of whatever `encoder` currently refers to (the text
# encoder when the cells are run in order) to "encoder.pt" in the current
# working directory. Only the state dict is saved, not the model object, so
# loading requires rebuilding the encoder and calling load_state_dict().
torch.save(encoder.state_dict(), "encoder.pt")