In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch

from resources.data_loader import DataLoader
from pipeline import get_default_config, plot_history
from pipeline import classify_shapelets_mts, train_mts
from pipeline import classify_shapelets_text, train_text

# Data Loading

In [2]:
# Single loader instance shared by the time-series and text sections below.
data_loader = DataLoader()

# Time Series

In [None]:
# Preload the MTS datasets.
# Each entry is later printed and passed to `data_loader.load_mts_dataset`
# as the dataset identifier.
mts_datasets = data_loader.get_mts_datasets()

In [None]:
# Train an encoder and run shapelet classification on every MTS dataset.
#
# Note: all parameters can go in the config dict, which makes it easy to run
#       ablation studies / experiments by varying its entries.
config = get_default_config()
config["stride"] = 5
for dataset in mts_datasets:
    print(dataset)
    X_train, y_train = data_loader.load_mts_dataset(dataset, split="train")
    # Skip ragged datasets (e.g. JapaneseVowels), which are returned as lists.
    # `isinstance` is the idiomatic type check and also covers list subclasses.
    # The check runs before the test split is loaded so that skipped datasets
    # do not pay for a load whose result is never used.
    if isinstance(X_train, list):
        continue
    X_test, y_test = data_loader.load_mts_dataset(dataset, split="test")

    # The encoder is trained on the training inputs only (no labels are passed).
    history, encoder = train_mts(X_train, config, random_state=42, debug=False)
    plot_history(history, f"plots/encoder_training_{dataset}.pdf")

    classify_shapelets_mts(X_train, y_train, X_test, y_test, config, encoder)

# Text

In [3]:
# Load the text dataset and draw a small stratified train/test split.
X, y = data_loader.load_text_dataset("data")

# Split on indices so the same selection can be applied to both X and y.
train_indices, test_indices = train_test_split(
    np.arange(len(X)),
    train_size=20,
    test_size=100,
    stratify=y,
    random_state=42,
)
X_train, y_train = X[train_indices], y[train_indices]
X_test, y_test = X[test_indices], y[test_indices]

# Sanity check: one shape per line, train first.
print(X_train.shape, X_test.shape, sep="\n")

(20,)
(100,)


Load the default configuration and override the length limits:

In [16]:
# Start from the defaults, then override the minimum and maximum length
# settings used for the text experiment.
config = get_default_config()
config["min_length"], config["max_length"] = 5, 20

Train the encoder:

In [19]:
# Train the text encoder on the training split; `train_text` returns the
# training history, the encoder and the tokenizer.
# NOTE(review): the last recorded run of this cell ended in an AssertionError
# with no message. The failing call is not identifiable from the notebook
# output alone; re-run on a fresh kernel to confirm where it originates.
history, encoder, tokenizer = train_text(X_train, config, random_state=42, debug=True)

# Plot and save the training history.
# The path has no placeholders, so a plain string is used instead of an f-string.
plot_history(history, "plots/encoder_training_text.pdf")

# Save the encoder weights (state dict only).
torch.save(encoder.state_dict(), "encoder.pt")

# Save the tokenizer.
tokenizer.save('tokenizer.json')

AssertionError: 

Run shapelet classification:

In [18]:
# Classify the test split using the trained encoder and tokenizer.
# The recorded output shows a list of text snippets followed by an accuracy
# figure; presumably the snippets are the selected shapelets (TODO confirm).
classify_shapelets_text(X_train, y_train, X_test, y_test, config, tokenizer, encoder)

['<br /> <br />', 'I saw it, and I', 'News: I have to give', 'a lot of other so', ", but it this isn't", 'in this film, the', 'new movie, which may', 'it one of the best', 'years, is like a', 'my review with the fact', 'she has is more then', 'this film is wonderful film for', 'etc.) But this movie', 'Zombie movie? "I', 'draw the audience into the', 'thing with "Freddy vs.', 'would not be out of', "her husband's business,", 'by the original when I was', 'knockout and getting to see', 'all that bad of a movie', ', as she was in her', 'a lot to like about this movie.', 'America. It helped me to', 'and you may find yourself']
Accuracy: 0.52
