In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../')

import pandas as pd
meta_df = pd.read_csv("../Data/UCR_Summary.csv")
meta_df["learned_w"] = meta_df["DTW (learned_w) "].apply(lambda xi: int(xi.split("(")[-1].split(")")[0]))
meta_df.index = meta_df["Name"].str.lower()
meta_df.columns = [c.strip() for c in meta_df.columns]

results_df = pd.read_csv("../Data/UCR_results.csv", delimiter="\t", encoding="utf-16", index_col=0)

### Load the data

In [3]:
from utils.ucr_helpers import UCR_Data
# data = UCR_Data("Mallat")
data:UCR_Data = UCR_Data("ItalyPowerDemand")
# data = UCR_Data("Beef")

In [4]:
print(data.summary)

Number of classes: 2
Number of training samples: 67
Number of test samples: 1029
Length of time series: 24


In [5]:
fig = data.plot_fig()
fig.show()

In [6]:
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Define a custom Dataset
class TimeSeriesDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the autoencoder architecture
class Autoencoder(nn.Module):
    def __init__(self, input_size, encoding_dim):
        super(Autoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_size, encoding_dim),
            nn.ReLU(True)
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, input_size),
            nn.ReLU(True)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# class LSTM_Autoencoder(nn.Module):
#     def __init__(self, input_size, hidden_size, num_layers=1):
#         super(Autoencoder, self).__init__()
#         self.encoder = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.decoder = nn.LSTM(hidden_size, input_size, num_layers, batch_first=True)

#     def forward(self, x):
#         encoded, _ = self.encoder(x)
#         decoded, _ = self.decoder(encoded)
#         return decoded

# Parameters
n_time_series = data.X.shape[0]  # Example number of time series
length_time_series = data.X.shape[1]  # Example length of each time series
encoding_dim = 160  # Size of the encoding

# Instantiate the model
autoencoder = Autoencoder(length_time_series, encoding_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.1)
print(optimizer.param_groups[0]["lr"])
PATIENCE = 10
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.8, patience=PATIENCE, threshold=0.1
)

# Prepare your data
# Assuming X is your (n_time_series, length_time_series) dataset
X = torch.tensor(data.X, dtype=torch.float32)

# Convert your numpy array to a PyTorch tensor
X_tensor = torch.tensor(X, dtype=torch.float32)

# Create a dataset
dataset = TimeSeriesDataset(X_tensor)

# Create a DataLoader
batch_size = 10
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Training the model
num_epochs = 100  # Number of epochs

for epoch in range(num_epochs+1):
    for batch in dataloader:
        # Get the mini-batch

        # Forward pass
        outputs = autoencoder(batch)
        loss = criterion(outputs, batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step(loss)
    if epoch % 10 ==0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, LR: {optimizer.param_groups[0]["lr"]}')

# After training, you can use model.encoder to get the feature representation


0.1
Epoch [1/100], Loss: 0.9140, LR: 0.013421772800000007



To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).



Epoch [11/100], Loss: 0.9266, LR: 4.0173451106474913e-08
Epoch [21/100], Loss: 0.9154, LR: 4.0173451106474913e-08
Epoch [31/100], Loss: 0.9274, LR: 4.0173451106474913e-08
Epoch [41/100], Loss: 0.9275, LR: 4.0173451106474913e-08
Epoch [51/100], Loss: 0.9232, LR: 4.0173451106474913e-08
Epoch [61/100], Loss: 0.9222, LR: 4.0173451106474913e-08
Epoch [71/100], Loss: 0.9235, LR: 4.0173451106474913e-08
Epoch [81/100], Loss: 0.9263, LR: 4.0173451106474913e-08
Epoch [91/100], Loss: 0.9269, LR: 4.0173451106474913e-08
Epoch [101/100], Loss: 0.9213, LR: 4.0173451106474913e-08


In [12]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
# classifier = KNeighborsClassifier(n_neighbors=1, metric='euclidean')

embeddings = autoencoder.encoder(X).detach().numpy()
train_size = data.X_train.shape[0]
X_train = embeddings[:train_size, :]
X_test = embeddings[train_size:, :]
y_train = data.y[:train_size]
y_test = data.y[train_size:]

from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from utils.ucr_helpers import evaluate_resampling_UCR
report, _, _ = evaluate_resampling_UCR(
    X_train,
    X_test,
    data.y_train,
    data.y_test,
    # classifier=SVC(kernel="rbf"),
    classifier=KNeighborsClassifier(n_neighbors=1),
    # classifier=DecisionTreeClassifier(),
    n_resamples=20,
    verbose=True,
    scale=False,
    over_sampling=True
)
report

100%|██████████| 19/19 [00:00<00:00, 22.90it/s]


{'precision': 0.9293481996083008,
 'recall': 0.9275510204081634,
 'f1-score': 0.9274634799100013,
 'accuracy': 0.9275510204081634}

Potential to combine MF and Autoencoder embeddings?