In [1]:
import sys
# Put the parent directory on the import path (presumably where the `utils`
# and `models` packages imported in later cells live). The membership check
# keeps the cell idempotent: re-running it no longer appends duplicate
# '../' entries to sys.path.
if '../' not in sys.path:
    sys.path.append('../')

In [2]:
from utils.ucr_helpers import UCR_Data
# Dataset name handed to the project's UCR_Data helper.
DATASET_NAME = "StarLightCurves"
data = UCR_Data(DATASET_NAME)

In [3]:
# Show the figure returned by data.plot_fig() using the dark plotly template.
fig = data.plot_fig()
dark_layout = {"template": "plotly_dark"}
fig.update_layout(**dark_layout)
fig.show()

In [7]:
import numpy as np
import torch
# Correlation coefficients between the rows of data.X (rowvar=True, numpy's
# default, treats every row as one variable), copied into a tensor. Kept as a
# single expression so no extra reference to the intermediate NumPy matrix
# outlives this cell.
correlations = torch.tensor(
    data=np.corrcoef(data.X, rowvar=True),
)

In [28]:
import torch
from tqdm import tqdm
from models.embedding_models import MatrixFactorization

# Fit one embedding per row of data.X so that the model's pairwise embedding
# similarities approximate the `correlations` matrix computed earlier.

# Seed torch's global RNG so that any random initialisation inside the model
# (presumably the embedding table) is the same on every Restart & Run All.
torch.manual_seed(42)

model = MatrixFactorization(
    n_time_series=data.X.shape[0], embedding_dim=20, normalize=True
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# Multiply the learning rate by 0.9 after every epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

# Per-epoch history: (total, weighted pairwise, weighted regularization) losses
losses = []
# Per-epoch history: learning rate that was used for that epoch's update
learning_rates = []
regularization_loss_weight = 0.1
pairwise_loss_weight = 0.001
for epoch in tqdm(range(30)):
    optimizer.zero_grad()
    pairwise_embedding_similarities = model()
    pairwise_loss = pairwise_loss_weight * model.calculate_loss(
        correlations,
        pairwise_embedding_similarities,
        loss_function=torch.nn.functional.l1_loss,
    )
    # Penalise every embedding row whose L2 norm deviates from 1.
    regularization_loss = (
        regularization_loss_weight
        * torch.abs(torch.linalg.norm(model.embeddings.weight, dim=1) - 1).sum()
    )
    loss = pairwise_loss + regularization_loss
    loss.backward()
    optimizer.step()

    # Log the learning rate BEFORE stepping the scheduler: reading it after
    # scheduler.step() would record the next epoch's (already decayed) rate
    # and shift the whole learning-rate curve by one epoch.
    learning_rates.append(optimizer.param_groups[0]['lr'])
    losses.append((loss.item(), pairwise_loss.item(), regularization_loss.item()))

    scheduler.step()


100%|██████████| 30/30 [01:16<00:00,  2.55s/it]


In [29]:
import plotly.graph_objects as go

# Split the per-epoch (total, pairwise, regularization) tuples into three series
total_losses, pairwise_losses, regularization_losses = zip(*losses)
epochs = list(range(len(total_losses)))

# Create a figure
fig = go.Figure()

# Loss curves, all on the primary (left) y-axis.
# The 'Total Loss' trace must plot total_losses; it previously plotted
# pairwise_losses and therefore just duplicated the 'Pairwise Loss' curve.
fig.add_trace(go.Scatter(x=epochs, y=total_losses, mode='lines', name='Total Loss'))
fig.add_trace(go.Scatter(x=epochs, y=pairwise_losses, mode='lines', name='Pairwise Loss'))
fig.add_trace(go.Scatter(x=epochs, y=regularization_losses, mode='lines', name='Regularization Loss'))

# Learning rate on a secondary (right) y-axis, since its scale is unrelated to the losses
fig.add_trace(
    go.Scatter(
        x=list(range(len(learning_rates))),
        y=learning_rates,
        mode='lines',
        name='Learning rate',
        yaxis='y2',
    )
)

# Single layout update so the axis titles cannot silently override each other
# (a later yaxis_title='Loss' used to replace the earlier primary-axis title).
fig.update_layout(
    title='Losses During Training',
    xaxis_title='Epoch',
    yaxis=dict(title='Loss'),
    yaxis2=dict(title='Learning Rate', overlaying='y', side='right'),
    template='plotly_dark',
)

# Show the figure
fig.show()


In [30]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# Fit a classifier on the embeddings of the first `train_size` rows and report
# classification metrics on the remaining rows.

# Set to True to oversample the training embeddings with SMOTE before fitting.
smote = False
# .cpu() is a no-op for a CPU model and keeps the conversion working if the
# model lives on another device.
embeddings = model.embeddings.weight.detach().cpu().numpy()
# NOTE(review): the slicing below assumes the rows of data.y (and of the
# embedding table) are ordered training rows first, then test rows, with
# data.X_train.shape[0] training rows -- confirm against UCR_Data.
train_size = data.X_train.shape[0]
embeddings_train = embeddings[:train_size, :]
embeddings_test = embeddings[train_size:, :]
y_train = data.y[:train_size]
y_test = data.y[train_size:]


# classifier = SVC(kernel='rbf')
# random_state pins the MLP's weight initialisation and shuffling so the
# report below is reproducible; 42 matches the seed already used for SMOTE.
classifier = MLPClassifier(hidden_layer_sizes=[100], max_iter=1000, random_state=42)
# classifier = AdaBoostClassifier(base_estimator=SVC(kernel='rbf', probability=True))
if smote:
    sm = SMOTE(random_state=42)
    X_train_oversampled, y_train_oversampled = sm.fit_resample(embeddings_train, y_train)
    classifier.fit(X_train_oversampled, y_train_oversampled)
else:
    classifier.fit(embeddings_train, y_train)
y_preds = classifier.predict(embeddings_test)
print(classification_report(y_true=y_test, y_pred=y_preds))

              precision    recall  f1-score   support

           1       0.33      0.26      0.29      1177
           2       1.00      0.99      0.99      2305
           3       0.82      0.87      0.84      4754

    accuracy                           0.81      8236
   macro avg       0.72      0.71      0.71      8236
weighted avg       0.80      0.81      0.81      8236




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.



In [31]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA

# Learned embedding table as a NumPy array (.cpu() is a no-op on a CPU model
# and keeps the conversion working if the model lives on another device)
embeddings = model.embeddings.weight.detach().cpu().numpy()

# Project the embeddings onto their first three principal components.
# Only the first two components are drawn in the scatter plot below.
pca = PCA(n_components=3)
pca_data = pca.fit_transform(embeddings)

# Class label of every row, taken from the dataset
labels = data.y

# Collect the first two principal components and the class labels for plotting.
# Labels are cast to str so plotly colours them as discrete classes; numeric
# labels would otherwise be drawn on a continuous colour scale.
df = pd.DataFrame(
    {
        'x': pca_data[:, 0],
        'y': pca_data[:, 1],
        'label': pd.Series(labels).astype(str),
    }
)

# Scatter plot coloured by class label, with the label shown as the hover title
fig = px.scatter(
    df,
    x='x',
    y='y',
    color='label',
    hover_name='label',
    category_orders={'label': sorted(df['label'].unique())},
    labels={'x': 'PC 1', 'y': 'PC 2'},
    title='PCA of Learned Embeddings',
)
fig.update_layout(template='plotly_dark')
fig.show()