In [1]:
import sys
import os
# Project root, relative to the directory the notebook is executed from.
project_path = '../../..'

# Make the project's packages importable from this notebook. The membership
# check keeps the cell idempotent: re-running it does not pile up duplicate
# sys.path entries.
_project_root = os.path.abspath(project_path)
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [2]:
from matplotlib import pyplot as plt
import numpy as np

# Load Synthetic Data

In [3]:
# Which synthetic dataset to load: number of states, emission distribution
# tag, and the name of the run folder.
num_states = 2
emission_dist = 't'
save_path = '2013-2018_2states'

# Both arrays sit side by side inside the run folder under the project root.
folder_path = f"{project_path}/data/synthetic/{emission_dist}-hmm/{save_path}"
s_filename = f"{folder_path}/S_data.npy"
x_filename = f"{folder_path}/X_data.npy"

# NOTE(review): S is presumably the ground-truth state sequence and X the
# observations -- confirm against the data generator.
S, X = (np.load(npy_path) for npy_path in (s_filename, x_filename))

In [4]:
from data_code.dataloader import create_dataloaders, create_datasets

# Windowing / batching settings handed to the dataloader factory.
window_size = 390
batch_size = 32

# 60% train / 20% validation; the remainder presumably becomes the test
# split -- TODO confirm in data_code.dataloader.
train_loader, val_loader, test_loader = create_dataloaders(
    X,
    S,
    window_size=window_size,
    train_ratio=0.6,
    val_ratio=0.2,
    batch_size=batch_size,
    standardize=True,
    feature_engineer=True,
)

# Search for the Best Hyperparameter of the Jump Model

In [5]:
import optuna
import json
import os
from utils.metrics import balanced_accuracy
from model.jump_module import JumpModule

# Candidate jump_penalty values: 17 points from 1e-3 to 1e5, evenly spaced in
# log10 (half-decade steps), converted to a plain list of Python floats.
grid_values = np.logspace(start=-3, stop=5, num=17).tolist()

def objective(trial):
    """Score a single ``jump_penalty`` candidate for the Optuna study.

    A fresh ``JumpModule`` is fitted on ``train_loader`` with the penalty
    chosen for this trial, then evaluated on ``val_loader``.

    Args:
        trial: Optuna trial used to pick ``jump_penalty`` from ``grid_values``.

    Returns:
        Balanced accuracy between the flattened true and predicted states
        returned by inference on the validation loader.
    """
    penalty = trial.suggest_categorical("jump_penalty", grid_values)

    # Train a new model for every candidate so trials do not share state.
    candidate = JumpModule(num_states, jump_penalty=penalty, max_iter=100)
    candidate.fit(train_loader)

    val_truth, val_pred = candidate.inference(val_loader)
    return balanced_accuracy(val_truth.ravel(), val_pred.ravel(), n_classes=num_states)

# Exhaustive search: the sampler's search space is exactly the pre-built grid.
search_space = {"jump_penalty": grid_values}
sampler = optuna.samplers.GridSampler(search_space)

# The objective returns balanced accuracy, so larger is better. No n_trials is
# passed: per the Optuna docs, GridSampler stops the study once every grid
# point has been evaluated.
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective)

# Report the winning configuration.
print("Best trial:")
best_trial = study.best_trial
best_params = best_trial.params
for key, value in best_params.items():
    print(f"  {key}: {value}")

# Persist the winner as JSON, named after the dataset run, so it can be
# reloaded without repeating the search.
os.makedirs(f"{project_path}/hyper_parameter/best_jump", exist_ok=True)
param_file_name = f"{project_path}/hyper_parameter/best_jump/{save_path}.json"
with open(param_file_name, "w") as f:
    f.write(json.dumps(best_params, indent=4))

print(f"Best hyperparameters saved to '{param_file_name}'.")


[I 2025-03-24 09:40:55,690] A new study created in memory with name: no-name-426a408a-cc25-4147-93eb-da82970b1ee3
[I 2025-03-24 09:41:35,511] Trial 0 finished with value: 0.534482902940135 and parameters: {'jump_penalty': 0.0031622776601683794}. Best is trial 0 with value: 0.534482902940135.
[I 2025-03-24 09:42:15,467] Trial 1 finished with value: 0.5344158873645342 and parameters: {'jump_penalty': 1.0}. Best is trial 0 with value: 0.534482902940135.
[I 2025-03-24 09:42:54,870] Trial 2 finished with value: 0.5342050065513503 and parameters: {'jump_penalty': 10.0}. Best is trial 0 with value: 0.534482902940135.
[I 2025-03-24 09:43:33,469] Trial 3 finished with value: 0.5519870888459636 and parameters: {'jump_penalty': 31.622776601683793}. Best is trial 3 with value: 0.5519870888459636.
[I 2025-03-24 09:44:12,410] Trial 4 finished with value: 0.5810899248886348 and parameters: {'jump_penalty': 3162.2776601683795}. Best is trial 4 with value: 0.5810899248886348.
[I 2025-03-24 09:44:51,857

Best trial:
  jump_penalty: 3162.2776601683795
Best hyperparameters saved to '../../../hyper_parameter/best_jump/2013-2018_2states.json'.


In [6]:
# Read the tuned jump penalty back from the JSON file written by the search.
with open(param_file_name, "r") as f:
    loaded_params = json.load(f)

print("Loaded hyperparameters:")
print(loaded_params)

# Refit on the training split with the selected penalty.
# NOTE(review): no seed is passed here; if JumpModule is stochastic the
# numbers below may vary between runs -- confirm.
jump = JumpModule(
    num_states,
    jump_penalty=loaded_params['jump_penalty'],
    max_iter=100,
)
jump.fit(train_loader)

true_s_train_jump, pred_s_train_jump = jump.inference(train_loader)
true_s_test_jump, pred_s_test_jump = jump.inference(test_loader)

# Same metric for both splits, evaluated train first, then test.
jump_bal_acc_train, jump_bal_acc_test = (
    balanced_accuracy(truth.ravel(), pred.ravel(), n_classes=num_states)
    for truth, pred in (
        (true_s_train_jump, pred_s_train_jump),
        (true_s_test_jump, pred_s_test_jump),
    )
)
print(f"[Jump] Balanced Accuracy [train]: {jump_bal_acc_train:.4f}")
print(f"[Jump] Balanced Accuracy [test]: {jump_bal_acc_test:.4f}")

Loaded hyperparameters:
{'jump_penalty': 3162.2776601683795}
[Jump] Balanced Accuracy [train]: 0.5580
[Jump] Balanced Accuracy [test]: 0.5333


# Gaussian HMM

In [7]:
from model.hmm_module import GaussianHMMModule

# Baseline: Gaussian HMM with full covariances and a fixed random_state.
hmm_model = GaussianHMMModule(
    n_components=num_states,
    covariance_type='full',
    n_iter=100,
    random_state=42,
)
hmm_model.fit(train_loader)

true_s_train_hmm, pred_s_train_hmm = hmm_model.inference(train_loader)
true_s_test_hmm, pred_s_test_hmm = hmm_model.inference(test_loader)

# Evaluation: same metric for both splits, train first, then test.
hmm_bal_acc_train, hmm_bal_acc_test = (
    balanced_accuracy(truth.ravel(), pred.ravel(), n_classes=num_states)
    for truth, pred in (
        (true_s_train_hmm, pred_s_train_hmm),
        (true_s_test_hmm, pred_s_test_hmm),
    )
)
print(f"[HMM] Balanced Accuracy [train]: {hmm_bal_acc_train:.4f}")
print(f"[HMM] Balanced Accuracy [test]: {hmm_bal_acc_test:.4f}")

[HMM] Balanced Accuracy [train]: 0.5071
[HMM] Balanced Accuracy [test]: 0.5001


# KMeans++

In [8]:
from model.kmeans_module import KMeansModule

# Baseline: k-means clustering with one cluster per state.
# NOTE(review): unlike the HMM baseline, no seed is passed here; if
# KMeansModule accepts one, fixing it would make these numbers reproducible
# -- confirm.
kmeans = KMeansModule(
    n_clusters=num_states,
    n_init=10,
    max_iter=300,
)
kmeans.fit(train_loader)

true_s_train_kmeans, pred_s_train_kmeans = kmeans.inference(train_loader)
true_s_test_kmeans, pred_s_test_kmeans = kmeans.inference(test_loader)

# Same metric for both splits, evaluated train first, then test.
kmeans_bal_acc_train, kmeans_bal_acc_test = (
    balanced_accuracy(truth.ravel(), pred.ravel(), n_classes=num_states)
    for truth, pred in (
        (true_s_train_kmeans, pred_s_train_kmeans),
        (true_s_test_kmeans, pred_s_test_kmeans),
    )
)
print(f"[KMeans] Balanced Accuracy [train]: {kmeans_bal_acc_train:.4f}")
print(f"[KMeans] Balanced Accuracy [test]: {kmeans_bal_acc_test:.4f}")

[KMeans] Balanced Accuracy [train]: 0.5355
[KMeans] Balanced Accuracy [test]: 0.5196
