In [None]:
import numpy as np
from itertools import combinations, product
from myf import calculate_f_jk, calculate_g_x
from joblib import Parallel, delayed
import os
import pickle
from tslearn.clustering import silhouette_score

In [12]:
def apply_motif(
    motifs,
    tau,
    data,
    file_name,
    dtw_matrix_validation=None,
    n_jobs=-1,
):
    """Assign every profile in ``data`` to its closest accepting motif.

    Can be applied to a single test sample or to the whole validation set.
    Motif ``j`` accepts profile ``i`` when their distance is strictly below
    ``motifs[j]["score"] + tau``. Among the accepting motifs the one with the
    smallest distance wins; a profile accepted by no motif gets index ``-1``.

    Parameters
    ----------
    motifs : sequence of dict
        One dict per motif, laid out as::

            {
                "motif": {"pattern1": profiles[row], "pattern2": profiles[col]},  # non transformed
                "pair_idx": (row, col),
                "score": min_value,
                "barycenter": barycenter or None,
            }

        Only ``"motif"``, ``"score"`` and ``"barycenter"`` are read here.
        NOTE(review): a motif whose barycenter is ``None`` would put an
        object entry into the returned barycenter array -- confirm upstream
        always fills it in.
    tau : float
        Tolerance added to each motif's own score to form its threshold.
    data : sequence of profiles
        One profile per row (original author's note: suspected shape
        ``(-1, 1440)`` -- TODO confirm).
    file_name : object
        Only used to build the name of the saved distance matrix,
        ``dtw_matrix_validation_multi_{file_name}.npy``.
    dtw_matrix_validation : array-like of shape (len(data), len(motifs)), optional
        Precomputed profile-to-motif distances. When ``None`` the matrix is
        computed with ``calculate_f_jk`` (minimum over the two patterns of
        each motif) and saved to disk.
    n_jobs : int
        Passed to ``joblib.Parallel`` when the matrix has to be computed.

    Returns
    -------
    min_indices : ndarray of shape (n,)
        Index of the selected motif per profile, ``-1`` for unmatched ones.
    min_barycenter : ndarray
        Per profile, the squeezed barycenter of the selected motif, or
        ``calculate_g_x(profile)`` for unmatched profiles. Per the original
        author's note this is ``(n, MOTIF_LENGTH)`` and barycenters are the
        transformed ones.

    Raises
    ------
    ValueError
        If the distance matrix shape is not ``(len(data), len(motifs))``.
    """
    n_profiles = len(data)
    n_motifs = len(motifs)

    if dtw_matrix_validation is None:

        def min_distance(profile, patterns):
            # A motif is a pair of patterns; keep the closer of the two.
            return min(
                calculate_f_jk(profile, patterns["pattern1"]),
                calculate_f_jk(profile, patterns["pattern2"]),
            )

        # Row-major (profile, motif) pairs, so the flat result list reshapes
        # directly into a (n_profiles, n_motifs) matrix.
        pairs = list(product(range(n_profiles), range(n_motifs)))
        distances = Parallel(n_jobs=n_jobs)(
            delayed(min_distance)(data[i], motifs[j]["motif"]) for i, j in pairs
        )
        dtw_matrix_validation = np.array(distances, dtype=float).reshape(
            n_profiles, n_motifs
        )

        np.save(
            f"dtw_matrix_validation_multi_{file_name}.npy", dtw_matrix_validation
        )  # for original motifs with no motif removed
    else:
        dtw_matrix_validation = np.asarray(dtw_matrix_validation, dtype=float)

    # A stale or mismatched cache would otherwise broadcast oddly or be
    # silently truncated when zipped with ``data`` below.
    if dtw_matrix_validation.shape != (n_profiles, n_motifs):
        raise ValueError(
            "distance matrix has shape "
            f"{dtw_matrix_validation.shape}, expected {(n_profiles, n_motifs)} "
            "(len(data), len(motifs))"
        )

    # One acceptance threshold per motif (shape (k,)), broadcast over rows.
    thresholds = np.array([motif["score"] + tau for motif in motifs], dtype=float)

    # Accept only distances strictly below the threshold. Written as ``<``
    # (rather than masking ``>=``) so that NaN distances are rejected instead
    # of slipping through and being picked by argmin.
    is_match = dtw_matrix_validation < thresholds

    min_indices = np.full(n_profiles, -1)  # -1 marks "no accepting motif"
    if is_match.size:
        has_match = is_match.any(axis=1)
        # Rejected entries become +inf so they can never win the argmin.
        candidates = np.where(is_match, dtw_matrix_validation, np.inf)
        min_indices[has_match] = candidates.argmin(axis=1)[has_match]

    min_barycenter = [
        (
            np.squeeze(motifs[idx]["barycenter"])
            if idx != -1
            else np.array(calculate_g_x(profile))
        )
        for idx, profile in zip(min_indices, data)
    ]

    return min_indices, np.array(min_barycenter)

We apply the trained motifs to the validation set to visualize (1) the final number of motifs and (2) the number of outliers, and then choose the two better models.

In [None]:
# Pickled motif sets to compare. Each name has the form
# "<first value>_<second value>_multi_motifs.pkl"; the second "_"-separated
# field is what gets parsed as tau further down. The trailing comment on each
# entry is the ratio of the second value to the first.
motif_file_list = [
    "9.236420021787648_2.7709260065362944_multi_motifs.pkl", # ratio 0.3
    "9.236420021787648_3.694568008715059_multi_motifs.pkl", # ratio 0.4
    "9.236420021787648_4.618210010893824_multi_motifs.pkl", # ratio 0.5
]

In [4]:
# tau is encoded as the second "_"-separated field of each motif file name.
tau = [
    float(fields[1])
    for fields in (file_name.split("_") for file_name in motif_file_list)
]

In [8]:
# Display the parsed tau values (one per motif file) as a sanity check.
tau

[2.7709260065362944, 3.694568008715059, 4.618210010893824]

In [5]:
# Load one pickled motif collection per file, in the order of motif_file_list.
# NOTE(review): pickle.load executes arbitrary code from the file -- only use
# this on motif files you produced yourself.
motif_list = []
for motif_file in motif_file_list:
    with open(motif_file, mode="rb") as f:
        loaded_motifs = pickle.load(f)
    motif_list.append(loaded_motifs)

In [6]:
# Print how many motifs each loaded collection contains, one count per line.
for motif in motif_list:
    motif_count = len(motif)
    print(motif_count)

53
28
18


In [7]:
# Author's note: for validation, outliers could be removed with a Tukey test,
# a 5-10% cut, or by dropping groups smaller than a certain number of cases.
#
# Load every file in the validation directory and stack them into one array.
# NOTE(review): os.listdir returns names in arbitrary order, and the row order
# of validation_data follows that listing. Any cached distance matrix is only
# valid for the same row order -- confirm before reusing caches elsewhere.
validation_dir = "smoothed_multi_val"
validation_files = os.listdir(validation_dir)
validation_list = [
    np.load(os.path.join(validation_dir, entry)) for entry in validation_files
]
validation_data = np.array(validation_list)

In [16]:
# Previously saved profile-to-motif distance matrices, one per tau value and
# in the same order as motif_file_list / tau.
stored_motif_val_dist = [
    np.load(cache_path)
    for cache_path in (
        "dtw_matrix_validation_multi_2.7709260065362944.npy",
        "dtw_matrix_validation_multi_3.694568008715059.npy",
        "dtw_matrix_validation_multi_4.618210010893824.npy",
    )
]

In [13]:
# Run apply_motif once per motif collection, pairing each collection with its
# tau (also used as the file-name tag) and its cached distance matrix.
motif_tasks = [
    delayed(apply_motif)(
        motifs_k, tau[k], validation_data, tau[k], stored_motif_val_dist[k]
    )
    for k, motifs_k in enumerate(motif_list)
]
val_results = Parallel(n_jobs=-1)(motif_tasks)

In [15]:
# For each model print two lines: the number of profiles with no accepting
# motif (index -1), then the number of profiles assigned to some motif.
for i, (min_indices, _) in enumerate(val_results):
    is_outlier = min_indices == -1
    print(np.count_nonzero(is_outlier))
    print(np.count_nonzero(~is_outlier))

318
3904
385
3837
503
3719
