In [None]:
import random
import pandas as pd
import matplotlib.pylab as plt
from math import sqrt
import numpy as np 
from collections import defaultdict
from numpy import linalg as LA
from copy import deepcopy

def cluster_time_series(t, c, max_iter=100, seed=0):
    """
    Perform K-Spectral Centroid (K-SC) clustering on time series data.

    Each iteration recomputes every cluster centroid as the eigenvector of
    ``B.T @ B - n * I`` whose eigenvalue is closest to zero (``B`` holds the
    unit-normalised members of the cluster, ``n`` their count), then reassigns
    every series to the centroid with the smallest scale-invariant distance
    ``||x - alpha * y|| / ||x||`` where ``alpha = (x . y) / (y . y)``.
    Iteration stops when the assignments no longer change or after
    ``max_iter`` passes.

    Parameters:
        t (array-like): Time series data of shape (n_samples, n_timestamps)
        c (int): Number of clusters (k)
        max_iter (int): Maximum number of update/reassign passes (default 100)
        seed (int): Seed of the private random generator used for the initial
            assignment (default 0, matching the previously hard-coded seed)

    Returns:
        mem (ndarray): Cluster labels for each time series
        mu (ndarray): Cluster centroids, shape (k, n_timestamps). The row of a
            cluster that has no usable member is all zeros.

    Raises:
        ValueError: If ``t`` is not two-dimensional.
    """
    # A private generator yields the same initial labels as seeding the module
    # RNG, but leaves the caller's global random state untouched.
    rng = random.Random(seed)

    matrix = np.asarray(t, dtype=float)
    if matrix.ndim != 2:
        raise ValueError("t must be a 2-D array of shape (n_samples, n_timestamps)")
    k = c
    N, T = matrix.shape

    # Randomly initialize cluster membership for each time series
    mem = np.array([rng.randint(0, k - 1) for _ in range(N)], dtype=int)

    # Row norms are reused for centroid normalisation in every iteration.
    row_norms = np.sqrt(np.sum(np.square(matrix), axis=1))

    def d_hat(x, x_norm, y):
        """Scale-invariant distance from series ``x`` to centroid ``y``."""
        yy = np.dot(y, y)
        if yy == 0:
            # Zero centroid marks an empty cluster. Without this guard the
            # distance is NaN and np.argmin would pick the NaN entry, dragging
            # every series into the empty cluster.
            return np.inf
        if x_norm == 0:
            # An all-zero series is matched exactly by any scaled centroid.
            return 0.0
        return np.linalg.norm(x - (np.dot(x, y) / yy) * y) / x_norm

    mu = np.zeros((k, T))  # Cluster centroids
    for it in range(max_iter):
        print("Iteration", it)  # Display the current iteration number
        prev_mem = mem.copy()
        mu = np.zeros((k, T))

        # Update centroids
        for j in range(k):
            # Zero-norm members cannot be normalised and carry no shape
            # information, so they are left out of the spectral update.
            usable = (mem == j) & (row_norms > 0)
            if not np.any(usable):
                continue  # centroid stays all-zero
            B = matrix[usable] / row_norms[usable][:, None]
            M = np.matmul(B.T, B) - B.shape[0] * np.identity(T)
            # M is symmetric, so eigh is the appropriate solver and always
            # returns real eigenpairs (eig may return complex output).
            w, v = np.linalg.eigh(M)
            direction = v[:, np.argmin(np.abs(w))]
            # Fix the sign ambiguity of the eigenvector: prefer a non-negative sum.
            mu[j] = -direction if np.sum(direction) < 0 else direction

        # Reassign time series to nearest centroid
        for vec_idx in range(N):
            distances = [d_hat(matrix[vec_idx], row_norms[vec_idx], mu[j]) for j in range(k)]
            mem[vec_idx] = np.argmin(distances)

        # Stop if cluster assignments have not changed
        if np.array_equal(prev_mem, mem):
            break

    # Optionally save results
    # np.save('mu_' + str(k) + '.npy', mu)
    # np.save('clusters_set_' + str(k) + '.npy', mem)

    return mem, mu

In [None]:
# Load the time series table; the first CSV column becomes the row index.
time_series = pd.read_csv('time_series.csv', index_col=0)

In [None]:
# Show the loaded table as this cell's output.
time_series

In [None]:
# Cluster the time series into a fixed number of groups.
defined_cluster = 3
x = np.array(time_series)  # plain ndarray view of the table's values
labels, centers = cluster_time_series(x, defined_cluster)

In [None]:
# Draw each cluster centroid as its own line on one shared figure.
for centroid in centers:
    plt.plot(centroid)
plt.show()

In [None]:
# Compute silhouette scores
import numpy as np

# Scaling coefficient used by the shape-based distance d_hat.
def alpha(x, y):
    """Return the least-squares scale factor (x . y) / (y . y) that fits y to x."""
    cross_term = np.dot(x, y)
    y_energy = np.dot(y, y)
    return cross_term / y_energy

def d_hat(x, y):
    """Return ||x - alpha(x, y) * y|| / ||x||, the scale-invariant distance from x to y."""
    best_fit = alpha(x, y) * y
    residual = x - best_fit
    return np.linalg.norm(residual) / np.linalg.norm(x)

def compute_silhouette_scores(x, labels, metric=None):
    """
    Compute silhouette scores from a pairwise distance function.

    For each sample ``i``, ``a[i]`` is the mean distance to the other members
    of its own cluster and ``b[i]`` is the smallest mean distance to the
    members of any other cluster; the score is ``(b - a) / max(a, b)``.

    Parameters:
        x: Original time series data, shape (n_samples, n_features)
        labels: Cluster labels for each sample, shape (n_samples,); any
            array-like is accepted
        metric: Optional callable ``metric(u, v) -> float`` giving the distance
            from sample ``u`` to sample ``v``. When omitted, the notebook's
            ``d_hat`` function is used.

    Returns:
        mean_score: Mean silhouette score across all samples
        s: Silhouette score for each sample

    Raises:
        ValueError: If ``labels`` does not have one entry per sample.

    Notes:
        * A sample that is alone in its cluster scores 0 (the conventional
          definition) rather than 1.
        * When ``max(a, b)`` is 0 the score is 0 instead of the NaN that a
          0/0 division would produce.
        * If only one cluster is present, ``b`` stays 0 for every sample, so
          samples with a positive ``a`` score -1.
    """
    if metric is None:
        metric = d_hat  # resolved at call time from the notebook namespace

    x = np.asarray(x)
    labels = np.asarray(labels)  # lists would break the element-wise comparisons below
    n_samples = x.shape[0]
    if labels.shape[0] != n_samples:
        raise ValueError("labels must contain exactly one entry per sample")

    # Pairwise distance matrix; the full matrix is filled because a custom
    # metric is not required to be symmetric.
    D = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        for j in range(n_samples):
            D[i, j] = metric(x[i], x[j])

    # Index arrays of the members of every cluster that actually occurs.
    members = [np.flatnonzero(labels == cid) for cid in np.unique(labels)]

    a = np.zeros(n_samples)  # Mean intra-cluster distance
    b = np.zeros(n_samples)  # Mean nearest-cluster distance
    singleton = np.zeros(n_samples, dtype=bool)

    for own_pos, own in enumerate(members):
        for i in own:
            # a[i]: average distance to the other samples of the same cluster
            peers = own[own != i]
            if peers.size == 0:
                singleton[i] = True
            else:
                a[i] = np.mean(D[i, peers])

            # b[i]: smallest average distance to any other cluster
            other_means = [
                np.mean(D[i, other])
                for other_pos, other in enumerate(members)
                if other_pos != own_pos
            ]
            if other_means:
                b[i] = np.min(other_means)

    denom = np.maximum(a, b)
    s = np.zeros(n_samples)
    # `!= 0` (not `> 0`) keeps NaN distances visible in the result instead of
    # silently mapping them to a score of 0.
    valid = ~singleton & (denom != 0)
    s[valid] = (b[valid] - a[valid]) / denom[valid]

    return np.mean(s), s


In [None]:
# Score the clustering: mean silhouette plus the per-sample scores.
avg_silhouette, silhouette_scores = compute_silhouette_scores(x, labels)