In [1]:
import numpy as np


In [19]:
class MTS:
    """Thin wrapper around one multivariate time series.

    The wrapped array is kept as-is in ``self.ts``. In this notebook it is the
    output of ``DataFrame.to_numpy()``; presumably rows are time steps and
    columns are variables -- TODO confirm against the data source.
    """

    def __init__(self, ts):
        # Stored by reference: no copy and no validation is performed.
        self.ts = ts

    def cov_mat(self, centering = True):
        """Return the cross-product matrix ``X^T X`` of the series.

        Parameters
        ----------
        centering : bool, default True
            When truthy, the mean over axis 0 is subtracted from the data
            before the product is formed.

        Returns
        -------
        The product ``X^T X``. It is not divided by the number of rows, so it
        is a scatter matrix rather than a normalised covariance.
        """
        data = self.ts - self.ts.mean(axis=0) if centering else self.ts
        return data.T @ data

In [277]:
class CPCA:
    """Common PCA: one set of principal axes shared by a group of series.

    ``fit`` averages the matrices returned by ``cov_mat()`` for every series in
    the group and stores the singular value decomposition of that average
    (plus ``epsilon`` times the identity). All fitted attributes stay ``None``
    until ``fit`` has been called with a non-empty list.
    """

    def __init__(self, epsilon = 1e-5):
        # Plain average of the per-series matrices (without the epsilon term).
        self.cov = None
        # Weight of the identity matrix added before the decomposition.
        self.epsilon = epsilon
        # Factors returned by np.linalg.svd, in the order (U, S, V); note that
        # numpy returns the third factor already transposed ("Vh").
        self.U = None
        self.V = None
        self.S = None

    def fit(self,listMTS):
        """Estimate the common axes from a list of series.

        Parameters
        ----------
        listMTS : sequence
            Objects exposing ``cov_mat()``. An empty sequence is ignored and
            leaves the model in its current (possibly unfitted) state.
        """
        if len(listMTS) > 0:
            cov_mats = [mts.cov_mat() for mts in listMTS]
            self.cov = sum(cov_mats) / len(cov_mats)
            n_features = self.cov.shape[1]
            # Add epsilon * Id in order to ensure invertibility, and decompose
            # this regularised matrix so that epsilon actually takes effect.
            regularised = self.cov + self.epsilon * np.eye(n_features)
            U, S, V = np.linalg.svd(regularised)
            self.U = U
            self.S = S
            self.V = V

    def pred(self, listMTS, ncp):
        """Project every series on the first ``ncp`` common axes.

        Returns a list with one projected array per series (``elem.ts`` times
        the first ``ncp`` columns of ``U``), or an empty list when the model
        has not been fitted. The data are projected as stored, without
        centering.
        """
        if self.U is None:
            return []
        basis = self.U[:, :ncp]
        return [elem.ts @ basis for elem in listMTS]

    def reconstitution_error(self, listMTS, ncp):
        """Reconstruction error of every series using ``ncp`` common axes.

        Each series is projected on the first ``ncp`` axes and mapped back; the
        error is the sum of squared differences with the original data (a sum,
        not a mean).

        Returns
        -------
        A list with one error per series when the model is fitted, otherwise a
        numpy array filled with ``inf`` of length ``len(listMTS)`` so that an
        unfitted model never wins a comparison on minimum error.
        """
        if self.U is None:
            return np.full(len(listMTS), np.inf)
        basis = self.U[:, :ncp]
        projections = self.pred(listMTS, ncp)
        return [
            ((mts.ts - proj @ basis.transpose()) ** 2).sum()
            for mts, proj in zip(listMTS, projections)
        ]

## MTS

In [493]:
# Import lp1 for test.
# For i = 1..87, read 15 rows after skipping the first 1 + 18*i lines of the
# remote file. Presumably each record spans 18 lines (a label line followed by
# 15 measurement rows) -- TODO confirm against the data set description.
# NOTE(review): the range starts at 1, so the beginning of the file (before
# line 19) is never read -- confirm that this is intended.
# NOTE(review): the URL is passed to read_csv once per record, i.e. the file is
# requested 87 times.
import pandas as pd
res = [pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/robotfailure-mld/lp1.data", sep = "\t",skiprows=1+(18*i), nrows=15, header = None) for i in range(1,88)]
# Drop column 0 of every frame and wrap the remaining values in an MTS object.
res = [MTS(elem.drop(columns = [0]).to_numpy()) for elem in res]

In [494]:
# For i = 1..87, read the single row found after skipping the first 18*i lines
# of the same remote file, i.e. the line just before each 15-row block read in
# the previous cell. Presumably this is the class label of the record -- TODO
# confirm against the data set description.
name = [pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/robotfailure-mld/lp1.data", sep = "\t",skiprows=(18*i), nrows=1, header = None) for i in range(1,88)]

In [557]:
# Keep only the first cell (column 0, row 0) of every single-row frame.
# NOTE(review): this rebinds `name` from its own elements, so running the cell
# a second time would index into the already extracted values instead.
name = [frame[0][0] for frame in name]

# Give every distinct label an integer id, in the order returned by np.unique.
name_unique = list(np.unique(name))
dict_name = {label: idx for idx, label in enumerate(name_unique)}

# Ground-truth cluster id of every sequence, in loading order.
gt_nb_cluster = np.array([dict_name.get(label) for label in name])

In [495]:
# Bind the list of MTS objects to X, the name used by the clustering code.
# NOTE(review): the clustering loop cell further down rebinds `res` to an error
# matrix, so this cell must be run before that loop.
X = res

In [544]:
# Settings read by the clustering loop cell below.
K = 5  # number of clusters
N = 87  # number of sequences (one per i in range(1, 88) in the loading cell)
p = 2  # number of components passed to reconstitution_error
epsilon = 1  # stop once the total error changes by no more than this amount

In [581]:
# Initial assignment: repeat the cluster ids 0..K-1 and keep the first N
# entries, so sequence n starts in cluster n % K.
index_cluster = np.tile(np.arange(K), int(N/K) + 1)[:N]
# Alternative initialisation (disabled): draw every cluster id at random.
#index_cluster = np.random.choice(K, N)

In [582]:
# Iterative clustering: alternately fit one CPCA model per cluster and move
# every sequence to the cluster whose model reconstructs it with the smallest
# error. Reads the globals X, K, p, epsilon and index_cluster; rebinds
# index_cluster and `res`.
to_continue = True
iter_max = 1000
i = 0
old_error = -1
while to_continue:

    # Split all MTS according to their current cluster.
    # They are stored in a list of lists of MTS (one inner list per cluster).
    MTS_by_cluster = [[X[i] for i in list(np.where(index_cluster == j)[0])] for j in range(K)]

    CPCA_by_cluster = [CPCA() for i in range(K)]

    # Fit one model per cluster (the list comprehension is used only for its
    # side effect; a cluster with no member leaves its model unfitted).
    [CPCA_by_cluster[i].fit(MTS_by_cluster[i]) for i in range(K)]

    # Error matrix: one row per cluster model, one column per sequence.
    res = np.array([cpca.reconstitution_error(X, p) for cpca in CPCA_by_cluster])
    # Update the cluster index; print whether the assignment is unchanged.
    print(np.all(res.argmin(axis = 0) == index_cluster))
    index_cluster = res.argmin(axis = 0)

    # New total error: sum over sequences of their smallest error.
    new_error = res.min(axis = 0).sum()
    to_continue = (abs(old_error - new_error) > epsilon) & (iter_max > i)

    # Update
    old_error = new_error # poor criterion? maybe track how the indices evolve instead?
    i += 1

False
False
False
False
False
False
False
False
False
False
False
True
True


In [549]:
# Display the cluster index currently assigned to each sequence.
# NOTE(review): this cell's execution count (549) is lower than the one of the
# clustering loop cell (582), so the stored output may come from an earlier run.
index_cluster

array([0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 1,
       1, 0, 4, 0, 0, 0, 1, 1, 0, 0, 3, 1, 4, 2, 3, 3, 3, 3, 1, 0, 3, 4,
       4, 2, 2, 2, 3, 4, 3, 2, 2, 2, 0, 0, 0, 4, 4, 4, 4, 3, 2, 2, 1, 1,
       3, 3, 3, 3, 3, 2, 2, 3, 2, 3, 4, 4, 0, 4, 2, 2, 4, 2, 0, 0, 3],
      dtype=int64)

$$\text{Pre} = \sum_{j=1}^{K} \underbrace{\frac{\left|C_{j}\right|}{N}}_{\text{prop_part}} \times \underbrace{\max _{i=1,2, \cdots, g} \frac{\left|G_{i} \cap C_{j}\right|}{\left|C_{j}\right|}}_{\text{max_part}}$$

In [580]:
def precision(gt_cluster, pred_cluster):
    """Clustering precision of a predicted partition against the ground truth.

    Computes ``sum_j (|C_j| / N) * max_i (|G_i & C_j| / |C_j|)`` where the
    ``C_j`` are the predicted clusters and the ``G_i`` the ground-truth
    classes. The two factors simplify to ``max_i |G_i & C_j| / N``, which is
    what is accumulated here.

    Parameters
    ----------
    gt_cluster : array-like
        Ground-truth label of every sample (any hashable/comparable values).
    pred_cluster : array-like
        Predicted cluster label of every sample, same length as ``gt_cluster``.

    Returns
    -------
    float
        Value in [0, 1]; ``0.0`` for empty input.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    gt = np.asarray(gt_cluster)
    pred = np.asarray(pred_cluster)
    if gt.shape != pred.shape:
        raise ValueError("gt_cluster and pred_cluster must have the same shape")

    n_samples = pred.shape[0] if pred.ndim > 0 else 0
    if n_samples == 0:
        return 0.0

    gt_labels = np.unique(gt)
    matched = 0
    # Only clusters that actually occur are visited, so an empty cluster can
    # never cause a division by zero.
    for cluster in np.unique(pred):
        members = gt[pred == cluster]
        # Size of the best-represented ground-truth class inside this cluster.
        matched += max(int(np.count_nonzero(members == label)) for label in gt_labels)
    return matched / n_samples

In [598]:
class Mc2PCA:
    """Clustering of multivariate time series based on common PCA models.

    Starting from a round-robin assignment, ``fit`` alternates between fitting
    one ``CPCA`` model per cluster and moving every series to the cluster whose
    model gives it the smallest reconstruction error.
    """

    def __init__(self,K, ncp, itermax = 1000, conv_crit = 1e-5):
        # Number of clusters.
        self.K = K
        # Number of series seen by the last call to fit (None before that).
        self.N = None
        # Number of components passed to CPCA.reconstitution_error.
        self.ncp = ncp
        # Maximum number of iterations performed by fit.
        self.iter_max = itermax
        # True when the last fit stopped because the total error changed by
        # less than conv_crit between two iterations.
        self.converged = False
        # Per-cluster CPCA models fitted during the last iteration of fit.
        self.CPCA_final = None
        # Threshold on the change of the total error between two iterations.
        self.conv_crit = conv_crit

    def fit(self, X):
        """Cluster the series in ``X`` and return their cluster indices.

        Parameters
        ----------
        X : sequence
            Series objects accepted by ``CPCA.fit`` and
            ``CPCA.reconstitution_error``.

        Returns
        -------
        numpy array with, for every series, the index of the cluster model
        giving the smallest reconstruction error in the last iteration.

        Notes
        -----
        ``self.CPCA_final`` holds the models fitted in the last iteration,
        i.e. on the assignment that preceded the returned one. At least one
        iteration is always performed, and never more than ``iter_max`` when
        ``iter_max`` is positive.
        """
        N = len(X)
        self.N = N
        self.converged = False

        # Initialisation: series n starts in cluster n % K.
        index_cluster = np.tile(np.arange(self.K), N // self.K + 1)[:N]

        # inf guarantees the first comparison never looks like convergence,
        # whatever the value of conv_crit.
        old_error = np.inf
        n_iter = 0

        while True:
            # Split all MTS according to their current cluster: one list of
            # series per cluster.
            MTS_by_cluster = [
                [X[idx] for idx in np.where(index_cluster == j)[0]]
                for j in range(self.K)
            ]

            # Fit one fresh model per cluster; an empty cluster leaves its
            # model unfitted, which then yields infinite errors below.
            CPCA_by_cluster = [CPCA() for _ in range(self.K)]
            for model, members in zip(CPCA_by_cluster, MTS_by_cluster):
                model.fit(members)

            # Error matrix: one row per cluster model, one column per series.
            res = np.array([cpca.reconstitution_error(X, self.ncp) for cpca in CPCA_by_cluster])

            # Update the cluster index.
            index_cluster = res.argmin(axis = 0)

            # New total error: sum over series of their smallest error.
            new_error = res.min(axis = 0).sum()
            n_iter += 1

            delta = abs(old_error - new_error)
            self.converged = bool(delta < self.conv_crit)
            old_error = new_error

            # Stop when the error no longer moves by more than conv_crit, or
            # once iter_max iterations have been performed.
            if not (delta > self.conv_crit) or n_iter >= self.iter_max:
                break

        self.CPCA_final = CPCA_by_cluster
        return index_cluster

In [599]:
# Cluster the sequences in X with K = 4 clusters and ncp = 3 components; the
# value of the last expression (the cluster index array) is shown as output.
m = Mc2PCA(4,3)
m.fit(X)

array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 0, 0, 0,
       3, 0, 1, 0, 1, 2, 1, 1, 0, 1, 0, 3, 2, 3, 2, 1, 2, 1, 0, 1, 1, 0,
       0, 2, 2, 2, 1, 1, 2, 3, 1, 1, 3, 0, 0, 0, 2, 0, 0, 0, 3, 3, 1, 1,
       1, 1, 0, 0, 1, 2, 2, 2, 1, 0, 2, 2, 0, 0, 1, 1, 0, 2, 0, 0, 0],
      dtype=int64)