In [1]:
import numpy as np 
import pandas as pd
import os
from pathlib import Path
import matplotlib.pyplot as plt 
import pickle


from synthetic_signal import SignalGenerator

from sklearn.model_selection import ParameterGrid
#from experiment import ExperimentSampleEvent

from methods import BasePersistentPattern
from competitors import Baseline,LatentMotif,MatrixProfile,Valmod,PanMatrixProfile
from joblib import Parallel,delayed

# Experiment Parameters

In [2]:
def get_trial(experiment_name,folder_path):
    """Return the next free trial index for an experiment.

    Result files are expected to be named ``<experiment_name>_<index>.<ext>``.
    The returned value is one more than the largest index found, or 0 when
    no such file exists.

    Args:
        experiment_name (str): prefix identifying the experiment.
        folder_path (str or Path): folder holding the result files.

    Returns:
        int: index to use for the next trial.
    """
    try:
        filenames = os.listdir(folder_path)
    except FileNotFoundError:
        # No result folder yet means no previous trial.
        return 0

    prefix = f"{experiment_name}_"
    trial_ids = []
    for filename in filenames:
        # Prefix match (not substring) so files of other experiments whose
        # name merely contains `experiment_name` are not counted.
        if not filename.startswith(prefix):
            continue
        suffix = filename[len(prefix):].split(".")[0]
        # Skip files that follow the prefix with something other than an index.
        if suffix.isdecimal():
            trial_ids.append(int(suffix))

    if trial_ids:
        return max(trial_ids) + 1
    return 0

In [3]:
# Number of signals generated for the experiment.
N_SIGNALS = 5
# Key of SCENARII selecting the synthetic-signal configuration.
SCENARIO = "variable_length_motif"

RESULT_FOLDER = Path("./Results/Synthetic/")
# Create the output folder up-front: get_trial lists it and the experiment
# later writes its CSV / pickle into it, both of which fail if it is missing.
RESULT_FOLDER.mkdir(parents=True, exist_ok=True)
TRIAL = get_trial(SCENARIO,RESULT_FOLDER)
# Both output paths derive from RESULT_FOLDER so the folder is defined once.
BACKUP_PATH = str(RESULT_FOLDER / f"{SCENARIO}_{TRIAL}.csv")
EXPERIMENT_PATH = str(RESULT_FOLDER / f"{SCENARIO}_{TRIAL}.pickle")

In [4]:
# Keyword arguments passed to SignalGenerator, one entry per named scenario.
SCENARII = {
    "default": {
        "n_motifs": 5,
        "motif_length": 100,
        "motif_amplitude": 1,
        "motif_fundamental": 4,
        "motif_type": 'Sin',
        "noise_amplitude": 0.1,
        "n_novelties": 0,
        "sparsity": 0.25,
        "sparsity_fluctuation": 0.2,
        "walk_amplitude": 0.1,
        "min_rep": 5,
        "max_rep": 5,
    },
    "equal_length_motif": {
        "n_motifs": 3,
        "motif_length": 100,
        "motif_amplitude": 1,
        "motif_fundamental": 4,
        "motif_type": 'Sin',
        "noise_amplitude": 0.1,
        "n_novelties": 0,
        "sparsity": 3,
        "sparsity_fluctuation": 0.2,
        "min_rep": 2,
        "max_rep": 10,
    },
    "variable_length_motif": {
        # (min, max) tuple instead of a single length
        "motif_length": (100, 150),
        "n_motifs": 3,
        "motif_amplitude": 1,
        "motif_fundamental": 4,
        "motif_type": 'Sin',
        "noise_amplitude": 0.1,
        "n_novelties": 0,
        "sparsity": 0.5,
        "sparsity_fluctuation": 0.8,
        "walk_amplitude": 0.1,
        "min_rep": 2,
        "max_rep": 5,
    },
}

# Dataset

In [5]:
#SCENARII = dict(
#    n_motifs = [3],
#    motif_length=[100],
#    motif_amplitude=[1],
#    motif_fundamental =[1],
#    motif_type =['Sin'],
#    noise_amplitude=[0.1],
#    n_novelties=[0,1],
#    sparsity=[0.5],
#    sparsity_fluctuation = [0.2],
#    walk_amplitude = [0.1],
#    min_rep=[2],
#    max_rep=[5]
#)

In [6]:
#n_signals= 11
#config_lst = ParameterGrid(SCENARII)
def generate_dataset(config_lst:list,n_signals:int)->tuple:
    """Build a synthetic dataset from a list of generator configurations.

    Every configuration is repeated ceil(n_signals / n_configs) times, and one
    signal is generated per repeated configuration. The global NumPy seed is
    set to the row position before each generation.

    Args:
        config_lst (list): list of dictionaries, each one a generator configuration
        n_signals (int): requested number of signals, shared between configurations

    Returns:
        tuple: list of signals, list of labels, DataFrame with one row per generated signal holding its configuration
    """
    config_df = pd.DataFrame.from_dict(config_lst)
    n_configs = config_df.shape[0]
    per_config = np.ceil(n_signals/n_configs).astype(int)

    # Row i of the expanded frame is the configuration used for signal i.
    row_ids = np.repeat(np.arange(n_configs),per_config)
    config_df = config_df.loc[row_ids].reset_index(drop=True)

    dataset, labels = [], []
    for seed,config in enumerate(config_df.to_dict(orient="records")):
        np.random.seed(seed)
        generated = SignalGenerator(**config).generate()
        signal, label = generated
        dataset.append(signal)
        labels.append(label)
    return dataset,labels,config_df


In [7]:
import time

from metric import SingleSampleScore,SampleScore,SingleEventScore,EventScore,AdjustedMutualInfoScore
from synthetic_signal import SignalGenerator

In [8]:
class Experiment: 
    """Fit every (algorithm, configuration) pair on every signal and score the result.

    Attributes set while running:
        logs_path_: path of the text file receiving failure messages, or None.
        df_: long-format DataFrame (one row per metric) accumulated by run_experiment.
    """

    def __init__(self,algorithms:list, configurations:list, nbins = 101,njobs=1,verbose = True) -> None:
        """Initialization

        Args:
            algorithms (list): list of algorithm classes
            configurations (list): one iterable of configuration dictionaries per algorithm class, in the same order as `algorithms`
            nbins (int, optional): number of thresholds used by the event-based scores. Defaults to 101.
            njobs (int, optional): passed as `n_jobs` to joblib.Parallel. Defaults to 1.
            verbose (bool, optional): print one line per (signal, algorithm, configuration). Defaults to True.
        """
        self.algorithms = algorithms
        self.configurations = configurations
        self.nbins = nbins
        self.njobs = njobs
        self.verbose = verbose
        # Defined here so signal_algo_class_experiement can report a failure
        # even when it is called before run_experiment.
        self.logs_path_ = None

    def _threshold_scores(self,prefix,precisions,recalls,fscores):
        """Expand per-threshold precision/recall/f-score lists into named rows, followed by their means."""
        rows = []
        thresholds = np.linspace(0,1,self.nbins)
        for t,p,r,f in zip(thresholds,precisions,recalls,fscores): 
            rows.append([f"{prefix}-precision_{np.round(t,2)}",p])
            rows.append([f"{prefix}-recall_{np.round(t,2)}",r])
            rows.append([f"{prefix}-fscore_{np.round(t,2)}",f])
        rows.append([f"{prefix}-auc-precision",np.mean(precisions)])
        rows.append([f"{prefix}-auc-recall",np.mean(recalls)])
        rows.append([f"{prefix}-auc-fscore",np.mean(fscores)])
        return rows

    def compute_scores(self,label,prediction): 
        """Compute all metrics for one prediction.

        Args:
            label: ground-truth mask; summed along axis 0 to build the single-pattern mask.
            prediction: predicted mask, handled the same way as `label`.

        Returns:
            list: [metric_name, value] pairs.
        """
        # Collapse all patterns into one binary mask: 1 wherever any row is active.
        single_pred = np.clip(np.sum(prediction,axis=0),0,1) 
        single_label = np.clip(np.sum(label,axis=0),0,1)

        scores = []

        #single sample score
        p,r,f = SingleSampleScore().score(single_label,single_pred)
        scores.append(["sss-precision",p])
        scores.append(["sss-recall",r])
        scores.append(["sss-fscore",f])

        #sample score 
        p,r,f = SampleScore().score(label,prediction)
        scores.append(["ss-precision",p])
        scores.append(["ss-recall",r])
        scores.append(["ss-fscore",f])

        # weighted sample score 
        p,r,f = SampleScore(averaging="weighted").score(label,prediction)
        scores.append(["w-ss-precision",p])
        scores.append(["w-ss-recall",r])
        scores.append(["w-ss-fscore",f])

        #single event score
        lp,lr,lf = SingleEventScore(nbins=self.nbins).all_score(single_label,single_pred)
        scores.extend(self._threshold_scores("ses",lp,lr,lf))

        #event score
        lp,lr,lf = EventScore(nbins=self.nbins).all_score(label,prediction)
        scores.extend(self._threshold_scores("es",lp,lr,lf))

        # weighted event score
        # NOTE(review): this call is identical to the unweighted one above, so
        # the "w-es-*" rows duplicate the "es-*" rows. SampleScore takes
        # averaging="weighted"; confirm whether EventScore accepts it too.
        lp,lr,lf = EventScore(nbins=self.nbins).all_score(label,prediction)
        scores.extend(self._threshold_scores("w-es",lp,lr,lf))

        #adjusted mutual information
        scores.append(["amis",AdjustedMutualInfoScore().score(label,prediction)])

        return scores
    
    def signal_algo_class_experiement(self,signal_idx,signal,label,algo_class,config,config_idx): 
        """Fit one algorithm configuration on one signal and score its prediction.

        Args:
            signal_idx (int): index of the signal, copied into the output rows.
            signal: signal passed to `algo.fit`.
            label: 2-D label array; rows that sum to zero are dropped.
            algo_class: algorithm class, instantiated with the configuration as keyword arguments.
            config (dict): configuration of the algorithm; it is not modified.
            config_idx (int): index of the configuration, copied into the output rows.

        Returns:
            pd.DataFrame or None: one row per metric, or None when the algorithm failed.
        """
        #keep only labels row that are activated by the signal 
        label = label[label.sum(axis=1)>0]

        #update the number of patterns to predict if required
        # Work on a copy so the caller's dict is untouched; the override is
        # applied after the copy so it actually reaches the algorithm.
        t_config = config.copy()
        if t_config.get("n_patterns") is not None:
            t_config["n_patterns"] = label.shape[0]

        try:
            #get predictions
            algo = algo_class(**t_config)
            start = time.time()
            algo.fit(signal)
            end = time.time()

            #compute scores
            scores = self.compute_scores(label,algo.prediction_mask_)
            tdf = pd.DataFrame(scores,columns=["metric","score"])
            tdf["algorithm"] = algo_class.__name__
            tdf["config_idx"] = config_idx
            tdf["execution_time"] = end - start
            tdf["signal_idx"] = signal_idx
            if self.verbose: 
                s1 = np.round(tdf[tdf["metric"] == "es-auc-fscore"].score.values[0],2)
                s2 = np.round(tdf[tdf["metric"] == "amis"].score.values[0],2)
                print(f"signal_id: {signal_idx}, algo: {algo_class.__name__}, config_id: {config_idx}, f-auc: {s1}, ami: {s2}")
            
            return tdf 

        except Exception as exc: 
            # Best effort: one failing configuration must not stop the whole
            # experiment, but keep the reason so it can be diagnosed.
            # `Exception` leaves KeyboardInterrupt / SystemExit uncaught.
            s= f"signal_id: {signal_idx}, algo: {algo_class.__name__}, config_id: {config_idx} failed to fit."
            s += f" ({type(exc).__name__}: {exc})"
            if self.verbose: 
                print(s)
            if self.logs_path_ is not None:
                with open(self.logs_path_,"a") as f: 
                    f.write(s +"\n")
            return None

    def run_experiment(self,dataset:np.ndarray,labels:np.ndarray,backup_path = None,batch_size=10,logs_path = None,verbose = True)->pd.DataFrame:
        """Run every algorithm configuration on every signal.

        Args:
            dataset (np.ndarray): sequence of signals, variable length allowed
            labels (np.ndarray): sequence of label arrays, one per signal; each is reduced with `sum(axis=1)` and so must be 2-D
            backup_path (str, optional): CSV path rewritten after each batch. If None, no saving and all signals form one batch. Defaults to None.
            batch_size (int, optional): number of signals per batch, used only when `backup_path` is set. Defaults to 10.
            logs_path (str, optional): text file receiving one line per failed fit. Defaults to None.
            verbose (bool, optional): print progress after each batch. Defaults to True.

        Returns:
            pd.DataFrame: scores_df

        Raises:
            ValueError: if `backup_path` is set and `batch_size` is smaller than 1.
        """
        self.logs_path_ = logs_path
        
        pairs = list(zip(dataset,labels))
        n_signals = len(pairs)
        n_configs = sum(len(conf) for conf in self.configurations)
        total = n_signals*n_configs

        if backup_path is not None: 
            if batch_size < 1:
                raise ValueError("batch_size must be a positive integer when backup_path is set")
            step = batch_size
        else:
            # Without a backup there is nothing to checkpoint: one single batch.
            step = max(n_signals,1)
        batches = [pairs[start:start+step] for start in range(0,n_signals,step)]

        self.df_ = pd.DataFrame()

        # Number of signals already processed; also the offset of the next batch.
        counts = 0
        for batch in batches: 
            results = Parallel(n_jobs=self.njobs)(
                delayed(self.signal_algo_class_experiement)(counts+id_s,signal,label,algo,config,id_c) 
                for id_s,(signal,label) in enumerate(batch) 
                for id_a,algo in enumerate(self.algorithms)
                for id_c,config in enumerate(self.configurations[id_a])
                )
            counts += len(batch)

            # Failed fits come back as None; skip them explicitly.
            frames = [res for res in results if res is not None]
            if frames:
                if not self.df_.empty:
                    frames.insert(0,self.df_)
                self.df_= pd.concat(frames).reset_index(drop = True)
                self.df_ = self.df_.astype({'metric':str, "score":float, "algorithm":str,'config_idx':int,"signal_idx":int})

            if backup_path is not None: 
                self.df_.to_csv(backup_path)

            if verbose:
                print(f"Achieved [{counts*n_configs}/{total}]")

        return self.df_       

#to track

In [9]:
# Build a generator from the keyword arguments of the selected scenario.
sg = SignalGenerator(**SCENARII[SCENARIO])
# Generate once; the returned (signal, label) pair is discarded here.
sg.generate()
# NOTE(review): presumably plots the signal generated just above — confirm against SignalGenerator.plot.
sg.plot()

In [10]:
# Generate N_SIGNALS signals with the shared generator `sg`.
# The global NumPy seed is set to the signal index before each generation.
dataset, labels = [], []
for i in range(N_SIGNALS):
    np.random.seed(i)
    signal, label = sg.generate()
    dataset += [signal]
    # Labels are stored as integer arrays.
    labels += [label.astype(int)]

# Algorithm Configuration

In [11]:
# One parameter grid per algorithm; each grid entry is one keyword-argument dict.
configs_bpp = ParameterGrid({
    "n_patterns": [3],
    "n_neighbors": [5],
    "wlen": [100],
    "alpha": [10],
    "beta": [0],
    "n_jobs": [1],
})
configs_bl = ParameterGrid({
    "n_patterns": [3],
    "radius": np.linspace(5,10,10),
    "wlen": [100],
    "distance_name": ['NormalizedEuclidean'],
    "n_jobs": [1],
})
configs_lm = ParameterGrid({
    "n_patterns": [3],
    "radius": np.linspace(10,15,10),
    "wlen": [100],
    "learning_rate": [0.1],
})
configs_mp = ParameterGrid({
    "n_patterns": [3],
    "radius_ratio": np.linspace(1,5,15),
    "wlen": [150],
    "distance_name": ['NormalizedEuclidean'],
    "n_jobs": [1],
})
configs_vm = ParameterGrid({
    "n_patterns": [3],
    "radius_ratio": np.linspace(1,5,10),
    "min_wlen": [int(100*0.95)],
    "max_wlen": [int(100*1.05)],
    "distance_name": ['NormalizedEuclidean'],
    "n_jobs": [1],
})
# Single dummy grid; not referenced by the algorithm lists in this notebook view.
configs_test = ParameterGrid({"a": [0]})

In [12]:
# Each algorithm class is declared next to its parameter grid so the two
# lists built below stay aligned position by position.
_ALGORITHM_GRIDS = [
    (BasePersistentPattern, configs_bpp),
    (Baseline, configs_bl),
    (LatentMotif, configs_lm),
    (MatrixProfile, configs_mp),
    (Valmod, configs_vm),
]

algorithms = [algo_class for algo_class, _ in _ALGORITHM_GRIDS]

configurations = [grid for _, grid in _ALGORITHM_GRIDS]

# Experiment

In [13]:
# Run all algorithms on the dataset, backing up the scores to CSV after each batch of 3 signals.
ese = Experiment(algorithms, configurations, njobs=6)
ese.run_experiment(
    dataset,
    labels,
    backup_path=BACKUP_PATH,
    batch_size=3,
    logs_path="./Results/Synthetic/logs.txt",
)
# Persist the whole Experiment object.
with open(EXPERIMENT_PATH, 'wb') as pickle_file:
    pickle.dump(ese, pickle_file)

signal_id: 0, algo: Baseline, config_id: 0, f-auc: 0.32, ami: 0.32
signal_id: 0, algo: Baseline, config_id: 1, f-auc: 0.29, ami: 0.23
signal_id: 0, algo: BasePersistentPattern, config_id: 0, f-auc: 0.73, ami: 0.6
signal_id: 0, algo: Baseline, config_id: 2, f-auc: 0.36, ami: 0.36
signal_id: 0, algo: Baseline, config_id: 3, f-auc: 0.36, ami: 0.26
signal_id: 0, algo: Baseline, config_id: 4, f-auc: 0.4, ami: 0.3
signal_id: 0, algo: Baseline, config_id: 5, f-auc: 0.39, ami: 0.27
signal_id: 0, algo: Baseline, config_id: 6, f-auc: 0.17, ami: 0.06
signal_id: 0, algo: Baseline, config_id: 9, f-auc: 0.15, ami: 0.01
signal_id: 0, algo: Baseline, config_id: 7, f-auc: 0.21, ami: 0.06
signal_id: 0, algo: Baseline, config_id: 8, f-auc: 0.24, ami: 0.09
signal_id: 0, algo: LatentMotif, config_id: 0, f-auc: 0.26, ami: 0.32
signal_id: 0, algo: LatentMotif, config_id: 1, f-auc: 0.25, ami: 0.32
signal_id: 0, algo: LatentMotif, config_id: 3, f-auc: 0.32, ami: 0.44
signal_id: 0, algo: LatentMotif, config_id:

In [21]:
# Stack an all-zero block of the same shape under each label array.
# Bound to a new name so re-running this cell does not keep stacking zeros
# onto `labels`.
labels_padded = [np.vstack((lab,np.zeros_like(lab))) for lab in labels]
ese1= Experiment(algorithms,configurations,njobs=6)
# NOTE(review): BACKUP_PATH and EXPERIMENT_PATH are the same paths used by the
# previous run, so this overwrites that run's CSV and pickle — confirm intended.
ese1.run_experiment(dataset,labels_padded,backup_path=BACKUP_PATH,batch_size=3,logs_path="./Results/Synthetic/logs.txt")
with open(EXPERIMENT_PATH, 'wb') as filehandler: 
    pickle.dump(ese1,filehandler)


signal_id: 0, algo: Baseline, config_id: 0, f-auc: 0.32, ami: 0.32
signal_id: 0, algo: BasePersistentPattern, config_id: 0, f-auc: 0.73, ami: 0.6
signal_id: 0, algo: Baseline, config_id: 1, f-auc: 0.29, ami: 0.23
signal_id: 0, algo: Baseline, config_id: 2, f-auc: 0.36, ami: 0.36
signal_id: 0, algo: Baseline, config_id: 3, f-auc: 0.36, ami: 0.26
signal_id: 0, algo: Baseline, config_id: 4, f-auc: 0.4, ami: 0.3
signal_id: 0, algo: Baseline, config_id: 5, f-auc: 0.39, ami: 0.27
signal_id: 0, algo: Baseline, config_id: 6, f-auc: 0.17, ami: 0.06
signal_id: 0, algo: Baseline, config_id: 7, f-auc: 0.21, ami: 0.06
signal_id: 0, algo: Baseline, config_id: 9, f-auc: 0.15, ami: 0.01
signal_id: 0, algo: Baseline, config_id: 8, f-auc: 0.24, ami: 0.09
signal_id: 0, algo: LatentMotif, config_id: 0, f-auc: 0.25, ami: 0.27
signal_id: 0, algo: LatentMotif, config_id: 4, f-auc: 0.31, ami: 0.44
signal_id: 0, algo: LatentMotif, config_id: 1, f-auc: 0.31, ami: 0.35
signal_id: 0, algo: LatentMotif, config_id:

In [None]:
# Display the list currently bound to `labels` as the cell output.
labels