In [1]:
import argparse
import warnings
import pandas as pd
import os
# NOTE(review): os.system runs the command in a child shell, so this `export`
# only changes that shell's environment and is discarded when the call returns.
# It does not modify PYTHONPATH or sys.path for this kernel. Confirm whether the
# project imports below are meant to rely on it.
os.system("export PYTHONPATH=`readlink -f ./`")
from config import *
from houses import TEST_HOUSE,TRAIN_HOUSE
from archive.load_tepco import load_house_dataset_by_houses,load_house_dataset_by_houses_ex
from time2graph.utils.base_utils import Debugger
from time2graph.core.model_TEPCO import Time2Graph
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.under_sampling import RandomUnderSampler
from time2graph.core.shapelet_embedding import ShapeletEmbedding
from time2graph.core.shapelet_utils import transition_matrixs,__mat2edgelist,graph_embedding
from time2graph.core.shapelet_utils import shapelet_distance,cross_graph_embedding
from sklearn.preprocessing import minmax_scale
from cross_matrix import cross_matrix,shape_norm,combine_mat
# House identifiers as zero-padded, three-character strings (e.g. 7 -> "007").
testhouse = list(map(lambda house_id: str(house_id).zfill(3), TEST_HOUSE))
trainhouse = list(map(lambda house_id: str(house_id).zfill(3), TRAIN_HOUSE))

## 參數集合

In [2]:
# Behaviour label to model; reused below as both `args.behav` and `args.dataset`.
behav = 'sleep'


class args:
    # Plain attribute container used in place of parsed command-line arguments.
    # Every setting of the run lives here as a class attribute.

    # Segmentation settings.
    seg_length = 5
    num_segment = 5
    cutpoints = [(0, 3), (2, 5)]

    # Behaviour / dataset naming (the right-hand `behav` is the module-level value).
    behav = behav
    dataset = behav

    # Values forwarded to the Time2Graph constructor.
    K = 20
    C = 40
    opt_metric = 'accuracy'
    init = 0
    warp = 2
    gpu_enable = True
    tflag = True
    embed = 'aggregate'
    cmethod = 'greedy'
    percentile = 10
    batch_size = 16
    embed_size = 16
    njobs = 5
    optimizer = 'Adam'
    measurement = 'gdtw'
    alpha = 0.1
    beta = 0.05
    scaled = True
    norm = False
    # Forwarded to the model as `global_flag` despite the `no_` prefix.
    no_global = True
    multi_graph = False
    data_size = 1

    # Classifier selection and evaluation settings.
    kernel = 'dts'
    feature = 'all'
    n_splits = 5

In [3]:
# Hyper-parameters for the downstream classifier, selected by `args.kernel`.
# The resulting `opt_args` dict is passed to `m.train_classfit` later on.
if args.kernel == 'xgb':
    opt_args = {
        'max_depth': 16,
        'learning_rate': 0.2,
        'scale_pos_weight': 1,
        'booster': 'gbtree',
    }
elif args.kernel in ('dts', 'rf'):
    opt_args = {
        'criterion': 'gini',
        # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3. For tree
        # and forest classifiers it was an alias of 'sqrt', so this keeps the
        # same behaviour on old versions and still works on new ones.
        'max_features': 'sqrt',
        'max_depth': 10,
        'min_samples_split': 4,
        'min_samples_leaf': 3,
        'class_weight': 'balanced',
    }
else:
    # Any other kernel name has no parameter set defined here.
    raise ValueError("please choose a classifier")

## load data

In [4]:
# Load the train/test splits for the selected behaviour from the configured houses.
x_train, y_train, x_test, y_test, z_train, z_test = load_house_dataset_by_houses_ex(
    TEST_HOUSE=testhouse,
    TRAIN_HOUSE=trainhouse,
    assign_behavior=behav,
)

# Log the first two dimensions of the training array.
n_rows, n_cols = x_train.shape[0], x_train.shape[1]
Debugger.info_print('data shape {}x{}'.format(n_rows, n_cols))

# Log the share of positive labels (sum of labels / number of labels) and the
# sample count for each split.
for split_message, split_labels in (
    ('training: {:.2f} positive ratio with {}', y_train),
    ('test: {:.2f} positive ratio with {}', y_test),
):
    positive_ratio = float(sum(split_labels) / len(split_labels))
    Debugger.info_print(split_message.format(positive_ratio, len(split_labels)))

[info]data shape 7296x25
[info]training: 0.34 positive ratio with 7296
[info]test: 0.37 positive ratio with 1824


In [5]:
# Path of the shapelet cache file, keyed by dataset, candidate method, K and
# segment length.
shapelets_cache_path = '{}/scripts/cache/{}_{}_{}_{}_shapelets.cache'.format(
    module_path, args.dataset, args.cmethod, args.K, args.seg_length)

# Keyword arguments for the model, one per line, in the original call order.
model_options = dict(
    kernel=args.kernel,
    K=args.K,
    C=args.C,
    seg_length=args.seg_length,
    opt_metric=args.opt_metric,
    init=args.init,
    gpu_enable=args.gpu_enable,
    warp=args.warp,
    tflag=args.tflag,
    mode=args.embed,
    percentile=args.percentile,
    candidate_method=args.cmethod,
    batch_size=args.batch_size,
    njobs=args.njobs,
    optimizer=args.optimizer,
    alpha=args.alpha,
    beta=args.beta,
    measurement=args.measurement,
    representation_size=args.embed_size,
    data_size=args.data_size,
    scaled=args.scaled,
    norm=args.norm,
    # `args.no_global` is forwarded unchanged as the model's `global_flag`.
    global_flag=args.no_global,
    multi_graph=args.multi_graph,
    shapelets_cache=shapelets_cache_path,
    feature_mode=args.feature,
    label_all=args.behav,
    cutpoints=args.cutpoints,
)
m = Time2Graph(**model_options)

[info]Time2GraphEmbed
[info]initialize t2g model with {'kernel': 'dts', 'kwargs': {'candidate_method': 'greedy', 'njobs': 5, 'optimizer': 'Adam', 'representation_size': 16, 'feature_mode': 'all', 'label_all': 'sleep', 'cutpoints': [(0, 3), (2, 5)]}, 'K': 20, 'C': 40, 'seg_length': 5, 'warp': 2, 'tflag': True, 'opt_metric': 'accuracy', 'mode': 'aggregate', 'batch_size': 16, 'gpu_enable': True, 'percentile': 10, 'shapelets': None, 'sembeds': None, 'clf': None, 'lr': 0.01, 'p': 2, 'alpha': 0.1, 'beta': 0.05, 'multi_graph': False, 'debug': True, 'measurement': 'gdtw', 'verbose': False, 'global_flag': True, 'cutpoints': [(0, 3), (2, 5)]}


### 學習shapelet

In [6]:
# Per-dataset cache directory (note the trailing slash); the same `cache_dir`
# value is reused by the embedding cells further down.
cache_dir = '{}/scripts/cache/{}/'.format(module_path, args.dataset)
# Learn shapelets from the training split; later cells read them from `m.t2g.shapelets`.
m.learn__shapelet(X=x_train, Y=y_train,Z=z_train, cache_dir=cache_dir)

[info]learn_shapelets


In [7]:
# The embedding model is created in a later cell, so it must not exist yet.
assert m.t2g.sembeds is None

# Scale each of the `m.data_size` slices along the last axis with its own scaler.
# Fixes relative to the previous version of this cell:
#   * `X_scaled` was re-allocated with zeros after being filled, so the "scaled"
#     input was an all-zero array;
#   * `np.float` no longer exists in NumPy >= 1.24; the builtin `float` is the
#     same dtype;
#   * the separate `fit` pass was redundant because `fit_transform` fits again
#     (assumes scikit-learn-style scalers in `m.data_scaler` - TODO confirm).
X_scaled = np.zeros(x_train.shape, dtype=float)
for k in range(m.data_size):
    X_scaled[:, :, k] = m.data_scaler[k].fit_transform(x_train[:, :, k])

# Choose the array to feed the embedding model.
# NOTE(review): none of the visible later cells read `inputx`; they pass
# `x_train` directly. Confirm which one is intended.
if args.scaled:
    Debugger.info_print('scaled embedding model...')
    inputx = X_scaled
else:
    Debugger.info_print('unscaled embedding model...')
    inputx = x_train

[info]scaled embedding model...


### m.t2g.sembeds 是ShapeletEmbedding

In [8]:
# Shapelets must have been learnt before the embedding model can be built.
assert m.t2g.shapelets is not None, 'shapelets has not been learnt yet'

# Explicit options for the embedding model, taken from the notebook settings.
embedding_options = dict(
    seg_length=args.seg_length,
    tflag=args.tflag,
    multi_graph=args.multi_graph,
    cache_dir=cache_dir,
    tanh=False,
    debug=m.t2g.debug,
    percentile=args.percentile,
    measurement=args.measurement,
    mode=args.embed,
    global_flag=args.no_global,
)
# The model's stored extra keyword arguments are forwarded after the explicit
# options; a key present in both still raises TypeError, as before.
m.t2g.sembeds = ShapeletEmbedding(**embedding_options, **m.t2g.kwargs)


[info]initialize ShapeletEmbedding model with ops: {'seg_length': 5, 'tflag': True, 'multi_graph': False, 'cache_dir': '/home/sun/time2graph-sun/scripts/cache/sleep/', 'tanh': False, 'debug': True, 'percentile': 10, 'dist_threshold': -1, 'measurement': 'gdtw', 'mode': 'aggregate', 'global_flag': True, 'deepwalk_args': {'candidate_method': 'greedy', 'njobs': 5, 'optimizer': 'Adam', 'representation_size': 16, 'feature_mode': 'all', 'label_all': 'sleep', 'cutpoints': [(0, 3), (2, 5)]}, 'embed_size': 16, 'embeddings': [], 'cutpoints': [(0, 3), (2, 5)]}


### 計算前後段的transition_matrix

**TODO**：這邊不太確定應該使用 `y_train == 1` 還是 `y_train == 0`（下方計算 `cross_matrix` 的 cell 使用的是 `y_train == 0`），需要確認兩者是否應該一致。

In [9]:
# Indices of the training samples whose label equals 1.
# NOTE(review): this cell selects `y_train == 1`, while the cross_matrix cell
# below selects `y_train == 0`. Confirm which label is intended.
label1_rows = np.argwhere(y_train == 1).reshape(-1)

# Transition matrices over the configured cutpoints, computed on those samples.
transition_set = transition_matrixs(
    time_series_set=x_train[label1_rows, :, :],
    shapelets=m.t2g.shapelets,
    seg_length=args.seg_length,
    tflag=args.tflag,
    multi_graph=args.multi_graph,
    tanh=False,
    debug=True,
    init=args.init,
    warp=args.warp,
    percentile=args.percentile,
    threshold=-1,
    measurement=args.measurement,
    global_flag=args.no_global,
    cutpoints=args.cutpoints,
)

[info]weight matrix between shapelet 0 15
[info](2493, 15, 1)


[info]threshold(10) 0.22191153168678296, mean 0.8695386052131653
[info]2493x3
[info]12923 edges involved in shapelets graph
[info]weight matrix between shapelet 10 25
[info](2493, 15, 1)
[info]threshold(10) 0.22166152894496932, mean 0.8698369264602661
[info]2493x3
[info]12898 edges involved in shapelets graph


## 這裡要計算相鄰的兩個cutpoint間的關係

In [10]:
# Indices of the training samples whose label equals 0.
label0_rows = np.argwhere(y_train == 0).reshape(-1)

# Cross matrix over the configured cutpoints, computed on those samples with
# the same shapelet and distance settings as the transition matrices.
cmat = cross_matrix(
    time_series_set=x_train[label0_rows, :, :],
    shapelets=m.t2g.shapelets,
    seg_length=args.seg_length,
    tflag=args.tflag,
    multi_graph=args.multi_graph,
    tanh=False,
    debug=True,
    init=args.init,
    warp=args.warp,
    percentile=args.percentile,
    threshold=-1,
    measurement=args.measurement,
    global_flag=args.no_global,
    cutpoints=args.cutpoints,
)

[info]threshold(10) 0.18452430814504625, mean 0.9766486883163452
[info]4803x3
[info]75309 edges involved in shapelets graph


In [11]:
# Take the first element of each of the two transition_matrixs results. The
# disabled embedding cell further down unpacks each entry as
# `tmat, sdist, dist_threshold`, so this is presumably the transition matrix.
first_tmat = transition_set[0][0]
second_tmat = transition_set[1][0]
# Merge both matrices with the cross matrix into a single combined matrix.
tcmat = combine_mat(first_tmat, second_tmat, cmat)

In [12]:
# Embed the combined graph, sized for twice the number of shapelets.
# `cross_graph_embedding` is already imported in the first cell, so the
# duplicate mid-notebook import was dropped.
# NOTE(review): the saved output of this cell ends with
# "ValueError: cannot reshape array of size 640 into shape (1,20,16)".
# 640 == 40 * 16, i.e. embeddings for all 2*K nodes, so a reshape inside the
# helper presumably still assumes K nodes. Confirm in cross_graph_embedding.
emb1, emb2 = cross_graph_embedding(
    tmat=shape_norm(tcmat, args.K * 2),
    num_shapelet=len(m.t2g.shapelets) * 2,
    embed_size=args.embed_size,
    cache_dir=cache_dir,
    **m.t2g.sembeds.deepwalk_args)
# Register both embeddings on the embedding model, in order.
m.t2g.sembeds.embeddings.extend([emb1, emb2])

[info]transition matrix size (1, 40, 40)
[info]run deepwalk with: deepwalk --input /home/sun/time2graph-sun/scripts/cache/sleep//0.edgelist --format weighted_edgelist --output /home/sun/time2graph-sun/scripts/cache/sleep//0.embeddings --representation-size 16
[[-1.67871737e+00 -1.90484858e+00 -1.34096396e+00 -1.18177629e+00
   1.56899190e+00  1.25473762e+00  2.02791834e+00  2.00787798e-01
   1.27225626e+00 -1.32337785e+00  4.56721306e-01  1.46410954e+00
   1.11039646e-01  1.79812357e-01  8.81413996e-01 -2.14664054e+00]
 [ 5.39426208e-01  1.38078421e-01  1.17196763e+00  1.21170115e+00
   4.27676111e-01 -1.40728140e+00 -3.17908108e-01 -2.50936985e-01
   1.44299757e+00 -8.26397717e-01  1.36460960e+00 -8.89445961e-01
   1.97840381e+00 -3.26963496e+00 -1.69494879e+00 -2.57926166e-01]
 [-6.53797761e-02 -6.27802432e-01 -2.33495951e-01  4.97731179e-01
   1.23359787e+00  3.96115869e-01  1.54930496e+00  7.41755545e-01
   1.62643838e+00 -1.24318969e+00  1.17070889e+00  7.00687647e-01
   2.3923804

ValueError: cannot reshape array of size 640 into shape (1,20,16)

## 做embedding，之後要修正

In [None]:
# for idx,transition in enumerate(transition_set):
#     tmat, sdist, dist_threshold = transition
#     tmat = shape_norm(tmat=tmat,num_shapelet=args.K)
#     m.t2g.sembeds.dist_threshold = dist_threshold
#     m.t2g.sembeds.embeddings.append(
#         graph_embedding(
#         tmat=tmat, num_shapelet=len(m.t2g.shapelets), embed_size=args.embed_size,
#         cache_dir=cache_dir, **m.t2g.sembeds.deepwalk_args)
#     )

[info]transition matrix size (1, 20, 20)
[info]run deepwalk with: deepwalk --input /home/sun/time2graph-sun/scripts/cache/sleep//0.edgelist --format weighted_edgelist --output /home/sun/time2graph-sun/scripts/cache/sleep//0.embeddings --representation-size 16
[info]transition matrix size (1, 20, 20)
[info]run deepwalk with: deepwalk --input /home/sun/time2graph-sun/scripts/cache/sleep//0.edgelist --format weighted_edgelist --output /home/sun/time2graph-sun/scripts/cache/sleep//0.embeddings --representation-size 16


In [None]:
# Build the classifier features for the training split.
x = m.extract_features(X=x_train, Z=z_train, init=args.init, mode=args.feature)
# These values are not read by any visible cell; kept in case later cells use them.
max_accu, max_prec, max_recall, max_f1, max_metric = -1, -1, -1, -1, -1
metric_measure = m.return_metric_method(opt_metric=m.t2g.opt_metric)
# Fit the classifier. The number of splits now comes from the config cell
# (`args.n_splits`) instead of a hard-coded 5, so the setting cannot drift.
m.train_classfit(x=x, Y=y_train, Z=z_train, n_splits=args.n_splits, opt_args=opt_args)

[info]embedding threshold 0.22166152894496932
[info]sdist size (7296, 3, 20)
[info]sdist size (7296, 3, 20)
[info]using setup parameters executed by 100.00%
[info]classifier fit


In [None]:
# Only the first element of the predict result is used as the predicted labels.
prediction_outputs = m.predict(X=x_test, Z=z_test)
y_pred = prediction_outputs[0]

[info]embedding threshold 0.22166152894496932
[info]sdist size (1824, 3, 20)
[info]sdist size (1824, 3, 20)
[debug]time series embedding executed by 100.00%

In [None]:
# Summary line: cutpoints, share of positive test labels, then four metrics.
report_template = '{}\n{:.2f} positive ratio\nresult: accu {:.4f}, prec {:.4f}, recall {:.4f}, f1 {:.4f}'

test_positive_ratio = float(sum(y_test) / len(y_test))
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
test_precision = precision_score(y_true=y_test, y_pred=y_pred)
test_recall = recall_score(y_true=y_test, y_pred=y_pred)
test_f1 = f1_score(y_true=y_test, y_pred=y_pred)

# NOTE(review): judging by the saved output, dc_print also posts the message to
# a Discord webhook. Confirm before running where that is unwanted.
Debugger.dc_print(report_template.format(
    args.cutpoints,
    test_positive_ratio,
    test_accuracy,
    test_precision,
    test_recall,
    test_f1,
))

訊息已成功發送到Discord Webhook！
