In [92]:
%pip install sktime==0.11.3








In [93]:
import numpy as np
import pandas as pd
import timeit
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
from sklearn.metrics import fbeta_score, make_scorer
import tensorflow as tf
from pickle import dump
from sklearn.model_selection import KFold


#Import Models
from sktime.classification.hybrid import HIVECOTEV2
from sktime.classification.hybrid import HIVECOTEV1
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import RandomIntervalSpectralEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime._contrib.vector_classifiers._rotation_forest import RotationForest

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier


from sktime.datasets import load_from_arff_to_dataframe as load_arff
#numpy.version.version

# Rocket Classifier code

In [94]:
%pip install tsai

Note: you may need to restart the kernel to use updated packages.








In [95]:
#%pip install sklearn --upgrade



In [96]:
import sklearn
from sklearn.linear_model import RidgeClassifierCV, RidgeCV
from sklearn.metrics import make_scorer
from tsai.imports import *
from tsai.data.external import *
from tsai.models.layers import *

In [97]:
#export
class RocketClassifier(sklearn.pipeline.Pipeline):
    """Time series classification using ROCKET features and a linear classifier"""

    def __init__(self, num_kernels=10_000, normalize_input=True, random_state=None, 
                 alphas=np.logspace(-3, 3, 7), normalize_features=True, memory=None, verbose=False, scoring=None, class_weight=None, **kwargs):
        """
        Build a two-step pipeline: sktime's Rocket transform followed by RidgeClassifierCV.

        RocketClassifier is recommended for up to 10k time series. 
        For a larger dataset, you can use ROCKET (in Pytorch).
        scoring = None --> defaults to accuracy.

        Rocket args:
            num_kernels     : int, number of random convolutional kernels (default 10,000)
            normalize_input : boolean, whether or not to normalise the input time series per instance (default True)
            random_state    : Optional random seed (default None)

        RidgeClassifierCV args:
            alphas             : regularisation strengths to try (default np.logspace(-3, 3, 7))
            normalize_features : forwarded as RidgeClassifierCV's `normalize` argument (default True)
            scoring            : forwarded as `scoring` (default None)
            class_weight       : forwarded as `class_weight` (default None)
            **kwargs           : any further keyword arguments forwarded to RidgeClassifierCV

        Other args:
            memory, verbose : not used directly in this method; presumably recorded by
                              store_attr() for the Pipeline base class -- TODO confirm

        Raises:
            ImportError: if sktime (which provides the Rocket transform) is not installed.
        """
        try:
            from sktime.transformations.panel.rocket import Rocket
        except ImportError as err:
            # Fail here with the real cause; merely printing the message would let execution
            # continue and crash a few lines below with an unrelated NameError on `Rocket`.
            raise ImportError("You need to install sktime to be able to use RocketClassifier") from err

        # NOTE(review): RidgeClassifierCV's `normalize` argument was deprecated in scikit-learn 1.0
        # and removed in 1.2 -- confirm the scikit-learn version this notebook runs against.
        self.steps = [('rocket', Rocket(num_kernels=num_kernels, normalise=normalize_input, random_state=random_state)),
                      ('ridgeclassifiercv', RidgeClassifierCV(alphas=alphas, normalize=normalize_features, scoring=scoring, 
                                                              class_weight=class_weight, **kwargs))]
        store_attr()
        self._validate_steps()

    def __repr__(self):  
        """Return a Pipeline-style representation listing a copy of the steps."""
        return f'Pipeline(steps={self.steps.copy()})'

    def save(self, fname='Rocket', path='./models'):
        """Pickle this classifier to `<path>/<fname>.pkl`, creating `path` if it does not exist."""
        path = Path(path)
        # The default './models' folder is not guaranteed to exist; open() would fail without it.
        path.mkdir(parents=True, exist_ok=True)
        filename = path/fname
        with open(f'{filename}.pkl', 'wb') as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)

In [98]:
#export
def load_rocket(fname='Rocket', path='./models'):
    """Unpickle and return the object stored at `<path>/<fname>.pkl`.

    Only use this on files you trust: unpickling can execute arbitrary code.
    """
    pkl_file = f'{Path(path) / fname}.pkl'
    with open(pkl_file, 'rb') as handle:
        return pickle.load(handle)

In [99]:
# Stop the notebook early unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [100]:
# Same GPU check as the previous cell, with a hint on how to enable the GPU runtime.
# NOTE(review): the previous cell already raises SystemError when no GPU is found,
# so on a top-to-bottom run this cell can never take its failure branch.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

In [101]:
def create_dt_metrics():
    """Create five empty metric tables, one column per benchmarked model.

    Returns:
        tuple: (df_precision, df_accuracy, df_f1, df_recall, df_std), five separate
        empty DataFrames sharing the same column names.
    """
    # Column names for the full benchmark, kept for reference:
    #   'KNeighborsTimeSeriesClassifier', 'ShapeletTransformClassifier',
    #   'TimeSeriesForestClassifier', 'HIVECOTE1', 'HIVECOTE2', 'RISE',
    #   'ROCKET', 'BOSSEnsemble'
    columns = ['BOSSEnsemble']
    df_precision, df_accuracy, df_f1, df_recall, df_std = (
        pd.DataFrame(data=None, columns=columns) for _ in range(5)
    )
    return df_precision, df_accuracy, df_f1, df_recall, df_std

In [102]:
def populate_df_metrics(precision, accuracy, f1, recall,
                        df_precision=None, df_accuracy=None, df_f1=None, df_recall=None):
    """Append new metric values to the metric tables and return the updated tables.

    Args:
        precision, accuracy, f1, recall: values to append; each is either a DataFrame
            or anything `pd.DataFrame(...)` accepts (e.g. a dict of lists).
        df_precision, df_accuracy, df_f1, df_recall: tables to extend. When omitted
            (None), a new table is started from the given values.

    Returns:
        tuple: (df_precision, df_accuracy, df_f1, df_recall) as new DataFrames;
        the frames passed in are not modified.
    """
    def _extend(frame, values):
        # Return `frame` with `values` added as extra rows (or just the rows if no frame).
        rows = values if isinstance(values, pd.DataFrame) else pd.DataFrame(values)
        if frame is None:
            return rows.reset_index(drop=True)
        return pd.concat([frame, rows], ignore_index=True)

    print(f"Precision in populate_df_metrics: {precision}")
    # The tables are explicit arguments and results: assigning to names that were never
    # passed in made them unbound locals / undefined globals, so the function could not run.
    df_precision = _extend(df_precision, precision)
    print(f"Df precision after append: {df_precision}")
    df_accuracy = _extend(df_accuracy, accuracy)
    df_f1 = _extend(df_f1, f1)
    df_recall = _extend(df_recall, recall)
    return df_precision, df_accuracy, df_f1, df_recall
  
def save_std(df_accuracy,df_std):
    """Store the standard deviation of every accuracy column in `df_std` and write it to CSV.

    Args:
        df_accuracy: DataFrame whose columns hold accuracy values (one column per model).
        df_std: DataFrame updated in place; each column of `df_accuracy` gets its
            `np.std` value under the same column name.

    Returns:
        The updated `df_std`, which is also written to 'results4/df_all_std.csv'.
    """
    import os  # local import so the function does not depend on the notebook's import cell

    for column in df_accuracy:
        column_std = np.std(df_accuracy[column])
        if len(df_std.index) == 0:
            # A scalar assigned to a frame without rows is silently dropped, which left
            # the CSV empty; a one-element list creates the first row instead.
            df_std[column] = [column_std]
        else:
            df_std[column] = column_std
        print(f"STD COL: {df_std[column]}")
    print(f"STD antes do %save: {df_std}")
    # The output folder is not guaranteed to exist yet.
    os.makedirs("results4", exist_ok=True)
    df_std.to_csv("results4/" + 'df_all_std.csv', index=False)
    print(f"Save STD: {df_std}")
    return df_std

In [103]:
def create_models(models):
    """Append the classifiers to benchmark to `models` and return that same list.

    Each entry is a `(name, estimator)` tuple. Only BOSSEnsemble is currently enabled;
    the other candidates are kept below, commented out, for reference.
    """
    # --- Disabled candidates -------------------------------------------------
    # models.append(('KNeighborsTimeSeriesClassifier',KNeighborsTimeSeriesClassifier(n_neighbors=10)))
    #     values tried: n_neighbors=1, n_neighbors=5, n_neighbors=10
    # models.append(('ShapeletTransformClassifier',ShapeletTransformClassifier(
    #     estimator=RotationForest(n_estimators=6),
    #     n_shapelet_samples=500,
    #     max_shapelets=20,
    #     batch_size=100,
    # )))
    #     values tried: n_estimators=200, n_estimators=3, n_estimators=400
    # models.append(('TimeSeriesForestClassifier',TimeSeriesForestClassifier()))
    # models.append(('HIVECOTE1',HIVECOTEV1(
    #     stc_params={
    #         "estimator": RotationForest(n_estimators=3),
    #         "n_shapelet_samples": 100,
    #         "max_shapelets": 10,
    #         "batch_size": 20,
    #     },
    #     tsf_params={"n_estimators": 3},
    #     rise_params={"n_estimators": 3},
    #     cboss_params={"n_parameter_samples":1, "max_ensemble_size": 1, "min_window":1, "max_win_len_prop":1},
    #     verbose=10
    # )))
    # models.append(('HIVECOTE2',HIVECOTEV2(
    #     stc_params={
    #         "estimator": RotationForest(n_estimators=50),
    #         "n_shapelet_samples": 100,
    #         "max_shapelets": 10,
    #         "batch_size": 20,
    #     },
    #     drcif_params={"n_estimators": 50, "n_intervals": 2, "att_subsample_size": 2},
    #     arsenal_params={"num_kernels": 50, "n_estimators": 50},
    #     tde_params={
    #         "n_parameter_samples": 10,
    #         "max_ensemble_size": 3,
    #         "randomly_selected_params": 5,
    #         "max_win_len_prop":2,
    #         "min_window":1,
    #     },
    #     verbose=10
    # )))
    #     values tried: n_estimators=50, n_estimators=3, n_estimators=100
    # models.append(('RISE',RandomIntervalSpectralEnsemble()))
    # models.append(('ROCKET',RocketClassifier()))

    # --- Enabled model -------------------------------------------------------
    # NOTE(review): max_win_len_prop looks like a proportion of the series length, so 18
    # seems out of range -- confirm against the sktime BOSSEnsemble documentation.
    boss_ensemble = BOSSEnsemble(max_ensemble_size=10, min_window=9, max_win_len_prop=18)
    models.append(('BOSSEnsemble', boss_ensemble))
    return models

In [104]:
#save metrics
def save_results(df_to_save, name, model, train_val_test):
    """Persist a model and its metrics table under the 'results4/' folder.

    Args:
        df_to_save: DataFrame of metrics, written to 'results4/df_<name>_<train_val_test>.csv'.
        name: model name used in both file names.
        model: object pickled to 'results4/<name>_<train_val_test>_model.pkl'.
        train_val_test: label of the data split, used in both file names.
    """
    import os  # local import so the function does not depend on the notebook's import cell

    # The output folder is not guaranteed to exist yet.
    os.makedirs('results4', exist_ok=True)
    #save the model
    print("Saving model...")
    # The context manager closes the file; a bare open() inside dump() leaked the handle.
    with open('results4/'+name+'_'+train_val_test+'_model.pkl', 'wb') as model_file:
        dump(model, model_file)
    print("Saving metrics...")
    print(f"Df accuracy: {df_to_save}")
    df_to_save.to_csv("results4/" + 'df_'+name+'_'+train_val_test+'.csv', index=False)
    print("Accuracy of models:")
    print(df_to_save)

In [105]:
import copy
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
# Load the training data and hold out 20% of it as a validation split.
# NOTE(review): the split is not stratified; with very few samples a class may be
# missing from one side -- confirm this is acceptable.
X, y = load_arff("AllCandidas_TRAIN_V3_Reduced.arff")
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Cross-validation scheme: 5 folds repeated twice, with a fixed seed.
kf = RepeatedKFold(n_splits=5, n_repeats=2, random_state=42)

# (name, estimator) pairs to benchmark.
models = create_models([])
def gpu(X, y, kf, models):
    """Fit every model, cross-validate it on the training data and score it on the validation split.

    For each `(name, model)` pair this:
      1. fits `model` on `(X, y)`;
      2. runs `cross_validate` on `(X, y)` with `kf` and saves the result as the 'train' metrics;
      3. scores the fitted model on the notebook-level `X_val` / `y_val` hold-out split and
         saves a one-row table as the 'val' metrics.

    Args:
        X, y: training data and labels.
        kf: cross-validation splitter passed to `cross_validate`; the training data
            must contain at least as many samples as it needs splits.
        models: iterable of `(name, estimator)` tuples; the estimators are fitted in place.
    """
    scoring = ['accuracy','precision_macro', 'recall_macro','f1_macro']
    # NOTE(review): the estimators passed in appear to be sktime/scikit-learn models, which
    # presumably do not run TensorFlow ops -- confirm whether this device context is needed.
    with tf.device('/device:GPU:0'):
        print(f"Quantidade de modelos {len(models)}")
        for name, model in models:
            print(f"Treining {name}...")
            print("In trainning...")
            # Use the data handed to the function rather than the notebook globals.
            model.fit(X, y)
            print("In prediction...")
            val_predictions = model.predict(X_val)
            print("Finished prediction!")
            df_train = cross_validate(model, X, y, cv=kf, scoring=scoring,return_train_score=True)
            print(df_train.keys())
            save_results(pd.DataFrame(df_train),name, model,'train')
            print("***********")
            print("score val")
            print()
            # Score the fitted model on the hold-out split. Cross-validating on that split
            # instead refits fresh clones on it and fails outright when it holds fewer
            # samples than the number of folds.
            df_validation = {
                'test_accuracy': [accuracy_score(y_val, val_predictions)],
                'test_precision_macro': [precision_score(y_val, val_predictions, average='macro')],
                'test_recall_macro': [recall_score(y_val, val_predictions, average='macro')],
                'test_f1_macro': [f1_score(y_val, val_predictions, average='macro')],
            }
            print(df_validation.keys())
            save_results(pd.DataFrame(df_validation), name, model,'val')


In [106]:
# Choose a TensorFlow distribution strategy: mirrored when a GPU is visible, default otherwise.
if not tf.config.list_physical_devices('GPU'):
    print("Using Default Strategy")
    strategy = tf.distribute.get_strategy()
else:
    print("Using MirroredStrategy")
    strategy = tf.distribute.MirroredStrategy()

# NOTE(review): the models trained by gpu() do not appear to be TensorFlow models, so the
# strategy scope presumably has no effect on them -- confirm.
with strategy.scope():
    print(tf.Variable(1.))  # shows how a variable is created under the selected strategy
    gpu(X_train, y_train, kf, models)

Using MirroredStrategy
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
MirroredVariable:{
  0: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.0>
}
Quantidade de modelos 1
Treining BOSSEnsemble...
In trainning...
In prediction...
Finished prediction!
Len X:                                                                                                                                                                                                                                                                                                                                                       dim_0
4  0     102200.0
1     101100.0
2     101900.0
3       7305.0
4       7305.0
5       7305.0
6       6014.0
7       6020.0
8       6026.0
9      62470.0
10     62310.0
11     62390.0
12      3571.0
13      3553.0
14      3545.0
15     26180.0
16     26080.0
17     25960.0
18      7818.0
19      7816.0
20      7812.0
21         1

ValueError: Cannot have number of splits n_splits=5 greater than the number of samples: n_samples=4.

### Executar isso só no final

In [None]:
def test_final_model(X_test, y_test, models):
    scoring = ['accuracy','precision_macro', 'recall_macro','f1_macro']
    for name, model in models:
        df_test = cross_validate(model, X_test, y_test, cv=kf, scoring=scoring)
        print(df_test.keys())
        save_results(pd.DataFrame(df_test),name, model,'test')
        print("Finished test!")

In [None]:
# Load the held-out test set and evaluate the models on it.
X_test, y_test = load_arff("AllCandidas_TEST_V3.arff")
test_final_model(X_test,y_test,models) # Run this only at the very end