In [99]:
# https://www.sktime.net/en/v0.24.1/examples/transformation/rocket.html
from sktime.datatypes import check_raise, convert_to
from sktime.transformations.panel.rocket import Rocket
from matplotlib import pyplot as plt
from sktime.utils.plotting import plot_series
from sklearn.linear_model import RidgeClassifierCV, RidgeCV
import ipywidgets as widgets
import numpy as np
import pandas as pd
import os
from sktime.pipeline import make_pipeline

In [33]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [34]:
# Load all the dataframes for either test or training
# Ensure all their missing columns can be filled in with zeros
# Make a list of them, use check_raise and the df-list mtype to ensure they are valid
# Pass to ROCKET pipeline as seen

In [35]:
def load_invidual_instance(filename, needed_columns):
    df = pd.read_csv(filename)
    for col in needed_columns:
         if not (col in df.columns):
            df[col] = 0.0
    return df

In [52]:
def create_combined_data(base_dir, filenames, needed_columns):
    combined_data_m = map(lambda file: load_invidual_instance(base_dir + "/" + file, needed_columns), filenames)
    combined_data = list(combined_data_m)
    print("Check data: ",check_raise(combined_data, mtype="df-list"))
    return combined_data

In [77]:
needed_columns = ['dosAttackTrigger', 'distortBaseScanPMB2', 'distortBaseScanOmni', 'distortBaseScanTiago', 'packetLossLaserScanOmni', 'packetLossLaserScanPMB2', 'packetLossLaserScanTiago', 'delayBaseScanOmni', 'delayBaseScanPMB2', 'delayBaseScanTiago1']
ddd = create_combined_data("/tmp/testCSV/train/", ["Test_001_18_01_2024_05_28_25.csv"], needed_columns)
ddd

Check data:  True


[     dosAttackTrigger  distortBaseScanPMB2  delayBaseScanOmni  \
 0                 0.0                  0.0                0.0   
 1                 0.0                  0.0                0.0   
 2                 0.0                  0.0                0.0   
 3                 0.0                  0.0                0.0   
 4                 0.0                  0.0                0.0   
 ..                ...                  ...                ...   
 295               0.0                  0.0                0.0   
 296               0.0                  0.0                0.0   
 297               0.0                  0.0                0.0   
 298               0.0                  0.0                0.0   
 299               0.0                  0.0                0.0   
 
      delayBaseScanPMB2  distortBaseScanOmni  distortBaseScanTiago  \
 0                  0.0                  0.0                   0.0   
 1                  0.0                  0.0                   0.0

In [140]:
def find_data_in_dir(csv_directory_train, csv_directory_test, train_split_num):
    files_train = os.listdir(csv_directory_train)
    files_test = os.listdir(csv_directory_test)
    return files_train, files_test

In [141]:
def rocket_complete(csv_directory_train, csv_directory_test, train_split_num, needed_columns, num_kernels):
    files_train = os.listdir(csv_directory_train)
    files_test = os.listdir(csv_directory_test)
    train_data = create_combined_data(csv_directory_train, files_train, needed_columns)
    test_data = create_combined_data(csv_directory_test, files_test, needed_columns)
    train_metrics = pd.read_csv("/tmp/testCSV/metrics-train.csv")
    test_metrics = pd.read_csv("/tmp/testCSV/metrics-test.csv")
    for d in train_data:
        print("Shape:", d.shape)
    rocket_pipeline_classifier = make_pipeline(Rocket(num_kernels=num_kernels), RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)))
    rocket_pipeline = make_pipeline(Rocket(num_kernels=num_kernels), RidgeCV(alphas=(0.1, 1.0, 10.0)))
    
    rocket_pipeline.fit(train_data, train_metrics)
    print("ROCKET fit done!")
    score = rocket_pipeline.score(test_data, test_metrics)
    print("Score on test data = ",score)
    print("ROCKET score done!")
    
    rocket_pipeline_classifier.fit(train_data, train_metrics)
    print("ROCKET classifier fit done!")
    score = rocket_pipeline_classifier.score(test_data, test_metrics)
    print("Score on test data = ",score)
    print("ROCKET classifier score done!")
    return rocket_pipeline_classifier
    

In [142]:
split_num = 20
num_ops = 4
num_kernels = 10000

In [144]:
csv_directory_test = "/tmp/testCSV/test/"
needed_columns = ['dosAttackTrigger', 'distortBaseScanPMB2', 'distortBaseScanOmni', 'distortBaseScanTiago', 'packetLossLaserScanOmni', 'packetLossLaserScanPMB2', 'packetLossLaserScanTiago', 'delayBaseScanOmni', 'delayBaseScanPMB2', 'delayBaseScanTiago1']
classifier = rocket_complete("/tmp/testCSV/train/", "/tmp/testCSV/test/", split_num, needed_columns, num_kernels)
test_data = create_combined_data(csv_directory_test, files_test, needed_columns)

Check data:  True
Check data:  True
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
Shape: (300, 10)
ROCKET fit done!
Score on test data =  -0.24808647700913733
ROCKET score done!
ROCKET classifier fit done!
Score on test data =  0.3409090909090909
ROCKET classifier score done!


['EST_TYPE',
 'EST_TYPE_PLURAL',
 'METADATA_REQ_IN_CHECKS',
 'TASK',
 'VECTORIZATION_ATTR',
 '_X_metadata',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_alias_params',
 '_anytag_notnone_set',
 '_anytag_notnone_val',
 '_anytagis',
 '_anytagis_then_set',
 '_check_capabilities',
 '_check_convert_X_for_predict',
 '_check_estimators',
 '_check_input',
 '_check_names',
 '_check_y',
 '_class_dictionary',
 '_clone_flags',
 '_coerce_estimator_tuple',
 '_components',
 '_config',
 '_config_doc',
 '_config_dynamic',
 '_convert_X',
 '_convert_X_to_sklearn',
 '_convert_output_y',
 '_converter_store_y',
 '_dunder_concat',
 '_estimator_typ

In [59]:
metrics_train = pd.read_csv("/tmp/testCSV/metrics-train.csv")
metrics_train

Unnamed: 0,M1_countObjectsMissed
0,5.0
1,3.0
2,4.0
3,3.0
4,1.0
5,3.0
6,1.0
7,2.0
8,3.0
9,4.0


In [60]:
metrics_test = pd.read_csv("/tmp/testCSV/metrics-test.csv")
metrics_test

Unnamed: 0,M1_countObjectsMissed
0,3.0
1,4.0
2,3.0
3,1.0
4,4.0
5,3.0
6,2.0
7,1.0
8,1.0
9,4.0
