In [23]:
#https://medium.com/dida-machine-learning/inside-x-rocket-explaining-the-explainable-rocket-534b104c4a08

In [24]:
# https://www.sktime.net/en/v0.24.1/examples/transformation/rocket.html
from sktime.datatypes import check_raise, convert_to
from sktime.transformations.panel.rocket import Rocket, MiniRocket, MiniRocketMultivariate
from matplotlib import pyplot as plt
from sktime.utils.plotting import plot_series
from sklearn.linear_model import RidgeClassifierCV, RidgeCV, SGDClassifier, SGDRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
import ipywidgets as widgets
import numpy as np
import pandas as pd
import os
from sktime.pipeline import make_pipeline

In [25]:
#import ipywidgets as widgets
#from ipywidgets import interact, interact_manual

In [26]:
# Load all the dataframes for either test or training
# Ensure all their missing columns can be filled in with zeros
# Make a list of them, use check_raise and the df-list mtype to ensure they are valid
# Pass to ROCKET pipeline as seen

In [27]:
def load_invidual_instance(filename, needed_columns):
    df = pd.read_csv(filename)
    for col in needed_columns:
         if not (col in df.columns):
            df[col] = 0.0
    return df

In [28]:
def create_combined_data(base_dir, filenames, needed_columns):
    combined_data_m = map(lambda file: load_invidual_instance(base_dir + "/" + file, needed_columns), filenames)
    combined_data = list(combined_data_m)
    print("Check data: ",check_raise(combined_data, mtype="df-list"))
    return combined_data

In [29]:
def find_data_in_dir(csv_directory_train, csv_directory_test, train_split_num):
    files_train = os.listdir(csv_directory_train)
    files_test = os.listdir(csv_directory_test)
    return files_train, files_test

In [30]:
def single_metric_value(m, divisor):
    f = np.floor(m/divisor)
    if (f < 0):
        return 0.0
    else:
        return f

In [57]:
def metrics_map_to_class(metrics, divisor):
      return np.array([single_metric_value(v,divisor) for v in metrics])

In [73]:
def rocket_complete(csv_directory_train, csv_directory_test, train_mfile, test_mfile, needed_columns, num_kernels, metric_divisor):
    files_train = sorted(os.listdir(csv_directory_train))
    files_test = sorted(os.listdir(csv_directory_test))
    train_data = create_combined_data(csv_directory_train, files_train, needed_columns)
    test_data = create_combined_data(csv_directory_test, files_test, needed_columns)
    train_metrics = pd.read_csv(train_mfile)
    test_metrics = pd.read_csv(test_mfile)
    train_class = metrics_map_to_class(train_metrics[metric_name], metric_divisor)
    test_class = metrics_map_to_class(test_metrics[metric_name], metric_divisor)
    
    for d in train_data:
        print("Shape:", d.shape)
    #rocket_pipeline_classifier = make_pipeline(Rocket(num_kernels=num_kernels, n_jobs=-1), RidgeClassifierCV(alphas = np.logspace(-3, 3, 10)))
    #rocket_pipeline = make_pipeline(Rocket(num_kernels=num_kernels, n_jobs=-1), RidgeCV(alphas=(0.1, 1.0, 10.0)))
    rocket_pipeline_classifier = make_pipeline(Rocket(num_kernels=num_kernels, n_jobs=-1), StandardScaler(with_mean=False), RidgeClassifierCV(alphas = np.logspace(-3, 3, 10)))
    rocket_pipeline = make_pipeline(Rocket(num_kernels=num_kernels, n_jobs=-1), StandardScaler(with_mean=False), RidgeCV(alphas=(0.1, 1.0, 10.0)))
    #rocket_pipeline_classifier = make_pipeline(MiniRocket(num_kernels=num_kernels), LinearSVC())
    #rocket_pipeline = make_pipeline(MiniRocket(num_kernels=num_kernels), LinearSVR())
    
    rocket_pipeline.fit(train_data, train_metrics)
    print("ROCKET fit done!")
    score = rocket_pipeline.score(test_data, test_metrics)
    print("Score on test data = ",score)
    print("ROCKET score done!")
    
    rocket_pipeline_classifier.fit(train_data, train_class)
    print("ROCKET classifier fit done!")
    score = rocket_pipeline_classifier.score(test_data, test_class)
    print("Score on test data = ",score)
    print("ROCKET classifier score done!")
    return [rocket_pipeline, rocket_pipeline_classifier]
    

In [74]:
#base_dir = "./temp-data-1000/"
#metric_name = "uvExposureForHuman"
#needed_columns = ['personDetectionDelay', 'personDetectionPacketLoss', 'personDetectionPacketLossAll', 'distortPersonSector']


In [43]:
base_dir = "./temp-data-400-fuzzoptimes/"
metric_name = "fuzzingOperationTimes"
metric_divisor_for_class = 50
needed_columns = ['dosAttackTrigger', 'distortBaseScanPMB2', 'distortBaseScanOmni', 'distortBaseScanTiago', 'packetLossLaserScanOmni', 'packetLossLaserScanPMB2', 'packetLossLaserScanTiago', 'delayBaseScanOmni', 'delayBaseScanPMB2', 'delayBaseScanTiago1']

In [67]:
base_dir = "./temp-data-400-objectsdelivered/"
metric_name = "M1_countObjectsDelivered"
metric_divisor_for_class = 2
needed_columns = ['dosAttackTrigger', 'distortBaseScanPMB2', 'distortBaseScanOmni', 'distortBaseScanTiago', 'packetLossLaserScanOmni', 'packetLossLaserScanPMB2', 'packetLossLaserScanTiago', 'delayBaseScanOmni', 'delayBaseScanPMB2', 'delayBaseScanTiago1']

In [75]:
base_dir = "./temp-data-400-misseddeliveries/"
metric_name = "M1_countObjectsMissed"
metric_divisor_for_class = 2
needed_columns = ['dosAttackTrigger', 'distortBaseScanPMB2', 'distortBaseScanOmni', 'distortBaseScanTiago', 'packetLossLaserScanOmni', 'packetLossLaserScanPMB2', 'packetLossLaserScanTiago', 'delayBaseScanOmni', 'delayBaseScanPMB2', 'delayBaseScanTiago1']

In [76]:
csv_directory_train = base_dir + "/train"
csv_directory_test = base_dir + "/test"
mfile_train = base_dir + "metrics-train.csv"
mfile_test = base_dir + "metrics-test.csv"

num_kernels = 84*10

[rocket, classifier] = rocket_complete(csv_directory_train, csv_directory_test, mfile_train, mfile_test, needed_columns, num_kernels, metric_divisor_for_class)
rocket
classifier
#test_data = create_combined_data(csv_directory_test, files_test, needed_columns)

Check data:  True
Check data:  True
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500, 10)
Shape: (500,

In [69]:
def compare_prediction_and_actual(classifier, divisor_for_metric_class):
    files_test = sorted(os.listdir(csv_directory_test))
    test_num_max = len(files_test)
    test_data = create_combined_data(csv_directory_test, files_test, needed_columns)
    test_metrics = pd.read_csv(mfile_test)
    actual_class = metrics_map_to_class(test_metrics[metric_name], divisor_for_metric_class)
    actual_val = test_metrics[metric_name]
    predicted_class = classifier.predict(test_data)
    predicted_val = rocket.predict(test_data)
    predicted_vs_actual = pd.DataFrame({'predicted_class':predicted_class, 'actual_class':actual_class, 'predicted_val':predicted_val, 'actual_val':actual_val}, columns = ['predicted_class', 'actual_class', 'predicted_val', 'actual_val'])    
    return predicted_vs_actual

In [70]:
pd.options.display.max_rows = 250
compare_prediction_and_actual(classifier, metric_divisor_for_class)

Check data:  True


Unnamed: 0,predicted_class,actual_class,predicted_val,actual_val
0,3.0,3.0,109.540309,7.0
1,4.0,2.0,7.51611,4.0
2,3.0,2.0,46.65088,5.0
3,4.0,3.0,13.978135,7.0
4,5.0,5.0,9.53152,10.0
5,2.0,4.0,15.847682,9.0
6,4.0,3.0,78.372145,6.0
7,3.0,3.0,17.772132,6.0
8,3.0,2.0,-19.282947,5.0
9,4.0,3.0,-81.332409,6.0


In [26]:
files_test = sorted(os.listdir(csv_directory_test))
print(files_test[6])
test_metrics = pd.read_csv(mfile_test)
test_metrics["fuzzingOperationTimes"]

Test_327_11_07_2024_03_59_48.csv


0      13.657376
1      38.757326
2      41.793253
3      31.204708
4       5.562568
5       3.222495
6     187.815020
7      44.790383
8     239.748391
9     146.770337
10    235.330155
11    385.649338
12    173.330531
13      2.006682
14     60.526886
15     22.631648
16     19.953518
17     86.968564
18     67.733317
19    312.915728
20     16.626914
21     27.063103
22    233.294096
23     56.855600
24      0.616974
25    396.620025
26      1.601168
27      3.306009
28     25.453415
29     47.553682
30      7.983221
31     81.452557
32     10.694518
33     28.432047
34    454.638856
35     24.125265
36    411.641853
37     68.616866
38      3.178429
39     53.719420
40      3.424414
41    198.683861
42    346.485424
43      7.566260
44     23.661857
45    208.874828
46      1.998849
47     29.948021
48    295.391902
49     51.790899
50     13.259638
51     35.634269
52    364.631637
53     66.531568
54      1.367351
55      3.244231
56      0.624051
57    103.187379
58     12.8709