In [2]:
from sktime.datatypes import check_raise, convert_to
from sktime.transformations.panel.rocket import Rocket, MiniRocket, MiniRocketMultivariate
from matplotlib import pyplot as plt
from sktime.utils.plotting import plot_series
from sklearn.linear_model import RidgeClassifierCV, RidgeCV, SGDClassifier, SGDRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
import ipywidgets as widgets
import numpy as np
import pandas as pd
import os
from sktime.pipeline import make_pipeline

In [3]:
# Fixed seed so that the synthetic data drawn from this generator is the same on
# every fresh run of the notebook. Change SEED to sample a different dataset.
SEED = 42
rng = np.random.default_rng(SEED)

In [4]:
def generate_series_numpy(rng, max_regions, time_len):
    """Build a 0/1 series of length ``time_len`` with randomly placed runs of ones.

    For each of ``max_regions`` iterations two positions are derived from
    ``rng.random()`` and every sample between them (both ends inclusive) is
    set to 1.0. Regions may overlap, so the number of distinct runs in the
    result can be smaller than ``max_regions``.

    Parameters
    ----------
    rng : object exposing ``random()``
        Source of randomness, e.g. a ``numpy.random.Generator``. The values
        it returns are expected to lie in [0, 1).
    max_regions : int
        Number of regions to draw.
    time_len : int
        Length of the generated series.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``time_len`` holding only 0.0 and 1.0.
    """
    series = np.zeros(time_len)
    for _ in range(max_regions):
        # Two draws per region; the first is consumed before the second.
        first = int(np.floor(rng.random() * time_len))
        second = int(np.floor(rng.random() * time_len))
        start, end = min(first, second), max(first, second)
        # Slice assignment fills the inclusive range in one step and is a
        # harmless no-op when the series is empty (time_len == 0).
        series[start:end + 1] = 1.0
    return series

In [5]:
def count_nonzero_column(s):
    """Return how many entries of ``s`` are non-zero."""
    nonzero_total = np.count_nonzero(s)
    return nonzero_total

In [6]:
def generate_univariate_dataframe(rng, max_regions, time_len):
    """Generate one synthetic series and wrap it in a single-column DataFrame.

    The arguments are forwarded unchanged to ``generate_series_numpy``; the
    resulting frame has one row per time step.
    """
    values = generate_series_numpy(rng, max_regions, time_len)
    frame = pd.DataFrame(values)
    return frame

In [7]:
def generate_multivariate_dataframe(rng, max_regions, time_len, series_count):
    """Generate ``series_count`` synthetic series as the columns of one DataFrame.

    Each series comes from a separate ``generate_series_numpy`` call using the
    same ``rng``, ``max_regions`` and ``time_len``. The returned frame has one
    row per time step and one column per series.
    """
    rows = []
    for _ in range(series_count):
        rows.append(generate_series_numpy(rng, max_regions, time_len))
    # A list of arrays yields one row per series, so transpose to get one
    # column per series instead.
    return pd.DataFrame(rows).transpose()

In [8]:
# Quick look at the generator output: 3 series of length 500 with one region each.
generate_multivariate_dataframe(rng, 1, 500, 3)

Unnamed: 0,0,1,2
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
...,...,...,...
495,0.0,0.0,0.0
496,0.0,0.0,0.0
497,0.0,0.0,0.0
498,0.0,0.0,0.0


In [9]:
def count_proportion_univariate(df):
    """Return the number of non-zero cells in ``df`` divided by its row count."""
    nonzero_cells = np.count_nonzero(df)
    row_total = len(df)
    return nonzero_cells / row_total

In [81]:
def count_proportion_multivariate(df, num_columns):
    """Return the fraction of non-zero cells across the columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells are inspected.
    num_columns : int
        Number of columns used for the denominator; callers in this notebook
        pass the number of series in ``df``.

    Returns
    -------
    float
        Count of non-zero cells divided by ``len(df) * num_columns``.
    """
    # One vectorized count over the whole frame instead of a Python-level
    # callback per row followed by a Python-level sum.
    nonzero_cells = np.count_nonzero(df.to_numpy())
    return nonzero_cells / (len(df) * num_columns)

In [82]:
def y_to_class(y, divisor):
    """Map a value (or array of values) to the index of its ``divisor``-wide bin.

    Parameters
    ----------
    y : float or numpy.ndarray
        Value(s) to bin.
    divisor : float
        Width of one bin.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``floor(y / divisor)``, with the quotient first rounded to 9 decimals.
    """
    # Binary floating point puts exact bin boundaries just below the integer
    # (0.3 / 0.1 == 2.9999999999999996), which a bare floor would assign to the
    # bin underneath. Rounding away that representation error first keeps
    # boundary values in the bin they belong to.
    quotient = np.round(y / divisor, decimals=9)
    return np.floor(quotient)

In [83]:
def create_univariate_synthetic_training_data(rng, time_length, max_regions, train_count, test_count, class_div):
    """Generate univariate train/test series with regression and class targets.

    All training frames are generated before any test frame, both from the
    same ``rng``. For every frame the regression target is the value returned
    by ``count_proportion_univariate`` and the class target is
    ``y_to_class(target, class_div)``.

    Returns
    -------
    tuple
        ``(train_x, test_x, train_y, test_y, train_class, test_class)`` where
        the ``*_x`` entries are lists of DataFrames and the rest are numpy
        arrays.
    """
    train_x = []
    for _ in range(train_count):
        train_x.append(generate_univariate_dataframe(rng, max_regions, time_length))
    test_x = []
    for _ in range(test_count):
        test_x.append(generate_univariate_dataframe(rng, max_regions, time_length))

    train_y = np.array([count_proportion_univariate(frame) for frame in train_x])
    test_y = np.array([count_proportion_univariate(frame) for frame in test_x])
    train_class = np.array([y_to_class(target, class_div) for target in train_y])
    test_class = np.array([y_to_class(target, class_div) for target in test_y])

    # Validate every output against its expected sktime mtype and print the
    # result of each check (presumably check_raise raises on a mismatch).
    expected_mtypes = (
        (train_x, "df-list"),
        (test_x, "df-list"),
        (train_y, "np.ndarray"),
        (test_y, "np.ndarray"),
        (train_class, "np.ndarray"),
        (test_class, "np.ndarray"),
    )
    for data, mtype in expected_mtypes:
        print("Check data: ", check_raise(data, mtype=mtype))
    return train_x, test_x, train_y, test_y, train_class, test_class

In [84]:
def create_multivariate_synthetic_training_data(rng, time_length, max_regions, train_count, test_count, class_div, series_count):
    """Generate multivariate train/test frames with regression and class targets.

    All training frames are generated before any test frame, both from the
    same ``rng``; each frame holds ``series_count`` series. For every frame
    the regression target is the value returned by
    ``count_proportion_multivariate`` and the class target is
    ``y_to_class(target, class_div)``.

    Returns
    -------
    tuple
        ``(train_x, test_x, train_y, test_y, train_class, test_class)`` where
        the ``*_x`` entries are lists of DataFrames and the rest are numpy
        arrays.
    """
    train_x = []
    for _ in range(train_count):
        train_x.append(generate_multivariate_dataframe(rng, max_regions, time_length, series_count))
    test_x = []
    for _ in range(test_count):
        test_x.append(generate_multivariate_dataframe(rng, max_regions, time_length, series_count))

    train_y = np.array([count_proportion_multivariate(frame, series_count) for frame in train_x])
    test_y = np.array([count_proportion_multivariate(frame, series_count) for frame in test_x])
    train_class = np.array([y_to_class(target, class_div) for target in train_y])
    test_class = np.array([y_to_class(target, class_div) for target in test_y])

    # Validate every output against its expected sktime mtype and print the
    # result of each check (presumably check_raise raises on a mismatch).
    expected_mtypes = (
        (train_x, "df-list"),
        (test_x, "df-list"),
        (train_y, "np.ndarray"),
        (test_y, "np.ndarray"),
        (train_class, "np.ndarray"),
        (test_class, "np.ndarray"),
    )
    for data, mtype in expected_mtypes:
        print("Check data: ", check_raise(data, mtype=mtype))
    return train_x, test_x, train_y, test_y, train_class, test_class

In [85]:
# Minimal univariate dataset (one training and one test series of length 30,
# one region each, class width 0.1) to inspect the generated input by eye.
train_x, test_x, train_y, test_y, train_class, test_class = create_univariate_synthetic_training_data(rng, 30, 1, 1, 1, 0.1)
train_x

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True


[      0
 0   0.0
 1   0.0
 2   0.0
 3   0.0
 4   0.0
 5   0.0
 6   0.0
 7   0.0
 8   0.0
 9   0.0
 10  0.0
 11  0.0
 12  0.0
 13  1.0
 14  1.0
 15  1.0
 16  1.0
 17  1.0
 18  1.0
 19  1.0
 20  1.0
 21  1.0
 22  1.0
 23  1.0
 24  1.0
 25  1.0
 26  1.0
 27  1.0
 28  0.0
 29  0.0]

In [86]:
# Class label computed for the single training series generated above.
train_class

array([5.])

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True


[      0
 0   0.0
 1   0.0
 2   0.0
 3   0.0
 4   0.0
 5   0.0
 6   0.0
 7   0.0
 8   0.0
 9   0.0
 10  0.0
 11  0.0
 12  0.0
 13  0.0
 14  0.0
 15  0.0
 16  0.0
 17  0.0
 18  0.0
 19  0.0
 20  0.0
 21  1.0
 22  1.0
 23  1.0
 24  1.0
 25  0.0
 26  0.0
 27  0.0
 28  0.0
 29  0.0]

In [165]:
# Minimal multivariate dataset (one training and one test frame, 5 series of
# length 30 each, one region per series, class width 0.1) for visual inspection.
train_x, test_x, train_y, test_y, train_class, test_class = create_multivariate_synthetic_training_data(rng, 30, 1, 1, 1, 0.1, 5)
train_x

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True


[      0    1    2    3    4
 0   0.0  0.0  0.0  0.0  0.0
 1   0.0  0.0  0.0  0.0  0.0
 2   0.0  0.0  0.0  0.0  0.0
 3   1.0  0.0  0.0  0.0  0.0
 4   1.0  0.0  0.0  0.0  0.0
 5   1.0  0.0  0.0  0.0  0.0
 6   0.0  0.0  0.0  0.0  1.0
 7   0.0  0.0  0.0  0.0  1.0
 8   0.0  0.0  0.0  0.0  1.0
 9   0.0  0.0  0.0  1.0  1.0
 10  0.0  0.0  0.0  1.0  1.0
 11  0.0  0.0  0.0  1.0  1.0
 12  0.0  0.0  1.0  1.0  1.0
 13  0.0  0.0  1.0  1.0  1.0
 14  0.0  0.0  1.0  1.0  1.0
 15  0.0  0.0  1.0  1.0  1.0
 16  0.0  1.0  1.0  1.0  1.0
 17  0.0  1.0  1.0  1.0  1.0
 18  0.0  1.0  1.0  1.0  1.0
 19  0.0  0.0  0.0  1.0  1.0
 20  0.0  0.0  0.0  0.0  1.0
 21  0.0  0.0  0.0  0.0  1.0
 22  0.0  0.0  0.0  0.0  1.0
 23  0.0  0.0  0.0  0.0  1.0
 24  0.0  0.0  0.0  0.0  1.0
 25  0.0  0.0  0.0  0.0  1.0
 26  0.0  0.0  0.0  0.0  1.0
 27  0.0  0.0  0.0  0.0  1.0
 28  0.0  0.0  0.0  0.0  0.0
 29  0.0  0.0  0.0  0.0  0.0]

In [166]:
# Show the regression target next to the class label derived from it.
[train_y, train_class]

[array([0.30666667]), array([3.])]

In [None]:
def compare_prediction_and_actual_classes(test_x, test_class, classifier):
    """Tabulate ``classifier.predict(test_x)`` next to the true class labels.

    Returns a DataFrame with the columns ``predicted_class`` and
    ``actual_class``, in that order.
    """
    column_order = ['predicted_class', 'actual_class']
    side_by_side = {
        'predicted_class': classifier.predict(test_x),
        'actual_class': test_class,
    }
    return pd.DataFrame(side_by_side, columns=column_order)

In [175]:
def compare_prediction_and_actual_values(test_x, test_y, regressor):
    """Tabulate ``regressor.predict(test_x)`` next to the true target values.

    Returns a DataFrame with the columns ``predicted_vals`` and
    ``actual_vals``, in that order.
    """
    column_order = ['predicted_vals', 'actual_vals']
    side_by_side = {
        'predicted_vals': regressor.predict(test_x),
        'actual_vals': test_y,
    }
    return pd.DataFrame(side_by_side, columns=column_order)

In [18]:
def rocket_classifier_test(use_multivariate, time_length=500, num_ones_regions=1, train_count=320,
                           test_count=80, num_kernels=10000, class_div=0.1, variable_count=10,
                           seed=None):
    """Fit and score a ROCKET + ridge classifier on freshly generated synthetic data.

    Parameters
    ----------
    use_multivariate : bool
        If true, train on multivariate frames with ``variable_count`` series
        each; otherwise on univariate frames.
    time_length : int, default 500
        Length of every generated series.
    num_ones_regions : int, default 1
        Number of regions of ones drawn per series.
    train_count, test_count : int, default 320 and 80
        Number of training and test samples.
    num_kernels : int, default 10000
        Number of kernels passed to ``Rocket``.
    class_div : float, default 0.1
        Bin width used to turn the proportion target into a class label.
    variable_count : int, default 10
        Number of series per sample; only used when ``use_multivariate`` is true.
    seed : int or None, default None
        Seed for the data generator and for ``Rocket``. ``None`` draws fresh
        entropy on every call.

    Returns
    -------
    tuple
        ``(fitted_pipeline, predicted_vs_actual)`` where the second item is the
        DataFrame built by ``compare_prediction_and_actual_classes`` on the
        test split.
    """
    rng = np.random.default_rng(seed)
    # The regression targets (positions 2 and 3 of the returned tuple) are not
    # needed for classification.
    if use_multivariate:
        train_x, test_x, _, _, train_class, test_class = create_multivariate_synthetic_training_data(
            rng, time_length, num_ones_regions, train_count, test_count, class_div, variable_count)
    else:
        train_x, test_x, _, _, train_class, test_class = create_univariate_synthetic_training_data(
            rng, time_length, num_ones_regions, train_count, test_count, class_div)
    rocket_pipeline_classifier = make_pipeline(
        Rocket(num_kernels=num_kernels, n_jobs=-1, random_state=seed),
        StandardScaler(with_mean=False),
        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)),
    )
    rocket_pipeline_classifier.fit(train_x, train_class)
    print("ROCKET classifier fit done!")
    score = rocket_pipeline_classifier.score(test_x, test_class)
    print("Score on test data = ", score)
    print("ROCKET classifier score done!")
    predicted_vs_actual = compare_prediction_and_actual_classes(test_x, test_class, rocket_pipeline_classifier)
    return rocket_pipeline_classifier, predicted_vs_actual

In [173]:
def rocket_regression_test(use_multivariate, time_length=500, num_ones_regions=1, train_count=320,
                           test_count=80, num_kernels=10000, class_div=50, variable_count=10,
                           seed=None):
    """Fit and score a ROCKET + ridge regressor on freshly generated synthetic data.

    Parameters
    ----------
    use_multivariate : bool
        If true, train on multivariate frames with ``variable_count`` series
        each; otherwise on univariate frames.
    time_length : int, default 500
        Length of every generated series.
    num_ones_regions : int, default 1
        Number of regions of ones drawn per series.
    train_count, test_count : int, default 320 and 80
        Number of training and test samples.
    num_kernels : int, default 10000
        Number of kernels passed to ``Rocket``.
    class_div : float, default 50
        Forwarded to the data generator; it only affects the class labels,
        which this function discards.
    variable_count : int, default 10
        Number of series per sample; only used when ``use_multivariate`` is true.
    seed : int or None, default None
        Seed for the data generator and for ``Rocket``. ``None`` draws fresh
        entropy on every call.

    Returns
    -------
    tuple
        ``(fitted_pipeline, predicted_vs_actual)`` where the second item is the
        DataFrame built by ``compare_prediction_and_actual_values`` on the
        test split.
    """
    rng = np.random.default_rng(seed)
    # The class labels (positions 4 and 5 of the returned tuple) are not needed
    # for regression.
    if use_multivariate:
        train_x, test_x, train_y, test_y, _, _ = create_multivariate_synthetic_training_data(
            rng, time_length, num_ones_regions, train_count, test_count, class_div, variable_count)
    else:
        train_x, test_x, train_y, test_y, _, _ = create_univariate_synthetic_training_data(
            rng, time_length, num_ones_regions, train_count, test_count, class_div)
    rocket_pipeline = make_pipeline(
        Rocket(num_kernels=num_kernels, n_jobs=-1, random_state=seed),
        StandardScaler(with_mean=False),
        RidgeCV(alphas=(0.1, 1.0, 10.0)),
    )
    rocket_pipeline.fit(train_x, train_y)
    print("ROCKET regressor fit done!")
    score = rocket_pipeline.score(test_x, test_y)
    print("Score on test data = ", score)
    print("ROCKET regressor score done!")
    predicted_vs_actual = compare_prediction_and_actual_values(test_x, test_y, rocket_pipeline)
    return rocket_pipeline, predicted_vs_actual

In [20]:
# Allow up to 500 rows to be rendered so the result tables are not truncated.
pd.set_option('display.max_rows', 500)
# Classification experiment on univariate data; show predicted vs. actual classes.
rc, results = rocket_classifier_test(False)
results

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
ROCKET classifier fit done!
Score on test data =  0.975
ROCKET classifier score done!


Unnamed: 0,predicted_class,actual_class
0,8.0,8.0
1,6.0,6.0
2,1.0,1.0
3,2.0,2.0
4,1.0,1.0
5,1.0,1.0
6,5.0,5.0
7,8.0,8.0
8,3.0,3.0
9,1.0,1.0


In [21]:
# Classification experiment on multivariate data; show predicted vs. actual classes.
rc, results = rocket_classifier_test(True)
results

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
ROCKET classifier fit done!
Score on test data =  0.5625
ROCKET classifier score done!


Unnamed: 0,predicted_class,actual_class
0,2.0,2.0
1,2.0,3.0
2,3.0,3.0
3,4.0,3.0
4,3.0,3.0
5,3.0,3.0
6,3.0,2.0
7,3.0,3.0
8,2.0,3.0
9,2.0,2.0


In [176]:
# Regression experiment on univariate data; show predicted vs. actual proportions.
rc, predicted_vs_actual = rocket_regression_test(False)
predicted_vs_actual

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
ROCKET regressor fit done!
Score on test data =  0.9989502258545759
ROCKET regressor score done!


Unnamed: 0,predicted_vals,actual_vals
0,0.152869,0.152
1,0.502873,0.504
2,0.262047,0.262
3,0.3184,0.32
4,0.862511,0.864
5,0.726596,0.728
6,0.468082,0.468
7,0.395535,0.394
8,0.742397,0.742
9,0.046928,0.048


In [177]:
# Regression experiment on multivariate data; show predicted vs. actual proportions.
rc, predicted_vs_actual = rocket_regression_test(True)
predicted_vs_actual

Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
Check data:  True
ROCKET regressor fit done!
Score on test data =  0.8774273385803922
ROCKET regressor score done!


Unnamed: 0,predicted_vals,actual_vals
0,0.359708,0.3516
1,0.340364,0.371
2,0.353444,0.375
3,0.240404,0.2668
4,0.391921,0.4152
5,0.295077,0.2926
6,0.37019,0.38
7,0.247826,0.2292
8,0.258378,0.2438
9,0.389234,0.3688
