In [1]:
import subprocess
import pathlib
import random
import numpy as np
import pandas as pd

In [2]:
def random_search(f, hyperparameters, tries, set_seed=10):
    #Random search algorithm 
    random.seed(set_seed)
    best_result = None
    best_params = None
    for i in range(0, tries):
        random_params = choose_random_hyperparameters(hyperparameters)
        result = f(random_params)
        if  best_result is None or result > best_result:
            best_params = random_params
            best_result = result
    return best_params

def choose_random_hyperparameters(hyperparameters):
    random_params = []
    for hyperparam in hyperparameters:
        random_params.append(random.choice(hyperparam))
    return random_params

In [3]:
def greedy_search(f, hyperparameters, starting_parameters, tries_per_param, set_seed=10):
    #Greedy algorithm - optimize one hyperparameter at a time
    random.seed(set_seed)
    current_parameters = starting_parameters
    param_index = 0
    best_result = f(starting_parameters)
    for hyperparam in hyperparameters:
        if tries_per_param >= len(hyperparam):
            values = hyperparam
        else:
            values = random.sample(hyperparam, k=tries_per_param)
        for value in values:
            tested_parameters = current_parameters[:param_index] + [value] + current_parameters[param_index + 1:]
            result = f(tested_parameters)
            if result > best_result:
                current_parameters = tested_parameters
                best_result = result
        param_index += 1
    return current_parameters
     

In [4]:
test_parameters = [[1,2,3], [10,20,30], [100,200,300]]
def test_f(numbers):
    print(numbers)
    return sum(numbers)
starting_parameters = [2,30,100]

In [5]:
random_search(test_f, test_parameters, 5)

[3, 10, 200]
[2, 30, 100]
[1, 20, 200]
[2, 30, 100]
[1, 30, 200]


[1, 30, 200]

In [6]:
greedy_search(test_f, test_parameters, starting_parameters, 2)

[2, 30, 100]
[3, 30, 100]
[1, 30, 100]
[3, 20, 100]
[3, 30, 100]
[3, 30, 300]
[3, 30, 100]


[3, 30, 300]

In [7]:
datasets = {
    'airlines': (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff', 50000, False, True, True, [-1,9,10], [''], ['-1']),
    'wisdm': (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\wisdm.arff',5000, True, True, True, ['-1'], [''], ['-1']),
    'electricity': (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\electricity_arff.arff',45000, True, True, True, [-1,1], ['0,2'], ['-1']),
    'vavel_sup_slim': (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff',50000, False, True, True, [-1,13,14], ['0,5,7,9'], ['-1', '4,8', '4,8,12', '1,2,3,4,6,8,11,12']),
    'vavel_sup': (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff',50000, False, True, True),
    'vavel':  (r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_90500.arff',50000, False, False, True, [-1,0,1,37,40], ['2,8,17,18,23'], ['-1','6,22','6,22,29','3,4,5,6,16,22,26,29']),
    'rbf':  ('generators.RandomRBFGenerator',50000, True, True, False, ['-1'], [''], ['-1']),
    'hyperplane':  ('generators.HyperplaneGenerator',50000, True, True, False, ['-1'], [''], ['-1']),
    'led':  ('generators.LEDGenerator',50000, True, True, False, ['-1'], [''], ['-1']),

}

def get_args(dataset):
    return {
        'name': dataset[0],
        'size': dataset[1],
        'classification': dataset[2],
        'supervised': dataset[3],
        'filebased': dataset[4],
        'partition_ids': dataset[5],
        'time_ids': dataset[6],
        'buffer_ids': dataset[7],

    }

In [21]:
buffer_sizes = sorted(list({int(i) for i in np.logspace(0, 2, num=12)}))
cluster_num = sorted(list({int(i) for i in np.logspace(0, 1.4, num=12)}))
extractor = ['cluster','featureExtraction', 'cep']
cluster_types = ['clustree', 'clustream']
buffer_types = ['random', 'relevance']
rs = list(np.linspace(0, 1, num=21)[1:])

reg_buffer_learners = [
    'rules.functions.TargetMean',
    'rules.functions.FadingTargetMean',
    'functions.AdaGrad',
    'meta.AdaptiveRandomForestRegressor -s 10',
    'meta.AdaptiveRandomForestRegressor -s 10 -m 80',
    'rules.AMRulesRegressor',
    'trees.ARFFIMTDD',

]

clf_buffer_learners = [
    'functions.MajorityClass',
    'functions.NoChange',
    'lazy.SAMkNN',
    'bayes.NaiveBayes',
    'meta.AdaptiveRandomForest -s 10',
    'meta.AdaptiveRandomForest -s 10 -m 80',
    'trees.HoeffdingAdaptiveTree',
    'meta.OzaBagAdwin',
]

class ExperimentRunner:
    def __init__(self, dataset):
        self.test_quotient = 1
        self.dataset = dataset
        self.exp_args = get_args(datasets[dataset])
        self.task = 'EvaluatePrequential' if self.exp_args['classification'] else 'EvaluatePrequentialRegression' 
        self.learner = 'BufferLearner'
        self.buffer_learners = clf_buffer_learners if self.exp_args['classification'] else reg_buffer_learners

        self.relevance_learner = '(functions.AdaGrad)'
        self.stream = f'(ArffFileStream -f {self.exp_args["name"]})' if self.exp_args['filebased'] else self.exp_args['name']

        self.sample_freq = 1000 / self.test_quotient
        self.index = 0
        self.outs = []
        self.ran_params = []
    
    def generate_command(self, extractor, buffer_learner, n, cluster_num,  partition_id, time_ids, buffer_ids, cluster_type, buffer_type, r, target_dir = ''):
        pathlib.Path(f'runs/tuning/{target_dir}/{self.dataset}/{extractor}').mkdir(parents=True, exist_ok=True)

        target_file = f'runs/tuning/{target_dir}/{self.dataset}/{extractor}/{buffer_learner.replace(".","").replace(" ","")}_{n}_{cluster_num}_{partition_id}_{time_ids}_{buffer_ids}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
        start_cmd = f'{self.task} -l ({self.learner} -l ({buffer_learner}) -n {n} -m {self.relevance_learner}'
        end_cmd = f') -s {self.stream} -i {int(self.exp_args["size"] / self.test_quotient)} -f {int(self.sample_freq)} -d {target_file}'
        cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p {partition_id} {"" if time_ids == "" else f"-t {time_ids}"} -y {buffer_ids} -q {cluster_type} -x {cluster_num}{end_cmd}'
        cmd = cmd.replace("\\","/")
        return (cmd, target_file)
    
    def run_command(self, cmd, file_suffix):
        file_contents = f'@echo off\n\
        \n\
        set BASEDIR=%~dp0\..\n\
        set MEMORY=6g\n\
        \n\
        java -Xmx%MEMORY% -cp "%BASEDIR%/lib/*" -javaagent:"%BASEDIR%/lib/sizeofag-1.0.4.jar" ^\n\
        moa.DoTask ^\n\
        "{cmd} "'
        filename = r'..\moa-release-2020.07.1\bin\moa_test_run_' + file_suffix + '.bat'
        with open(filename, 'w') as file:
            file.write(file_contents)
        out = subprocess.Popen(filename)
        return out

    def run_and_get_value(self, params):
        if str(params) in self.ran_params:
            print("Params already ran - " + str(params))
            return float('-inf')
        self.ran_params.append(str(params))
        print(params)
        self.index += 1
        max_together = 1
        cmd, target_file = self.generate_command(params[0], params[1], params[2], params[3], params[4], 
                               params[5], params[6], params[7], params[8], params[9], target_dir = '20210831_test')
        print(f"Running cmd = {cmd}")
        out = self.run_command(cmd,f'{self.dataset}{self.index}')
        self.outs.append(out)

        if len(self.outs) >= max_together:
            self.outs[len(self.outs) - max_together].communicate()
        
        result_df = pd.read_csv(target_file, sep = ',')
        col_name = 'classifications correct (percent)' if self.exp_args['classification'] else 'mean absolute error'
        result = np.mean(result_df[col_name])
        print (f"Ran cmd with result {result}")
        run_result_file = target_file.rsplit( "/", 1 )[0] + "/_run_result.csv"
        with open(run_result_file, 'a') as file:
            stdev =  np.std(result_df[col_name])
            to_write = ",".join([str(p) for p in params]) + "," + "{:.2f}".format(stdev) + "," + "{:.2f}".format(result) + "\n"
            print("Writing " + to_write)
            file.write(to_write)
        return result if self.exp_args['classification']  else -result
            
    def run(self):       
        hyperparams_base = dict()
        for ex in extractor:
            hyperparams_base[ex] = [
                [ex],
                self.buffer_learners, 
                buffer_sizes, 
                cluster_num, 
                self.exp_args['partition_ids'], 
                self.exp_args['time_ids'], 
                self.exp_args['buffer_ids'],
                cluster_types,
                buffer_types,
                rs
            ]
            initial_params = random_search(self.run_and_get_value, hyperparams_base[ex], 10)
            best_params = greedy_search(self.run_and_get_value, hyperparams_base[ex], initial_params, 5)
            print(f"Best params for {ex}: "  + str(best_params))
    
    def run_standard_learners(self, target_dir):
        pathlib.Path(f'runs/tuning/{target_dir}/{self.dataset}/standard').mkdir(parents=True, exist_ok=True)
        target_path = [f'runs/tuning/{target_dir}/{self.dataset}/standard/{l.replace(" ","")}.csv' for l in self.buffer_learners]
        cmds = [f'{self.task} -l ({l}) -s {self.stream} -i {int(self.exp_args["size"] / self.test_quotient)} -f {int(self.sample_freq)} -d runs/tuning/{target_dir}/{self.dataset}/standard/{l.replace(" ","")}.csv' for l in self.buffer_learners]
        target_files = [f'runs/tuning/{target_dir}/{self.dataset}/standard/{l.replace(" ","")}.csv' for l in self.buffer_learners]
        max_together = 1
        for cmd, target_file, learner in zip(cmds,target_files, self.buffer_learners):
            self.index += 1
            print(f"Running cmd = {cmd}")
            out = self.run_command(cmd,f'{self.dataset}{self.index}')
            self.outs.append(out)

            if len(self.outs) >= max_together:
                self.outs[len(self.outs) - max_together].communicate()
            
            result_df = pd.read_csv(target_file, sep = ',')
            col_name = 'classifications correct (percent)' if self.exp_args['classification'] else 'mean absolute error'
            result = np.mean(result_df[col_name])
            rmse = np.mean(result_df['root mean squared error']) if not self.exp_args['classification'] else 0
            print (f"Ran cmd with result {result}")
            run_result_file = target_file.rsplit( "/", 1 )[0] + "/_run_result.csv"
            with open(run_result_file, 'a') as file:
                stdev =  np.std(result_df[col_name])
                to_write = learner + "," + "{:.2f}".format(rmse) + "," + "{:.2f}".format(result) + "\n"
                print("Writing " + to_write)
                file.write(to_write)

In [22]:
tested_datasets = ['airlines', 'wisdm', 'electricity','vavel_sup_slim', 'vavel_sup', 'rbf', 'hyperplane','led']
for ds in tested_datasets:
    runner = ExperimentRunner(ds)
    runner.run_standard_learners('20210831')

Running cmd = EvaluatePrequentialRegression -l (rules.functions.TargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff) -i 50000 -f 1000 -d runs/tuning/20210831/airlines/standard/rules.functions.TargetMean.csv
Ran cmd with result 21.005371429137234
Writing rules.functions.TargetMean,34.22,21.01

Running cmd = EvaluatePrequentialRegression -l (rules.functions.FadingTargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff) -i 50000 -f 1000 -d runs/tuning/20210831/airlines/standard/rules.functions.FadingTargetMean.csv
Ran cmd with result 20.00587052835034
Writing rules.functions.FadingTargetMean,33.47,20.01

Running cmd = EvaluatePrequentialRegression -l (functions.AdaGrad) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff) -i 50000 -f 1000 -d runs/tuning/20210831/airlines/standard/functions.AdaGrad.csv
Ran cmd with result 19.90536063918401
Writing functions.AdaGrad,33.31,1

Ran cmd with result 16772.156525531493
Writing functions.AdaGrad,123384.31,16772.16

Running cmd = EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -s 10) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff) -i 50000 -f 1000 -d runs/tuning/20210831/vavel_sup_slim/standard/meta.AdaptiveRandomForestRegressor-s10.csv
Ran cmd with result 50.117163543724864
Writing meta.AdaptiveRandomForestRegressor -s 10,140.23,50.12

Running cmd = EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff) -i 50000 -f 1000 -d runs/tuning/20210831/vavel_sup_slim/standard/meta.AdaptiveRandomForestRegressor-s10-m80.csv
Ran cmd with result 48.282737376413046
Writing meta.AdaptiveRandomForestRegressor -s 10 -m 80,136.77,48.28

Running cmd = EvaluatePrequentialRegression -l (rules.AMRulesRegressor) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGIST

IndexError: tuple index out of range

In [19]:
runner = ExperimentRunner('wisdm')
runner.run_standard_learners('20210831_test')

Running cmd = EvaluatePrequential -l (functions.MajorityClass) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\wisdm.arff) -i 5 -f 1 -d runs/tuning/20210831_test/wisdm/standard/functions.MajorityClass.csv
Ran cmd with result 41.33333333333333
Writing functions.MajorityClass,0.00,41.33

Running cmd = EvaluatePrequential -l (functions.NoChange) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\wisdm.arff) -i 5 -f 1 -d runs/tuning/20210831_test/wisdm/standard/functions.NoChange.csv
Ran cmd with result 45.33333333333333
Writing functions.NoChange,0.00,45.33

Running cmd = EvaluatePrequential -l (lazy.SAMkNN) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\wisdm.arff) -i 5 -f 1 -d runs/tuning/20210831_test/wisdm/standard/lazy.SAMkNN.csv
Ran cmd with result 41.33333333333333
Writing lazy.SAMkNN,0.00,41.33

Running cmd = EvaluatePrequential -l (bayes.NaiveBayes) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream

In [10]:
runner = ExperimentRunner('electricity')
runner.run()

['naive', 'trees.HoeffdingAdaptiveTree', 28, 25, -1, '0,2', '-1', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequential -l (BufferLearner -l (trees.HoeffdingAdaptiveTree) -n 28 -m (functions.AdaGrad) -c naive -r 0.30000000000000004 -b relevance -p -1 -t 0,2 -y -1 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/naive/treesHoeffdingAdaptiveTree_28_25_-1_0,2_-1_clustream_relevance_30.csv
Ran cmd with result 60.668371578977926
Writing naive,trees.HoeffdingAdaptiveTree,28,25,-1,0,2,-1,clustream,relevance,0.30000000000000004,13.59,60.67

['naive', 'meta.OzaBagAdwin', 12, 2, -1, '0,2', '-1', 'clustream', 'random', 1.0]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.OzaBagAdwin) -n 12 -m (functions.AdaGrad) -c naive -r 1.0 -b random -p -1 -t 0,2 -y -1 -q clustream -x 2) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/da

Ran cmd with result 60.668371578977926
Writing naive,trees.HoeffdingAdaptiveTree,5,7,1,0,2,-1,clustree,relevance,0.8500000000000001,13.59,60.67

['naive', 'meta.AdaptiveRandomForest -s 10', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 5 -m (functions.AdaGrad) -c naive -r 0.8500000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/naive/metaAdaptiveRandomForest-s10_5_7_1_0,2_-1_clustree_relevance_85.csv
Ran cmd with result 78.22541930494104
Writing naive,meta.AdaptiveRandomForest -s 10,5,7,1,0,2,-1,clustree,relevance,0.8500000000000001,10.03,78.23

['naive', 'meta.OzaBagAdwin', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.OzaBagAdwin) -n 5 -m

Ran cmd with result 82.59076101873254
Writing naive,meta.AdaptiveRandomForest -s 10,8,7,-1,0,2,-1,clustree,relevance,0.8500000000000001,10.37,82.59

Params already ran - ['naive', 'meta.AdaptiveRandomForest -s 10', 8, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Params already ran - ['naive', 'meta.AdaptiveRandomForest -s 10', 8, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Params already ran - ['naive', 'meta.AdaptiveRandomForest -s 10', 8, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Params already ran - ['naive', 'meta.AdaptiveRandomForest -s 10', 8, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
['naive', 'meta.AdaptiveRandomForest -s 10', 8, 7, 1, '0,2', '-1', 'clustream', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 8 -m (functions.AdaGrad) -c naive -r 0.8500000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustream -x 7) -s (Arff

Ran cmd with result 84.21328047458532
Writing cluster,meta.AdaptiveRandomForest -s 10 -m 80,5,7,1,0,2,-1,clustree,relevance,0.8500000000000001,7.98,84.21

['cluster', 'bayes.NaiveBayes', 18, 4, -1, '0,2', '-1', 'clustream', 'random', 0.9]
Running cmd = EvaluatePrequential -l (BufferLearner -l (bayes.NaiveBayes) -n 18 -m (functions.AdaGrad) -c cluster -r 0.9 -b random -p -1 -t 0,2 -y -1 -q clustream -x 4) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cluster/bayesNaiveBayes_18_4_-1_0,2_-1_clustream_random_90.csv
Ran cmd with result 79.00906488484776
Writing cluster,bayes.NaiveBayes,18,4,-1,0,2,-1,clustream,random,0.9,7.23,79.01

['cluster', 'lazy.SAMkNN', 18, 25, 1, '0,2', '-1', 'clustree', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequential -l (BufferLearner -l (lazy.SAMkNN) -n 18 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p 1 -t 0,2 -y -1 -q c

Ran cmd with result 87.47101350032544
Writing cluster,meta.AdaptiveRandomForest -s 10,8,7,1,0,2,-1,clustree,relevance,0.8500000000000001,7.17,87.47

['cluster', 'meta.AdaptiveRandomForest -s 10', 12, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 12 -m (functions.AdaGrad) -c cluster -r 0.8500000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cluster/metaAdaptiveRandomForest-s10_12_7_1_0,2_-1_clustree_relevance_85.csv
Ran cmd with result 87.47101350032544
Writing cluster,meta.AdaptiveRandomForest -s 10,12,7,1,0,2,-1,clustree,relevance,0.8500000000000001,7.17,87.47

['cluster', 'meta.AdaptiveRandomForest -s 10', 5, 3, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearn

Ran cmd with result 87.47101350032544
Writing cluster,meta.AdaptiveRandomForest -s 10,5,7,1,0,2,-1,clustree,relevance,0.1,7.17,87.47

['cluster', 'meta.AdaptiveRandomForest -s 10', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.7000000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 5 -m (functions.AdaGrad) -c cluster -r 0.7000000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cluster/metaAdaptiveRandomForest-s10_5_7_1_0,2_-1_clustree_relevance_70.csv
Ran cmd with result 87.47101350032544
Writing cluster,meta.AdaptiveRandomForest -s 10,5,7,1,0,2,-1,clustree,relevance,0.7000000000000001,7.17,87.47

['cluster', 'meta.AdaptiveRandomForest -s 10', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest 

Ran cmd with result 52.55823755651878
Writing featureExtraction,functions.MajorityClass,43,1,1,0,2,-1,clustree,relevance,0.75,16.47,52.56

Params already ran - ['featureExtraction', 'meta.AdaptiveRandomForest -s 10 -m 80', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Params already ran - ['featureExtraction', 'meta.AdaptiveRandomForest -s 10 -m 80', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
['featureExtraction', 'functions.MajorityClass', 5, 7, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (functions.MajorityClass) -n 5 -m (functions.AdaGrad) -c featureExtraction -r 0.8500000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/featureExtraction/functionsMajorityClass_5_7_1_0,2_-1_clustree_relevance_85.csv
Ran cmd with res

Ran cmd with result 71.03048872726511
Writing featureExtraction,meta.OzaBagAdwin,5,13,1,0,2,-1,clustree,relevance,0.8500000000000001,10.39,71.03

['featureExtraction', 'meta.OzaBagAdwin', 5, 25, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.OzaBagAdwin) -n 5 -m (functions.AdaGrad) -c featureExtraction -r 0.8500000000000001 -b relevance -p 1 -t 0,2 -y -1 -q clustree -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/featureExtraction/metaOzaBagAdwin_5_25_1_0,2_-1_clustree_relevance_85.csv
Ran cmd with result 71.03048872726511
Writing featureExtraction,meta.OzaBagAdwin,5,25,1,0,2,-1,clustree,relevance,0.8500000000000001,10.39,71.03

['featureExtraction', 'meta.OzaBagAdwin', 5, 18, 1, '0,2', '-1', 'clustree', 'relevance', 0.8500000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.OzaBagAdwin)

Ran cmd with result 77.72712306824783
Writing cep,meta.OzaBagAdwin,12,2,-1,0,2,-1,clustream,random,1.0,9.57,77.73

['cep', 'trees.HoeffdingAdaptiveTree', 18, 5, 1, '0,2', '-1', 'clustream', 'relevance', 0.25]
Running cmd = EvaluatePrequential -l (BufferLearner -l (trees.HoeffdingAdaptiveTree) -n 18 -m (functions.AdaGrad) -c cep -r 0.25 -b relevance -p 1 -t 0,2 -y -1 -q clustream -x 5) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cep/treesHoeffdingAdaptiveTree_18_5_1_0,2_-1_clustream_relevance_25.csv
Ran cmd with result 77.32132204712047
Writing cep,trees.HoeffdingAdaptiveTree,18,5,1,0,2,-1,clustream,relevance,0.25,7.85,77.32

['cep', 'bayes.NaiveBayes', 28, 25, 1, '0,2', '-1', 'clustree', 'random', 0.35000000000000003]
Running cmd = EvaluatePrequential -l (BufferLearner -l (bayes.NaiveBayes) -n 28 -m (functions.AdaGrad) -c cep -r 0.35000000000000003 -b random -p 1 -t 0,2 -y -1 -q clustr

Ran cmd with result 90.17060096792005
Writing cep,meta.AdaptiveRandomForest -s 10,5,10,1,0,2,-1,clustree,random,0.2,7.34,90.17

['cep', 'meta.AdaptiveRandomForest -s 10', 28, 10, 1, '0,2', '-1', 'clustree', 'random', 0.2]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 28 -m (functions.AdaGrad) -c cep -r 0.2 -b random -p 1 -t 0,2 -y -1 -q clustree -x 10) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cep/metaAdaptiveRandomForest-s10_28_10_1_0,2_-1_clustree_random_20.csv
Ran cmd with result 90.17060096792005
Writing cep,meta.AdaptiveRandomForest -s 10,28,10,1,0,2,-1,clustree,random,0.2,7.34,90.17

['cep', 'meta.AdaptiveRandomForest -s 10', 65, 10, 1, '0,2', '-1', 'clustree', 'random', 0.2]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 65 -m (functions.AdaGrad) -c cep -r 0.2 -b random -p 1 -t 0,2 -y -1

Ran cmd with result 90.17060096792005
Writing cep,meta.AdaptiveRandomForest -s 10,18,10,1,0,2,-1,clustree,random,0.4,7.34,90.17

['cep', 'meta.AdaptiveRandomForest -s 10', 18, 10, 1, '0,2', '-1', 'clustree', 'random', 0.6000000000000001]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 18 -m (functions.AdaGrad) -c cep -r 0.6000000000000001 -b random -p 1 -t 0,2 -y -1 -q clustree -x 10) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/electricity_arff.arff) -i 45 -f 1 -d runs/tuning/20210831_test/electricity/cep/metaAdaptiveRandomForest-s10_18_10_1_0,2_-1_clustree_random_60.csv
Ran cmd with result 90.17060096792005
Writing cep,meta.AdaptiveRandomForest -s 10,18,10,1,0,2,-1,clustree,random,0.6000000000000001,7.34,90.17

['cep', 'meta.AdaptiveRandomForest -s 10', 18, 10, 1, '0,2', '-1', 'clustree', 'random', 0.1]
Running cmd = EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -s 10) -n 18 -m (functions.AdaG

In [18]:
runner = ExperimentRunner('airlines')
runner.run()

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10', 28, 25, -1, '', '-1', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 28 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p -1  -y -1 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/cluster/metaAdaptiveRandomForestRegressor-s10_28_25_-1__-1_clustream_relevance_30.csv
Ran cmd with result 17.403898239550003
Writing cluster,meta.AdaptiveRandomForestRegressor -s 10,28,25,-1,,-1,clustream,relevance,0.30000000000000004,2.85,17.40

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 7, -1, '', '-1', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 28 -m (functions.AdaGrad) -c cluster -r 0.25 

Ran cmd with result 17.373668417695
Writing cluster,trees.ARFFIMTDD,5,13,10,,-1,clustree,random,0.25,2.67,17.37

['cluster', 'functions.AdaGrad', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (functions.AdaGrad) -n 5 -m (functions.AdaGrad) -c cluster -r 0.25 -b random -p 10  -y -1 -q clustree -x 13) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/cluster/functionsAdaGrad_5_13_10__-1_clustree_random_25.csv
Ran cmd with result 27.480325315733502
Writing cluster,functions.AdaGrad,5,13,10,,-1,clustree,random,0.25,12.34,27.48

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 1, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 1 -m (functions.AdaGrad) -c cluster -r 0.25 -b random -p 10  -y -1 -q clustree -x 13) -s 

Ran cmd with result 16.40093393943003
Writing cluster,meta.AdaptiveRandomForestRegressor -s 10 -m 80,28,5,10,,-1,clustream,random,0.25,2.90,16.40

Params already ran - ['cluster', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 5, 10, '', '-1', 'clustree', 'random', 0.25]
['cluster', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 5, 10, '', '-1', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 28 -m (functions.AdaGrad) -c cluster -r 0.25 -b relevance -p 10  -y -1 -q clustree -x 5) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/cluster/metaAdaptiveRandomForestRegressor-s10-m80_28_5_10__-1_clustree_relevance_25.csv
Ran cmd with result 16.351591992356646
Writing cluster,meta.AdaptiveRandomForestRegressor -s 10 -m 80,28,5,10,,-1,clustree,relevance,0.25,2.88,16.35

['cluster', 'meta.AdaptiveRa

Ran cmd with result 17.373668417695
Writing featureExtraction,trees.ARFFIMTDD,43,3,-1,,-1,clustree,random,0.8,2.67,17.37

['featureExtraction', 'trees.ARFFIMTDD', 65, 2, 10, '', '-1', 'clustream', 'relevance', 1.0]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (trees.ARFFIMTDD) -n 65 -m (functions.AdaGrad) -c featureExtraction -r 1.0 -b relevance -p 10  -y -1 -q clustream -x 2) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/featureExtraction/treesARFFIMTDD_65_2_10__-1_clustream_relevance_100.csv
Ran cmd with result 17.373668417695
Writing featureExtraction,trees.ARFFIMTDD,65,2,10,,-1,clustream,relevance,1.0,2.67,17.37

['featureExtraction', 'rules.functions.TargetMean', 2, 13, -1, '', '-1', 'clustream', 'relevance', 0.75]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 2 -m (functions.AdaGrad) -c featureExtraction -r 0.75 -b relevan

Ran cmd with result 17.416526452334942
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10,18,7,-1,,-1,clustree,relevance,0.25,2.75,17.42

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 43, 5, -1, '', '-1', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 43 -m (functions.AdaGrad) -c featureExtraction -r 0.25 -b relevance -p -1  -y -1 -q clustree -x 5) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/featureExtraction/metaAdaptiveRandomForestRegressor-s10_43_5_-1__-1_clustree_relevance_25.csv
Ran cmd with result 16.357579696414337
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10,43,5,-1,,-1,clustree,relevance,0.25,2.83,16.36

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 43, 3, -1, '', '-1', 'clustree', 'relevance', 0.25]
Running cmd = Eva

Ran cmd with result 16.35824516472158
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10,43,7,-1,,-1,clustree,random,0.15000000000000002,2.83,16.36

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 43, 7, -1, '', '-1', 'clustree', 'random', 0.4]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 43 -m (functions.AdaGrad) -c featureExtraction -r 0.4 -b random -p -1  -y -1 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/featureExtraction/metaAdaptiveRandomForestRegressor-s10_43_7_-1__-1_clustree_random_40.csv
Ran cmd with result 16.35327446372838
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10,43,7,-1,,-1,clustree,random,0.4,2.84,16.35

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 43, 7, -1, '', '-1', 'clustree', 'random', 0.6000000000000001]
Running cmd

Ran cmd with result 17.373668417695
Writing cep,rules.AMRulesRegressor,100,3,-1,,-1,clustream,relevance,0.4,2.67,17.37

Params already ran - ['cep', 'meta.AdaptiveRandomForestRegressor -s 10', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Params already ran - ['cep', 'meta.AdaptiveRandomForestRegressor -s 10', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
['cep', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 5 -m (functions.AdaGrad) -c cep -r 0.25 -b random -p 10  -y -1 -q clustree -x 13) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/cep/metaAdaptiveRandomForestRegressor-s10-m80_5_13_10__-1_clustree_random_25.csv
Ran cmd with result 17.38853402908177
Writing cep,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,13,10,,-1,clust

Ran cmd with result 17.373668417695
Writing cep,rules.functions.TargetMean,5,13,-1,,-1,clustree,random,0.25,2.67,17.37

['cep', 'rules.functions.TargetMean', 5, 13, 9, '', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 5 -m (functions.AdaGrad) -c cep -r 0.25 -b random -p 9  -y -1 -q clustree -x 13) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/airlines.arff) -i 50 -f 1 -d runs/tuning/20210831_test/airlines/cep/rulesfunctionsTargetMean_5_13_9__-1_clustree_random_25.csv
Ran cmd with result 17.373668417695
Writing cep,rules.functions.TargetMean,5,13,9,,-1,clustree,random,0.25,2.67,17.37

Params already ran - ['cep', 'rules.functions.TargetMean', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Params already ran - ['cep', 'rules.functions.TargetMean', 5, 13, 10, '', '-1', 'clustree', 'random', 0.25]
Params already ran - ['cep', 'rules.functions.TargetMean', 5, 13, 10, '', '-1', 'cl

In [20]:
runner = ExperimentRunner('vavel_sup_slim')
runner.run()

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10', 28, 25, -1, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 28 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p -1 -t 0,5,7,9 -y 1,2,3,4,6,8,11,12 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cluster/metaAdaptiveRandomForestRegressor-s10_28_25_-1_0,5,7,9_1,2,3,4,6,8,11,12_clustream_relevance_30.csv
Ran cmd with result 76.11452689663484
Writing cluster,meta.AdaptiveRandomForestRegressor -s 10,28,25,-1,0,5,7,9,1,2,3,4,6,8,11,12,clustream,relevance,0.30000000000000004,9.91,76.11

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 7, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (Buffe

Ran cmd with result 78.3904616327173
Writing cluster,rules.functions.TargetMean,28,25,-1,0,5,7,9,1,2,3,4,6,8,11,12,clustream,relevance,0.30000000000000004,8.65,78.39

Params already ran - ['cluster', 'meta.AdaptiveRandomForestRegressor -s 10', 28, 25, -1, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
['cluster', 'trees.ARFFIMTDD', 28, 25, -1, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (trees.ARFFIMTDD) -n 28 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p -1 -t 0,5,7,9 -y 1,2,3,4,6,8,11,12 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cluster/treesARFFIMTDD_28_25_-1_0,5,7,9_1,2,3,4,6,8,11,12_clustream_relevance_30.csv
Ran cmd with result 78.3904616327173
Writing cluster,trees.ARFFIMTDD,28,25,-1,0,5,7,9,1,2,3,4,6,8,

Ran cmd with result 78.3904616327173
Writing cluster,rules.functions.TargetMean,28,25,13,0,5,7,9,1,2,3,4,6,8,11,12,clustream,relevance,0.30000000000000004,8.65,78.39

['cluster', 'rules.functions.TargetMean', 28, 25, 14, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 28 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p 14 -t 0,5,7,9 -y 1,2,3,4,6,8,11,12 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cluster/rulesfunctionsTargetMean_28_25_14_0,5,7,9_1,2,3,4,6,8,11,12_clustream_relevance_30.csv
Ran cmd with result 78.3904616327173
Writing cluster,rules.functions.TargetMean,28,25,14,0,5,7,9,1,2,3,4,6,8,11,12,clustream,relevance,0.30000000000000004,8.65,78.39

Params already ran - ['cluster', 'rules.functions.TargetMean', 28, 

Ran cmd with result 78.3904616327173
Writing cluster,rules.functions.TargetMean,28,25,-1,0,5,7,9,1,2,3,4,6,8,11,12,clustream,relevance,0.1,8.65,78.39

Best params for cluster: ['cluster', 'rules.functions.TargetMean', 28, 25, -1, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 28, 25, -1, '0,5,7,9', '1,2,3,4,6,8,11,12', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 28 -m (functions.AdaGrad) -c featureExtraction -r 0.30000000000000004 -b relevance -p -1 -t 0,5,7,9 -y 1,2,3,4,6,8,11,12 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/featureExtraction/metaAdaptiveRandomForestRegressor-s10_28_25_-1_0,5,7,9_1,2,3,4,6,8,11,12_clustream_relevance_30.csv
Ran cmd with result 7

Ran cmd with result 77.51795837666872
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,13,14,0,5,7,9,-1,clustree,random,0.25,8.92,77.52

['featureExtraction', 'rules.functions.TargetMean', 5, 13, 14, '0,5,7,9', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 5 -m (functions.AdaGrad) -c featureExtraction -r 0.25 -b random -p 14 -t 0,5,7,9 -y -1 -q clustree -x 13) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/featureExtraction/rulesfunctionsTargetMean_5_13_14_0,5,7,9_-1_clustree_random_25.csv
Ran cmd with result 78.3904616327173
Writing featureExtraction,rules.functions.TargetMean,5,13,14,0,5,7,9,-1,clustree,random,0.25,8.65,78.39

Params already ran - ['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10', 5, 13, 14, '0,5,7,9', '-1', 'clustree', 'random', 0.25]
['featureExtr

Ran cmd with result 77.51795837666872
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,25,14,0,5,7,9,-1,clustree,random,0.25,8.92,77.52

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 5, 4, 14, '0,5,7,9', '-1', 'clustree', 'random', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 5 -m (functions.AdaGrad) -c featureExtraction -r 0.25 -b random -p 14 -t 0,5,7,9 -y -1 -q clustree -x 4) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/featureExtraction/metaAdaptiveRandomForestRegressor-s10-m80_5_4_14_0,5,7,9_-1_clustree_random_25.csv
Ran cmd with result 77.51795837666872
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,4,14,0,5,7,9,-1,clustree,random,0.25,8.92,77.52

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 5, 13, -1,

Ran cmd with result 77.51795837666872
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,13,14,0,5,7,9,-1,clustree,random,0.15000000000000002,8.92,77.52

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 5, 13, 14, '0,5,7,9', '-1', 'clustree', 'random', 0.4]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10 -m 80) -n 5 -m (functions.AdaGrad) -c featureExtraction -r 0.4 -b random -p 14 -t 0,5,7,9 -y -1 -q clustree -x 13) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/featureExtraction/metaAdaptiveRandomForestRegressor-s10-m80_5_13_14_0,5,7,9_-1_clustree_random_40.csv
Ran cmd with result 77.51795837666872
Writing featureExtraction,meta.AdaptiveRandomForestRegressor -s 10 -m 80,5,13,14,0,5,7,9,-1,clustree,random,0.4,8.92,77.52

['featureExtraction', 'meta.AdaptiveRandomForestRegressor -s 10 -m

Ran cmd with result 78.3904616327173
Writing cep,rules.AMRulesRegressor,8,3,14,0,5,7,9,-1,clustream,relevance,0.5,8.65,78.39

['cep', 'rules.AMRulesRegressor', 100, 3, -1, '0,5,7,9', '4,8,12', 'clustream', 'relevance', 0.4]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.AMRulesRegressor) -n 100 -m (functions.AdaGrad) -c cep -r 0.4 -b relevance -p -1 -t 0,5,7,9 -y 4,8,12 -q clustream -x 3) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cep/rulesAMRulesRegressor_100_3_-1_0,5,7,9_4,8,12_clustream_relevance_40.csv
Ran cmd with result 78.3904616327173
Writing cep,rules.AMRulesRegressor,100,3,-1,0,5,7,9,4,8,12,clustream,relevance,0.4,8.65,78.39

Params already ran - ['cep', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 7, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.25]
Params already ran - ['cep', 'meta.AdaptiveRandomForestRegressor -s 10 -m 80', 28, 7, -1,

Ran cmd with result 78.3904616327173
Writing cep,rules.functions.TargetMean,28,1,-1,0,5,7,9,4,8,12,clustree,relevance,0.25,8.65,78.39

['cep', 'rules.functions.TargetMean', 28, 25, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 28 -m (functions.AdaGrad) -c cep -r 0.25 -b relevance -p -1 -t 0,5,7,9 -y 4,8,12 -q clustree -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cep/rulesfunctionsTargetMean_28_25_-1_0,5,7,9_4,8,12_clustree_relevance_25.csv
Ran cmd with result 78.3904616327173
Writing cep,rules.functions.TargetMean,28,25,-1,0,5,7,9,4,8,12,clustree,relevance,0.25,8.65,78.39

['cep', 'rules.functions.TargetMean', 28, 4, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.25]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 28 -m (fun

Ran cmd with result 78.3904616327173
Writing cep,rules.functions.TargetMean,28,7,-1,0,5,7,9,4,8,12,clustree,relevance,0.4,8.65,78.39

['cep', 'rules.functions.TargetMean', 28, 7, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.6000000000000001]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules.functions.TargetMean) -n 28 -m (functions.AdaGrad) -c cep -r 0.6000000000000001 -b relevance -p -1 -t 0,5,7,9 -y 4,8,12 -q clustree -x 7) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 50 -f 1 -d runs/tuning/20210831_test/vavel_sup_slim/cep/rulesfunctionsTargetMean_28_7_-1_0,5,7,9_4,8,12_clustree_relevance_60.csv
Ran cmd with result 78.3904616327173
Writing cep,rules.functions.TargetMean,28,7,-1,0,5,7,9,4,8,12,clustree,relevance,0.6000000000000001,8.65,78.39

['cep', 'rules.functions.TargetMean', 28, 7, -1, '0,5,7,9', '4,8,12', 'clustree', 'relevance', 0.1]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (rules

In [27]:
runner = ExperimentRunner('vavel')
runner.run()

['cluster', 'meta.AdaptiveRandomForestRegressor -s 10', 28, 25, -1, '2,8,17,18,23', '3,4,5,6,16,22,26,29', 'clustream', 'relevance', 0.30000000000000004]
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -s 10) -n 28 -m (functions.AdaGrad) -c cluster -r 0.30000000000000004 -b relevance -p -1 -t 2,8,17,18,23 -y 3,4,5,6,16,22,26,29 -q clustream -x 25) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_90500.arff) -i 5000 -f 100 -d runs/tuning/20210831_test/vavel/cluster/metaAdaptiveRandomForestRegressor-s10_28_25_-1_2,8,17,18,23_3,4,5,6,16,22,26,29_clustream_relevance_30.csv


TypeError: Could not convert ??????????????????????????????????????????????????mean absolute error?????????????????????????????????????????????????? to numeric

In [None]:
reg_buffer_learners = [
    'rules.functions.TargetMean',
    'rules.functions.FadingTargetMean',
    'functions.AdaGrad',
    'meta.AdaptiveRandomForestRegressor -s 10',
    'meta.AdaptiveRandomForestRegressor -s 10 -m 80',
    'rules.AMRulesRegressor',
    'rules.functions.Perceptron',
    'trees.ARFFIMTDD',

]

clf_buffer_learners = [
    'functions.MajorityClass',
    'functions.NoChange',
    'lazy.SAMkNN',
    'bayes.NaiveBayes',
    'meta.AdaptiveRandomForest -s 10',
    'meta.AdaptiveRandomForest -s 10 -m 80',
    'trees.HoeffdingAdaptiveTree',
    'meta.OzaBagAdwin',
]

if str(params) in self.ran_params:
    print("Params already ran - " + str(params))
    return float('-inf')
self.ran_params.append(str(params))
print(params)
self.index += 1
max_together = 1
cmd, target_file = self.generate_command(params[0], params[1], params[2], params[3], params[4], 
                       params[5], params[6], params[7], params[8], params[9], target_dir = '20210831_test')
print(f"Running cmd = {cmd}")
out = self.run_command(cmd,f'{self.dataset}{self.index}')
self.outs.append(out)

if len(self.outs) >= max_together:
    self.outs[len(self.outs) - max_together].communicate()

result_df = pd.read_csv(target_file, sep = ',')
col_name = 'classifications correct (percent)' if self.exp_args['classification'] else 'mean absolute error'
result = np.mean(result_df[col_name])
print (f"Ran cmd with result {result}")
run_result_file = target_file.rsplit( "/", 1 )[0] + "/_run_result.csv"
with open(run_result_file, 'a') as file:
    stdev =  np.std(result_df[col_name])
    to_write = ",".join([str(p) for p in params]) + "," + "{:.2f}".format(stdev) + "," + "{:.2f}".format(result) + "\n"
    print("Writing " + to_write)
    file.write(to_write)
return result if self.exp_args['classification']  else -result

std_set = r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff'
slim_set = r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff'


std_learn = [f'EvaluatePrequentialRegression -l ({l}) -s (ArffFileStream -f {std_set} ) -i 32000 -f 1000 -q 1000 -d runs/tuning/{l.replace(" ","")}' for l in learners]
slim_learn = [f'EvaluatePrequentialRegression -l ({l}) -s (ArffFileStream -f {slim_set} ) -i 32000 -f 1000 -q 1000 -d runs/tuning/slim_{l.replace(" ","")}' for l in learners]

In [92]:
file =  r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff'
task = 'EvaluatePrequentialRegression'
learner = 'BufferLearner'
arf_k = 14
# buffer_learner = f'(meta.AdaptiveRandomForestRegressor -l (ARFFIMTDD -k {arf_k} -s VarianceReductionSplitCriterion -g 50 -c 0.01) -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'(meta.AdaptiveRandomForestRegressor -l (ARFFIMTDD -k {arf_k} -s VarianceReductionSplitCriterion -g 50 -c 0.01) -m 80 -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
stream = f'(ArffFileStream -f {file})'

id_index = 15

i = 32000
f = 1000
q = 1000
def generate_command(n, extractor, partition_id, time_ids, cluster_num, buffer_ids, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}vavel_slim_{extractor}_{n}_{cluster_num}_{partition_id}_{time_ids}_{buffer_ids}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p {partition_id} -t {time_ids} -y {buffer_ids} -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [93]:
buffer_sizes = [1,5,10,25,50,100]
cluster_num = [2,3,4,5,8,25]
extractor = ['naive','cluster','featureExtraction', 'cep']
vavel_slim_partition_id = [13, 14]
time_ids = '0,5,7,9'
buffer_ids = ['-1', '4,8', '4,8,12', '1,2,3,4,6,8,11,12']
cluster_types = ['clustree', 'clustream']

buffer_types = ['random', 'relevance']
rs = [0.1, 0.25, 0.5]

In [94]:
def run_command(cmd, file_suffix):
    file_contents = f'@echo off\n\
    \n\
    set BASEDIR=%~dp0\..\n\
    set MEMORY=6g\n\
    \n\
    java -Xmx%MEMORY% -cp "%BASEDIR%/lib/*" -javaagent:"%BASEDIR%/lib/sizeofag-1.0.4.jar" ^\n\
    moa.DoTask ^\n\
    "{cmd} "'
    filename = r'..\moa-release-2020.07.1\bin\moa_test_run_' + file_suffix + '.bat'
    with open(filename, 'w') as file:
        file.write(file_contents)
    out = subprocess.Popen(filename)
    return out

In [95]:
outs = []
max_together = 2

In [20]:
outs = []
for n in buffer_sizes:
    cmd = generate_command(n, 'cluster', 14, time_ids, 8, '-1', target_dir = '20210829/cluster/n/')
    print(f"Running with n = {n}")
    print(f"Running cmd = {cmd}")
    out = run_command(cmd,f'n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -l (ARFFIMTDD -k 14 -s VarianceReductionSplitCriterion -g 50 -c 0.01) -m 80 -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01)) -n 1 -m (meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01)) -c cluster -r 1 -b random -p 14 -t 0,5,7,9 -y -1 -q clustree -x 8) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/result_sup_slim.arff) -i 32000 -f 1000 -q 1000 -d runs/tuning/20210829/cluster/n/vavel_slim_cluster_1_8_14_0,5,7,9_-1_clustree_random_100.csv
Running with n = 5
Running cmd = EvaluatePrequentialRegression -l (BufferLearner -l (meta.AdaptiveRandomForestRegressor -l (ARFFIMTDD -k 14 -s VarianceReductionSplitCriterion -g 50 -c 0.01) -m 80 -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01)) -n 5 -m (meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADW

In [47]:
for n in [1,3,5,10]:
    cmd = generate_command(n, 'naive', 14, time_ids, 25, '4,8,12', target_dir = '20210613/naive/n/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'naive_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10


In [48]:
for n in buffer_sizes[1:]:
    cmd = generate_command(n, 'cep', 14, time_ids, 6, '-1', target_dir = '20210613/cep/n/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'cep_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 5
Running with n = 10
Running with n = 25
Running with n = 50
Running with n = 100


In [49]:
for n in [1,3,5,10,25]:
    cmd = generate_command(n, 'featureExtraction', 14, time_ids, 25, '-1', target_dir = '20210613/extract/n/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'extract_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10
Running with n = 25


In [None]:
for n in [5,10,25]:
    cmd = generate_command(n, 'featureExtraction', 14, time_ids, 25, '-1', target_dir = '20210613/extract/n/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'extract_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

In [50]:
for c in cluster_num:
    if c != 8:
        cmd = generate_command(25, 'cluster', 14, time_ids, c, '-1', target_dir = '20210613/cluster/c/')
        print(f"Running with c = {c}")
        out = run_command(cmd,f'c{c}')
        outs.append(out)

        if len(outs) >= max_together:
            outs[len(outs) - max_together].communicate()

Running with c = 2
Running with c = 3
Running with c = 4
Running with c = 5
Running with c = 25


In [53]:
for c in [10,15]:
    if c != 8:
        cmd = generate_command(25, 'cluster', 14, time_ids, c, '-1', target_dir = '20210613/cluster/c/')
        print(f"Running with c = {c}")
        out = run_command(cmd,f'c{c}')
        outs.append(out)

        if len(outs) >= max_together:
            outs[len(outs) - max_together].communicate()

Running with c = 10
Running with c = 15


In [52]:
clustream_num = [3,5,8,10,15,25]
for c in clustream_num:
    if c != 8:
        cmd = generate_command(25, 'cluster', 14, time_ids, c, '-1', 'clustream', target_dir = '20210613/cluster/c/')
        print(f"Running with c = {c}")
        out = run_command(cmd,f'c{c}')
        outs.append(out)

        if len(outs) >= max_together:
            outs[len(outs) - max_together].communicate()

Running with c = 3
Running with c = 5
Running with c = 10
Running with c = 15
Running with c = 25


In [35]:
for c in [10,15]:
    cmd = generate_command(100, 'cluster', 14, time_ids, c, '-1')
    run_command(cmd,f'c{c}')

In [38]:
clustream_num = [3,5,8,10,15,25]

In [39]:
for c in clustream_num[:2]:
    cmd = generate_command(100, 'cluster', 14, time_ids, c, '-1', 'clustream')
    run_command(cmd,f'c2{c}')

In [40]:
for c in clustream_num[2:4]:
    cmd = generate_command(100, 'cluster', 14, time_ids, c, '-1', 'clustream')
    run_command(cmd,f'c2{c}')

In [41]:
for c in clustream_num[4:]:
    cmd = generate_command(100, 'cluster', 14, time_ids, c, '-1', 'clustream')
    run_command(cmd,f'c2{c}')

In [7]:
max_together = 2

In [6]:
buffer_ids

['-1', '4,8', '4,8,12', '1,2,3,4,6,8,11,12']

In [7]:
for b in buffer_ids:
    cmd = generate_command(100, 'cluster', 14, time_ids, 25, b)
    print(f"Running with b = {b}")
    out = run_command(cmd,f'b{b.replace(",","-")}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with b = -1
Running with b = 4,8
Running with b = 4,8,12
Running with b = 1,2,3,4,6,8,11,12


Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10


Running with n = 5
Running with n = 10
Running with n = 25
Running with n = 50
Running with n = 100


Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10
Running with n = 25


In [16]:
for c in [3,4,8,10]:
    cmd = generate_command(25, 'cep', 14, time_ids, c, '-1')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'cep_c{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 3
Running with c = 4
Running with c = 8
Running with c = 10


In [20]:
for c in [3,4,8,10]:
    cmd = generate_command(25, 'cep', 14, time_ids, c, '-1','clustream')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'cep_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 3
Running with c = 4
Running with c = 8
Running with c = 10


In [18]:
for c in cluster_num[:-1]:
    cmd = generate_command(25, 'featureExtraction', 14, time_ids, c, '-1')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'extract_c{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 2
Running with c = 3
Running with c = 4
Running with c = 5
Running with c = 8


In [19]:
for c in cluster_num:
    cmd = generate_command(25, 'featureExtraction', 14, time_ids, c, '-1', 'clustream')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'extract_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 2
Running with c = 3
Running with c = 4
Running with c = 5
Running with c = 8
Running with c = 25


In [25]:
for r in rs:
    cmd = generate_command(100, 'cluster', 14, time_ids, 25, '-1', r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'r{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [26]:
for r in rs:
    cmd = generate_command(100, 'cluster', 14, time_ids, 25, '-1', buffer_type= 'relevance',r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'r2{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [27]:
for r in rs:
    cmd = generate_command(25, 'featureExtraction', 14, time_ids, 8, '-1', 'clustream', r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'extract_r{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [29]:
for r in rs:
    cmd = generate_command(25, 'featureExtraction', 14, time_ids, 8, '-1', 'clustream', buffer_type='relevance', r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'extract_r2{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [30]:
for c in [6]:
    cmd = generate_command(25, 'cep', 14, time_ids, c, '-1','clustream')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'cep_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 6


In [32]:
for r in rs:
    cmd = generate_command(25, 'cep', 14, time_ids, 6, '-1','clustream', buffer_type='relevance', r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'cep_r2{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [33]:
for r in rs:
    cmd = generate_command(25, 'cep', 14, time_ids, 6, '-1','clustream', r=r)
    print(f"Running with r = {r}")
    out = run_command(cmd,f'cep_r{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [31]:
for p in [13]:
    cmd = generate_command(100, 'cluster', p, time_ids, 25, '-1', buffer_type= 'relevance',r=0.1)
    print(f"Running with p = {p}")
    out = run_command(cmd,f'p{p}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with p = 13


In [36]:
for p in [13]:
    cmd = generate_command(25, 'featureExtraction', p, time_ids, 8, '-1', 'clustream', buffer_type='random', r=0.1)
    print(f"Running with p = {p}")
    out = run_command(cmd,f'extract_p{p}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with p = 13


In [37]:
for  p in [13]:
    cmd = generate_command(25, 'cep', 14, time_ids, 6, '-1','clustree', r=0.5)
    print(f"Running with p = {p}")
    out = run_command(cmd,f'cep_p{p}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with p = 13


In [15]:
with open(r'..\moa-release-2020.07.1\bin\moa_test_run.bat', 'w') as file:
    file.write(file_contents)

In [19]:
out = subprocess.Popen(r'..\moa-release-2020.07.1\bin\moa_test_run.bat')

In [13]:
file_contents = f'@echo off\n\
\n\
set BASEDIR=%~dp0\..\n\
set MEMORY=6g\n\
\n\
java -Xmx%MEMORY% -cp "%BASEDIR%/lib/*" -javaagent:"%BASEDIR%/lib/sizeofag-1.0.4.jar" ^\n\
moa.DoTask ^\n\
"{cmd} "'

In [7]:
learners = [
    'rules.functions.TargetMean',
    'rules.functions.FadingTargetMean',
    'functions.AdaGrad',
    'meta.AdaptiveRandomForestRegressor',
    'meta.AdaptiveRandomForestRegressor -m 80',
    'rules.AMRulesRegressor',
    'rules.functions.Perceptron',
    'trees.ARFFIMTDD',

]
std_set = r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff'
slim_set = r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff'


std_learn = [f'EvaluatePrequentialRegression -l ({l}) -s (ArffFileStream -f {std_set} ) -i 32000 -f 1000 -q 1000 -d runs/tuning/{l.replace(" ","")}' for l in learners]
slim_learn = [f'EvaluatePrequentialRegression -l ({l}) -s (ArffFileStream -f {slim_set} ) -i 32000 -f 1000 -q 1000 -d runs/tuning/slim_{l.replace(" ","")}' for l in learners]

In [8]:
std_learn[4:5]

['EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -m 80) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/meta.AdaptiveRandomForestRegressor-m80']

In [None]:
index = 0
for cmd in std_learn:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (rules.functions.TargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/rules.functions.TargetMean
Running cmd EvaluatePrequentialRegression -l (rules.functions.FadingTargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/rules.functions.FadingTargetMean
Running cmd EvaluatePrequentialRegression -l (functions.AdaGrad) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/functions.AdaGrad
Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/meta.AdaptiveRandomForestRegressor
Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -m 80) -s

In [10]:
index = 0
for cmd in std_learn[-2:]:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (rules.functions.Perceptron) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/rules.functions.Perceptron
Running cmd EvaluatePrequentialRegression -l (trees.ARFFIMTDD) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/trees.ARFFIMTDD


In [11]:
index = 0
for cmd in slim_learn:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (rules.functions.TargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/rules.functions.TargetMean
Running cmd EvaluatePrequentialRegression -l (rules.functions.FadingTargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/rules.functions.FadingTargetMean
Running cmd EvaluatePrequentialRegression -l (functions.AdaGrad) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/functions.AdaGrad
Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/meta.AdaptiveRandomForestRegressor
Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomFores

In [16]:
index = 0
for cmd in std_learn[4:5]:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -m 80) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/meta.AdaptiveRandomForestRegressor-m80


In [9]:
index = 0
for cmd in slim_learn[4:5]:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor -m 80) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\result_sup_slim.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/slim_meta.AdaptiveRandomForestRegressor-m80


In [10]:
learners_clf = [
    'functions.MajorityClass',
    'functions.NoChange',
    'lazy.SAMkNN',
    'bayes.NaiveBayes',
    'meta.AdaptiveRandomForest',
    'meta.AdaptiveRandomForest -m 80',
    'trees.HoeffdingAdaptiveTree',
    'meta.OzaBagAdwin',

]
generated_sets = ['generators.RandomRBFGenerator', 'generators.HyperplaneGenerator','generators.LEDGenerator']

index = 0
cmds = []
for gen_set in generated_sets:
    cmds += [f'EvaluatePrequential -l ({l}) -s ({gen_set}) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/{l.replace(" ","")}_{gen_set}.csv' for l in learners_clf]


In [11]:
index = 0
for cmd in cmds:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'generator_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequential -l (functions.MajorityClass) -s (generators.RandomRBFGenerator) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/functions.MajorityClass_generators.RandomRBFGenerator.csv
Running cmd EvaluatePrequential -l (functions.NoChange) -s (generators.RandomRBFGenerator) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/functions.NoChange_generators.RandomRBFGenerator.csv
Running cmd EvaluatePrequential -l (lazy.SAMkNN) -s (generators.RandomRBFGenerator) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/lazy.SAMkNN_generators.RandomRBFGenerator.csv
Running cmd EvaluatePrequential -l (bayes.NaiveBayes) -s (generators.RandomRBFGenerator) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/bayes.NaiveBayes_generators.RandomRBFGenerator.csv
Running cmd EvaluatePrequential -l (meta.AdaptiveRandomForest) -s (generators.RandomRBFGenerator) -i 50000 -f 1000 -q 1000 -d runs/tuning/generator2/meta.AdaptiveRandomForest_generators.RandomRBFGenerator.csv
Running cmd Evaluat

In [17]:
learners_clf = [
    'functions.MajorityClass',
    'functions.NoChange',
    'lazy.SAMkNN',
    'bayes.NaiveBayes',
    'meta.AdaptiveRandomForest',
    'meta.AdaptiveRandomForest -m 80',
    'trees.HoeffdingAdaptiveTree',
    'meta.OzaBagAdwin',

]
rest_sets = ['WISDM', 'electricity_arff']

index = 0
cmds = []
wisdm_fullpath =  f'C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\{rest_sets[0]}.arff'
electricity_fullpath =  f'C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\{rest_sets[1]}.arff'
cmds += [f'EvaluatePrequential -l ({l}) -s (ArffFileStream -f {wisdm_fullpath})-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/{l.replace(" ","")}_wisdm.csv' for l in learners_clf]
# cmds += [f'EvaluatePrequential -l ({l}) -s (ArffFileStream -f {electricity_fullpath}) -i 45000 -f 1000 -q 1000 -d runs/tuning/wisdm/{l.replace(" ","")}_electricity.csv' for l in learners_clf]


In [18]:
cmds

['EvaluatePrequential -l (functions.MajorityClass) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/functions.MajorityClass_wisdm.csv',
 'EvaluatePrequential -l (functions.NoChange) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/functions.NoChange_wisdm.csv',
 'EvaluatePrequential -l (lazy.SAMkNN) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/lazy.SAMkNN_wisdm.csv',
 'EvaluatePrequential -l (bayes.NaiveBayes) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tu

In [19]:
max_together = 10
index = 0
for cmd in cmds:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'generator_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequential -l (functions.MajorityClass) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/functions.MajorityClass_wisdm.csv
Running cmd EvaluatePrequential -l (functions.NoChange) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/functions.NoChange_wisdm.csv
Running cmd EvaluatePrequential -l (lazy.SAMkNN) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/lazy.SAMkNN_wisdm.csv
Running cmd EvaluatePrequential -l (bayes.NaiveBayes) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\WISDM.arff)-e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d r

In [7]:
learners = [
    'rules.functions.TargetMean',
    'rules.functions.FadingTargetMean',
    'functions.AdaGrad',
    'meta.AdaptiveRandomForestRegressor',
    'meta.AdaptiveRandomForestRegressor -m 80',
    'rules.AMRulesRegressor',
    'rules.functions.Perceptron',
    'trees.ARFFIMTDD',

]
air_set = r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff'


air_learn = [f'EvaluatePrequentialRegression -l ({l}) -s (ArffFileStream -f {air_set} ) -i 50000 -f 500 -q 500 -d runs/tuning/airlines/{l.replace(" ","")}' for l in learners]


In [9]:
index = 0
max_together = 3
for cmd in air_learn:
    print(f"Running cmd {cmd}")
    out = run_command(cmd,f'baseline_{index}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
    index+= 1

Running cmd EvaluatePrequentialRegression -l (rules.functions.TargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/airlines/rules.functions.TargetMean
Running cmd EvaluatePrequentialRegression -l (rules.functions.FadingTargetMean) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/airlines/rules.functions.FadingTargetMean
Running cmd EvaluatePrequentialRegression -l (functions.AdaGrad) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/airlines/functions.AdaGrad
Running cmd EvaluatePrequentialRegression -l (meta.AdaptiveRandomForestRegressor) -s (ArffFileStream -f C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\airlines.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/airlines/meta.AdaptiveRandomForestRegressor
Running cmd EvaluatePrequentialRegression 

In [42]:
cmd = 'EvaluatePrequentialRegression -l (rules.functions.TargetMean) -s (ArffFileStream -f C:\\Users\\kosma\\Desktop\\MAGISTER\\FeatExtream\\data\\airlines.arff ) -i 32000 -f 1000 -q 1000 -d runs/tuning/airlines/rules.functions.TargetMean'

run_command(cmd,f'test_airlines')

<subprocess.Popen at 0x18c3b1093c8>

In [6]:
file =  r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\wisdm.arff'
task = 'EvaluatePrequential'
learner = 'BufferLearner'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'(meta.AdaptiveRandomForest -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
stream = f'(ArffFileStream -f {file})'


w = 200
i = 5400
f = 200
q = 200
def generate_command(n, extractor , cluster_num, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}wisdm_{extractor}_{n}_{cluster_num}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -e (WindowClassificationPerformanceEvaluator -w {w}) -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p -1 -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [7]:
generate_command(10, 'cluster', 8, 'clustree', target_dir = 'wisdm/buffer/')

'EvaluatePrequential -l (BufferLearner -l (meta.AdaptiveRandomForest -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01)) -n 10 -m (meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01)) -c cluster -r 1 -b random -p -1 -q clustree -x 8) -s (ArffFileStream -f C:/Users/kosma/Desktop/MAGISTER/FeatExtream/data/wisdm.arff) -e (WindowClassificationPerformanceEvaluator -w 200) -i 5400 -f 200 -q 200 -d runs/tuning/wisdm/buffer/wisdm_cluster_10_8_clustree_random_100.csv'

In [8]:
max_together = 10
for n in [1,5,10,25,50,100]:
    cmd = generate_command(n, 'cluster', 8, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'wisdm_cluster_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 5
Running with n = 10
Running with n = 25
Running with n = 50
Running with n = 100


In [9]:
max_together = 10
for n in [1,5,10,25,50,100]:
    cmd = generate_command(n, 'cep', 8, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'wisdm_cep_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 5
Running with n = 10
Running with n = 25
Running with n = 50
Running with n = 100


In [10]:
max_together = 10
for c in [3,4,6]:
    cmd = generate_command(1, 'cep', c, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_cep_c{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
for c in [3,4,6]:
    cmd = generate_command(1, 'cep', c, 'clustream', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_cep_c{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 2
Running with c = 3
Running with c = 4
Running with c = 6


In [22]:
run_command(cmd,f'tss_{1}')

<subprocess.Popen at 0x2162ecc7860>

In [13]:
max_together = 10
for c in [4,6]:
    cmd = generate_command(1, 'cluster', c, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_cluster_c{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
for c in [4,6]:
    cmd = generate_command(1, 'cluster', c, 'clustream', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_cluster_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 4
Running with c = 6
Running with c = 4
Running with c = 6


In [14]:
max_together = 10
for c in [4,6]:
    cmd = generate_command(1, 'featureExtraction', c, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_featureExtraction_c{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
for c in [4,6]:
    cmd = generate_command(1, 'featureExtraction', c, 'clustream', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_featureExtraction_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with c = 4
Running with c = 6
Running with c = 4
Running with c = 6


In [16]:
for r in rs:
    cmd = generate_command(1, 'featureExtraction', c, 'clustree', buffer_type='relevance', r=r, target_dir = 'wisdm/buffer/')

    print(f"Running with r = {r}")
    out = run_command(cmd,f'featureExtractionp_r2{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [17]:
for r in rs:
    cmd = generate_command(1, 'featureExtraction', c, 'clustree', buffer_type='random', r=r, target_dir = 'wisdm/buffer/')

    print(f"Running with r = {r}")
    out = run_command(cmd,f'featureExtractionp_r{r}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with r = 0.1
Running with r = 0.25
Running with r = 0.5


In [None]:
max_together = 10
for r in [0.1,0.25,0.5]:
    cmd = generate_command(1, 'featureExtraction', c, 'clustree', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_featureExtraction_c{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()
for c in [4,6]:
    cmd = generate_command(1, 'featureExtraction', c, 'clustream', target_dir = 'wisdm/buffer/')
    print(f"Running with c = {c}")
    out = run_command(cmd,f'wisdm_featureExtraction_c2{c}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

In [18]:
file =  r'C:\Users\kosma\Desktop\MAGISTER\FeatExtream\data\electricity_arff.arff'
task = 'EvaluatePrequential'
learner = 'BufferLearner'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'(meta.AdaptiveRandomForest -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
stream = f'(ArffFileStream -f {file})'


i = 45000
f = 1000
q = 1000
def generate_command(n, extractor , cluster_num, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}wisdm_{extractor}_{n}_{cluster_num}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p -1 -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [19]:
max_together = 10
for n in [1,5,10]:
    cmd = generate_command(n, 'cluster', 8, 'clustree', target_dir = 'electricity/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'wisdm_cluster_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 5
Running with n = 10


In [21]:
max_together = 10
for n in [1,3,5,10]:
    cmd = generate_command(n, 'cep', 4, 'clustream', target_dir = 'electricity/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'wisdm_cep_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10


In [22]:
max_together = 10
for n in [1,3,5, 10]:
    cmd = generate_command(n, 'featureExtraction', 4, 'clustream', target_dir = 'electricity/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'wisdm_featureExtraction_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 1
Running with n = 3
Running with n = 5
Running with n = 10


In [29]:
task = 'EvaluatePrequential'
learner = 'BufferLearner'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'lazy.SAMkNN'
stream = f'generators.RandomRBFGenerator'


i = 50000
f = 1000
q = 1000
def generate_command(n, extractor , cluster_num, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}wisdm_{extractor}_{n}_{cluster_num}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p -1 -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [30]:
max_together = 10
for n in [10]:
    cmd = generate_command(n, 'featureExtraction', 4, 'clustream', target_dir = 'generated/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'generated_featureExtraction_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 10


In [31]:
max_together = 10
for n in [10]:
    cmd = generate_command(n, 'cep', 4, 'clustream', target_dir = 'generated/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'generated_cep_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 10


In [32]:
max_together = 10
for n in [10]:
    cmd = generate_command(n, 'cluster', 6, 'clustree', target_dir = 'generated/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'generated_cluster_n{n}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 10


In [9]:
task = 'EvaluatePrequential'
learner = 'BufferLearner'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'meta.OzaBagAdwin'
stream = f'generators.HyperplaneGenerator'


i = 50000
f = 1000
q = 1000
def generate_command(n, extractor , cluster_num, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}hyperplane_{extractor}_{n}_{cluster_num}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p -1 -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [34]:
max_together = 10
for t in ['cluster', 'cep', 'featureExtraction']:
    cmd = generate_command(10, t, 6, 'clustree', target_dir = 'generated/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'generated_{t}_n{10}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 10
Running with n = 10
Running with n = 10


In [36]:
task = 'EvaluatePrequential'
learner = 'BufferLearner'

relevance_learner = '(meta.AdaptiveRandomForestRegressor -x (ADWINChangeDetector -a 0.001) -p (ADWINChangeDetector -a 0.01))'
buffer_learner = f'meta.OzaBagAdwin'
stream = f'generators.LEDGenerator'


i = 50000
f = 1000
q = 1000
def generate_command(n, extractor , cluster_num, cluster_type='clustree', buffer_type = 'random', r = 1, target_dir = ''):
    pathlib.Path(f'runs/tuning/{target_dir}').mkdir(parents=True, exist_ok=True)
    
    target_file = f'runs/tuning/{target_dir}led_{extractor}_{n}_{cluster_num}_{cluster_type}_{buffer_type}_{int(r*100)}.csv'
    start_cmd = f'{task} -l ({learner} -l {buffer_learner} -n {n} -m {relevance_learner}'
    end_cmd = f') -s {stream} -i {i} -f {f} -q {q} -d {target_file}'
    cmd = f'{start_cmd} -c {extractor} -r {r} -b {buffer_type} -p -1 -q {cluster_type} -x {cluster_num}{end_cmd}'
    cmd = cmd.replace("\\","/")
    return cmd

In [10]:
max_together = 10
for t in ['cluster', 'cep', 'featureExtraction']:
    cmd = generate_command(10, t, 6, 'clustree', target_dir = 'generated/buffer/')
    print(f"Running with n = {n}")
    out = run_command(cmd,f'led_{t}_n{10}')
    outs.append(out)

    if len(outs) >= max_together:
        outs[len(outs) - max_together].communicate()

Running with n = 100
Running with n = 100
Running with n = 100
