In [1]:
import sys
import os
import numpy as np
from pathlib import Path
os.chdir(Path(os.getcwd()).resolve().parents[1])
import setup
from methods import grid_selection_amanda_dynamic
from sklearn.model_selection import ParameterGrid



def writeResults(datasetID, bestScore, bestParams, clfName):
    """Append one grid-search result line to the per-classifier results file.

    The line has the form ``"<datasetID>: <bestScore> using <bestParams> \\n"``
    and is appended to
    ``results/batch/dynamic/gridsearch_amanda_dynamic_EVL-<clfName>.txt``,
    resolved relative to the current working directory.

    Args:
        datasetID: Label identifying the dataset (formatted with ``str``).
        bestScore: Best score found for that dataset.
        bestParams: Parameter combination that produced ``bestScore``.
        clfName: Classifier name; selects which results file is appended to.
    """
    path = "results/batch/dynamic/gridsearch_amanda_dynamic_EVL-{}.txt".format(clfName)
    # Create the output directory on demand so a finished search is not lost
    # to a FileNotFoundError on a fresh checkout.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The context manager closes the handle even if the write raises.
    with open(path, "a") as results_file:
        results_file.write("{}: {} using {} \n".format(datasetID, bestScore, bestParams))


def _build_param_grid(clfName, sizeOfBatch, batches, poolSize, isBatchMode, initialLabeledData):
    """Return the ParameterGrid specification for one classifier/dataset pair."""
    params = {
        "sizeOfBatch": [sizeOfBatch],
        "batches": [batches],
        "poolSize": [poolSize],
        "isBatchMode": [isBatchMode],
        "initialLabeledData": [initialLabeledData],
        "clfName": [clfName],
    }
    # Only the LP and KNN classifiers get a K value to sweep.
    if clfName in ('LP', 'KNN'):
        params["K"] = list(range(2, 13))
    return [params]


def main():
    """Grid-search the AMANDA dynamic method for every classifier and dataset.

    For each classifier name, every dataset loader is called with the data
    directory and path separator. The first 5% of the loaded instances are
    kept as the available data, of which 70% is the initial labeled set and
    the remainder is divided into ``batches`` batches. Each parameter
    combination is fitted and scored with the mean of ``gs.predict()``; the
    best score and its parameters are printed and appended to the results
    file through ``writeResults``.

    A parameter combination whose fit/predict raises is reported (including
    the exception type and message) and skipped; the search continues.
    """
    # The dataset loaders receive the separator explicitly.
    sep = '\\' if sys.platform.startswith('win') else '/'
    path = os.getcwd() + sep + 'data' + sep

    #loading datasets
    datasets = [setup.loadCDT, setup.loadCHT, setup.load2CDT, setup.load2CHT, setup.load4CR, setup.load4CRE_V1,
                setup.load4CRE_V2, setup.load5CVT, setup.loadCSurr, setup.load4CE1CF, setup.loadUG_2C_2D, setup.loadMG_2C_2D,
                setup.loadFG_2C_2D, setup.loadUG_2C_3D, setup.loadUG_2C_5D, setup.loadGEARS_2C_2D, setup.loadCheckerBoard,
                setup.loadElecData, setup.loadKeystroke, setup.loadNOAADataset]

    # Settings shared by every classifier and dataset.
    batches = 1
    poolSize = None
    isBatchMode = True

    arrClfName = ['SGD', 'NB', 'RF', 'LP', 'KNN']
    for clfName in arrClfName:
        print("**************** BEGIN of {} results ****************".format(clfName))
        #testing grid search
        for loadDataset in datasets:
            finalScore = 0
            best_grid = {}
            dataValues, dataLabels, description = loadDataset(path, sep)

            # Only the first 5% of the instances are used for the search.
            availableQty = int(0.05*len(dataLabels))
            availableLabels = dataLabels[:availableQty]
            availableData = dataValues[:availableQty]

            # 70/30 train/test data
            initialLabeledData = int(0.7*len(availableLabels))
            sizeOfBatch = int((len(availableLabels)-initialLabeledData)/batches)

            print("{}: {} batches of {} instances".format(description, batches, sizeOfBatch))

            tuned_params = _build_param_grid(clfName, sizeOfBatch, batches, poolSize,
                                             isBatchMode, initialLabeledData)

            for g in ParameterGrid(tuned_params):
                gs = grid_selection_amanda_dynamic.run(**g)

                try:
                    gs.fit(availableData, availableLabels)
                    averageAccuracy = np.mean(gs.predict())
                    print(averageAccuracy, g)
                    # Strict comparison: the first combination reaching the
                    # best score is the one that is kept.
                    if finalScore < averageAccuracy:
                        finalScore = averageAccuracy
                        best_grid = g
                except Exception as err:
                    # Best effort: report the failing combination and move on,
                    # but keep the cause visible so it can be diagnosed.
                    print("An error occured in ", description, g)
                    print("    caused by {}: {}".format(type(err).__name__, err))

            print(finalScore)
            print(best_grid)
            print("=======================================================================================================")

            writeResults(description, finalScore, best_grid, clfName)
        print("******** END of {} results ********".format(clfName))
    
# Run the grid search only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


**************** BEGIN of SGD results ****************
One Class Diagonal Translation. 2 Dimensional data.: 1 batches of 240 instances
100.0 {'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
100.0
{'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
One Class Horizontal Translation. 2 Dimensional data.: 1 batches of 240 instances
90.42 {'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
90.42
{'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Two Classes Diagonal Translation. 2 Dimensional data: 1 batches of 240 instances
97.92 {'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
97.92
{'batches': 1, 'clfName': 'SGD', 'initialLabeledData': 560, 'isBatchMod

One Class Diagonal Translation. 2 Dimensional data.: 1 batches of 240 instances
100.0 {'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
100.0
{'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
One Class Horizontal Translation. 2 Dimensional data.: 1 batches of 240 instances
95.42 {'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
95.42
{'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Two Classes Diagonal Translation. 2 Dimensional data: 1 batches of 240 instances
95.0 {'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
95.0
{'batches': 1, 'clfName': 'NB', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Two Classes Hor

98.75 {'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
98.75
{'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
One Class Horizontal Translation. 2 Dimensional data.: 1 batches of 240 instances
94.17 {'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
94.17
{'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Two Classes Diagonal Translation. 2 Dimensional data: 1 batches of 240 instances
95.0 {'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
95.0
{'batches': 1, 'clfName': 'RF', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Two Classes Horizontal Translation. 2 Dimensional data.: 1 batches of 240 instances
83.75 {'bat

83.88 {'batches': 1, 'clfName': 'RF', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
83.88
{'batches': 1, 'clfName': 'RF', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
******** END of RF results ********
**************** BEGIN of LP results ****************
One Class Diagonal Translation. 2 Dimensional data.: 1 batches of 240 instances
98.33 {'K': 2, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
97.92 {'K': 3, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
100.0 {'K': 4, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
99.58 {'K': 5, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
100.0 {'K': 6, 'batches': 1, 'clfName': 'LP', 'initial

100.0 {'K': 7, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 8, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 9, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 10, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 11, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 12, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0
{'K': 3, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
Four Classes Rotating with Expansion V1. Bidimensional.: 1 

98.77 {'K': 2, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
97.88 {'K': 3, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.5 {'K': 4, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.08 {'K': 5, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.23 {'K': 6, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.34 {'K': 7, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.77 {'K': 8, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 6063, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2599}
96.88 {'K': 9, 'batches': 1, 'clfName': 'LP', 'initialLabeledDa

100.0 {'K': 10, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 11, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 12, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0
{'K': 2, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
Two 5-dimensional Unimodal Gaussian Classes.: 1 batches of 3000 instances
99.8 {'K': 2, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
99.77 {'K': 3, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
99.73 {'K': 4, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOf

NOAA dataset. Eight  features. Two classes.: 1 batches of 273 instances
75.82 {'K': 2, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
75.82 {'K': 3, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
76.92 {'K': 4, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
71.43 {'K': 5, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
71.43 {'K': 6, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
71.43 {'K': 7, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
71.43 {'K': 8, 'batches': 1, 'clfName': 'LP', 'initialLabeledData': 634, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 273}
71.4

84.17 {'K': 11, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
83.75 {'K': 12, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
85.83
{'K': 2, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 560, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 240}
Four Classes Rotating Separated. Bidimensional.: 1 batches of 2166 instances
99.95 {'K': 2, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 3, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
99.95 {'K': 4, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 2166}
100.0 {'K': 5, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 5054, 'isBatchMode': True, 'poolSize': None, 'si

97.11 {'K': 5, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.83 {'K': 6, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.59 {'K': 7, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
98.19 {'K': 8, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.71 {'K': 9, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.83 {'K': 10, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.71 {'K': 11, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 1934, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 830}
97.95 {'K': 12, 'batches': 1, 'clfName': 'KNN', 'initialLabe

100.0 {'K': 2, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 3, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 4, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 5, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 6, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 7, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 8, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 7000, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 3000}
100.0 {'K': 9, 'batches': 1, 'clfName': 'KNN', 'initial

75.12 {'K': 10, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
75.12 {'K': 11, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
75.12 {'K': 12, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
75.12
{'K': 7, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
Keyboard patterns database. 10 features. 4 classes.: 1 batches of 24 instances
91.67 {'K': 2, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 56, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 24}
91.67 {'K': 3, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 56, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 24}
83.33 {'K': 4, 'batches': 1, 'clfName': 'KNN', 'initialLabeledData': 56, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch