In [1]:
import sys
import os
import numpy as np
from pathlib import Path
os.chdir(Path(os.getcwd()).resolve().parents[1])
import setup
from methods import grid_selection_amanda_fixed
from sklearn.model_selection import ParameterGrid



def writeResults(datasetID, bestScore, bestParams):
    """Append one grid-search result line to the results file.

    The line has the form ``"<datasetID>: <bestScore> using <bestParams> \\n"``
    and is appended to ``results/gridsearch_amanda_fixed_batch-EVL-SAG.txt``,
    resolved relative to the current working directory.

    Args:
        datasetID: Label identifying the dataset (formatted with ``str.format``).
        bestScore: Best score found for that dataset.
        bestParams: Parameter combination that produced ``bestScore``.
    """
    # Create the output directory up front so a finished search is not lost
    # to a FileNotFoundError when "results/" does not exist yet.
    os.makedirs("results", exist_ok=True)
    line = "{}: {} using {} \n".format(datasetID, bestScore, bestParams)
    # The context manager closes the handle even if the write raises.
    with open("results/gridsearch_amanda_fixed_batch-EVL-SAG.txt", "a") as file:
        file.write(line)


def main():
    """Grid-search ``excludingPercentage`` for each configured dataset.

    For every loader in ``datasets``:

    1. Load the data from ``<cwd>/data/``.
    2. Keep only the first 5% of the instances.
    3. Treat 70% of that slice as initially labeled data and divide the rest
       evenly into ``batches`` batches.
    4. Run ``grid_selection_amanda_fixed.run`` once per parameter combination
       and keep the combination with the highest mean of ``gs.predict()``
       (presumably an accuracy in percent -- confirm against the
       ``grid_selection_amanda_fixed`` module).
    5. Print the best score/parameters and append them to the results file
       via ``writeResults``.

    A parameter combination whose fit/predict raises is reported and skipped;
    it does not abort the search.
    """
    # The loaders take the separator explicitly, so keep passing it along.
    sep = os.sep
    path = os.path.join(os.getcwd(), 'data') + sep

    # Synthetic dataset loaders (currently disabled):
    #   setup.loadCDT, setup.loadCHT, setup.load2CDT, setup.load2CHT,
    #   setup.load4CR, setup.load4CRE_V1, setup.load4CRE_V2, setup.load5CVT,
    #   setup.loadCSurr, setup.load4CE1CF, setup.loadUG_2C_2D,
    #   setup.loadMG_2C_2D, setup.loadFG_2C_2D, setup.loadUG_2C_3D,
    #   setup.loadUG_2C_5D, setup.loadGEARS_2C_2D, setup.loadCheckerBoard,
    #   setup.loadElecData, setup.loadKeystroke, setup.loadNOAADataset
    # Real datasets:
    datasets = [setup.loadElecData, setup.loadKeystroke, setup.loadNOAADataset]

    batches = 1
    poolSize = None
    isBatchMode = True

    for loader in datasets:
        finalScore = 0
        best_grid = {}
        dataValues, dataLabels, description = loader(path, sep)

        # Only the first 5% of the instances are used for tuning.
        availableQty = int(0.05 * len(dataLabels))
        availableLabels = dataLabels[:availableQty]
        availableData = dataValues[:availableQty]

        # 70/30 split: initially labeled data vs. data fed in batches.
        initialLabeledData = int(0.7 * len(availableLabels))
        sizeOfBatch = int((len(availableLabels) - initialLabeledData) / batches)

        print("{}: {} batches of {} instances".format(description, batches, sizeOfBatch))

        # Only excludingPercentage varies; every other entry is a
        # single-value list so ParameterGrid passes it through unchanged.
        tuned_params = [{"excludingPercentage": [0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5],
                         "sizeOfBatch": [sizeOfBatch],
                         "batches": [batches], "poolSize": [poolSize], "isBatchMode": [isBatchMode],
                         "initialLabeledData": [initialLabeledData]}]

        for g in ParameterGrid(tuned_params):
            gs = grid_selection_amanda_fixed.run(**g)

            try:
                gs.fit(availableData, availableLabels)
                averageAccuracy = np.mean(gs.predict())
            except Exception as exc:
                # Best effort: one failing combination must not abort the
                # whole search, but report why it failed so it can be fixed.
                print("An error occured in ", description, g, "-", repr(exc))
                continue

            print(averageAccuracy, g)
            # Strict comparison: on ties the first combination tried wins.
            if finalScore < averageAccuracy:
                finalScore = averageAccuracy
                best_grid = g

        print(finalScore)
        print(best_grid)
        print("=======================================================================================================")

        writeResults(description, finalScore, best_grid)
    
# Run the grid search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Electricity data. 7 features. 2 classes.: 1 batches of 414 instances
74.64 {'batches': 1, 'excludingPercentage': 0.9, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
74.64 {'batches': 1, 'excludingPercentage': 0.85, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
74.64 {'batches': 1, 'excludingPercentage': 0.8, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
74.15 {'batches': 1, 'excludingPercentage': 0.75, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
73.91 {'batches': 1, 'excludingPercentage': 0.7, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
74.15 {'batches': 1, 'excludingPercentage': 0.65, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'sizeOfBatch': 414}
74.64 {'batches': 1, 'excludingPercentage': 0.6, 'initialLabeledData': 963, 'isBatchMode': True, 'poolSize': None, 'si