In [1]:
import sys
import os
from pathlib import Path
os.chdir(Path(os.getcwd()).resolve().parents[1])

import matplotlib.pyplot as plt
from source import plotFunctions
from timeit import default_timer as timer
import numpy as np
import setup
from source import metrics
from methods import scargc_1NN
from methods import sliding_knn
from methods import microcluster
from methods import microcluster2
from methods import microcluster3

%matplotlib inline
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import interactive
from IPython.display import Audio, display



class Experiment():
    def __init__(self, method):
        #commom for all experiments
        self.method = method
        #self.initialLabeledDataPerc=0.05 #150 instances for keystroke database and 0.05 % for artificial databases
        #self.classes=[0, 1]
        self.usePCA=False
        #used only by gmm / kde process
        self.densityFunction='gmm'
        self.excludingPercentage = 0.9
        self.K_variation = 2
        self.classifier='cluster_and_label'
        #used in alpha-shape version only
        self.CP=0.65
        self.alpha=0.5
        #used in kmeans_svm and compose only
        self.useSVM=False
        self.isImbalanced=False


def doExperiments(dataValues, dataLabels, datasetDescription, arrAccSCARGC, finalAccSCARGC, experiments, numberOfTimes, batches, labeledData, poolSize, isBatchMode):
    listOfAccuracies = []
    listOfMethods = []
    sizeOfBatch = int((len(dataLabels)-labeledData)/batches)#int(len(dataLabels)/batches)
    
    print(datasetDescription)
    print("{} batches of {} instances".format(batches, sizeOfBatch))
    print("\n\n")
    
    for name, e in experiments.items():
        CoreX = []
        CoreY = []
        elapsedTime = []
        accTotal = []
        accuracies=[]
        classes = list(set(dataLabels))#getting all possible classes existent in data
        e.sizeOfBatch = sizeOfBatch
        e.batches = batches
        e.dataLabels = dataLabels
        e.dataValues = dataValues
        e.clfName = 'knn' #rf = random forests, cl = cluster and label, knn = k-nn, svm = svm

        for i in range(numberOfTimes):
            start = timer()
            #accuracy per step
            algorithmName, accuracies, CoreX, CoreY = e.method.start(dataValues=e.dataValues, dataLabels=e.dataLabels, 
                                                                     usePCA=e.usePCA, classes=classes, classifier=e.classifier, 
                                                                     densityFunction=e.densityFunction, batches=e.batches, 
                                                                     sizeOfBatch = e.sizeOfBatch, initialLabeledData=labeledData, 
                                                                     excludingPercentage=e.excludingPercentage, 
                                                                     K_variation=e.K_variation, CP=e.CP, alpha=e.alpha, 
                                                                     clfName=e.clfName , useSVM=e.useSVM, isImbalanced=e.isImbalanced, 
                                                                     poolSize=poolSize, isBatchMode=isBatchMode)
            end = timer()
            averageAccuracy = np.mean(accuracies)

            #elapsed time per step
            elapsedTime.append(end - start)
            
            accTotal.append(averageAccuracy)
        
        listOfAccuracies.append(accuracies)
        listOfMethods.append(algorithmName)
        #print("Total of ", numberOfTimes, " experiment iterations with an average accuracy of ", np.mean(accTotal))
        print("Average execution time: ", np.mean(elapsedTime))
        metrics.finalEvaluation(accuracies, batches)
    
        #print data distribution in step t
        initial = (batches*sizeOfBatch)-sizeOfBatch
        final = initial + sizeOfBatch
        plotFunctions.plot(dataValues[initial:final], dataLabels[initial:final], CoreX, CoreY, batches)
        print("\n\n")
    print("SCARGC accuracy: ",finalAccSCARGC)
    listOfAccuracies.append(arrAccSCARGC)
    listOfMethods.append("SCARGC")
    
    plotFunctions.plotBoxplot(listOfAccuracies, listOfMethods)
    
        
def accSCARGC(path, sep, key, steps):
    
    resultsSCARGC_1, resultsSCARGC_2 = setup.loadSCARGCBoxplotResults(path, sep)
    res = resultsSCARGC_1[key]
    res = [ res[i::steps] for i in range(steps) ]
    arrAcc = []
    for i in range(steps):
        arrAcc.append(sum(res[i])/len(res[i])*100)
        #print(r[i])
    #print(sum(r)/steps)
    finalAcc = sum(arrAcc)/steps
    return arrAcc, finalAcc



def main():
    experiments = {}
    is_windows = sys.platform.startswith('win')
    sep = '\\'

    if is_windows == False:
        sep = '/'

    path = os.getcwd()+sep+'data'+sep
    
    initialLabeledData = 150
    steps = 100
    poolSize = 300
    isBatchMode = False
    arrAccSCARGC, finalAccSCARGC = accSCARGC(path, sep, '2CDT', steps)
    
    #sinthetic
    dataValues, dataLabels, description = setup.loadCDT(path, sep)
    dataValues, dataLabels, description = setup.loadCHT(path, sep)
    dataValues, dataLabels, description = setup.load2CDT(path, sep)
    dataValues, dataLabels, description = setup.load2CHT(path, sep)
    '''dataValues, dataLabels, description = setup.load4CR(path, sep)
    dataValues, dataLabels, description = setup.load4CRE_V1(path, sep)
    dataValues, dataLabels, description = setup.load4CRE_V2(path, sep)
    dataValues, dataLabels, description = setup.load5CVT(path, sep)
    dataValues, dataLabels, description = setup.loadCSurr(path, sep)
    dataValues, dataLabels, description = setup.load4CE1CF(path, sep)
    dataValues, dataLabels, description = setup.loadUG_2C_2D(path, sep)
    dataValues, dataLabels, description = setup.loadMG_2C_2D(path, sep)
    dataValues, dataLabels, description = setup.loadFG_2C_2D(path, sep)
    dataValues, dataLabels, description = setup.loadUG_2C_3D(path, sep)
    dataValues, dataLabels, description = setup.loadUG_2C_5D(path, sep)
    dataValues, dataLabels, description = setup.loadGEARS_2C_2D(path, sep)
    dataValues, dataLabels, description = setup.loadCheckerBoard(path, sep)
    #real
    dataValues, dataLabels, description = setup.loadKeystroke(path, sep)
    dataValues, dataLabels, description = setup.loadNOAADataset(path, sep)
    dataValues, dataLabels, description = setup.loadElecData(path, sep)'''
    
    
    experiments[2] = Experiment(scargc_1NN)
    #experiments[2] = Experiment(microcluster)
    #experiments[2] = Experiment(microcluster2) #sem condições, demorando demais (MAIS DE UM DIA)
    #experiments[2] = Experiment(microcluster3) # uma hora e meia pra processar 16000 pontos
    
    
    #running pywidget
    '''
    def run(batches):
        doExperiments(dataValues, dataLabels, description, experiments, 1, batches, 6250)
        
    v = interact(run, batches=(1, 100, 1)); #8 batches for keystroke and 100 batches for artificial datasets
    display(v)
    '''
    #params: X, y, method, num of experiment repetitions, num of batches, num of labeled data
    doExperiments(dataValues, dataLabels, description, arrAccSCARGC, finalAccSCARGC, experiments, 1, steps, initialLabeledData, poolSize, isBatchMode)
    
    
    
if __name__ == "__main__":
    main()

Results from SCARGC algorithm (for boxplot and accuracy timelime).
Two Classes Horizontal Translation. 2 Dimensional data.
100 batches of 158 instances



METHOD: SCARGC with 1-NN
centroids_ant:  [[-0.12564  2.3224   0.     ]
 [ 3.8538   1.8467   1.     ]]
centroid:  [[-0.12564  2.3224 ]
 [ 3.8538   1.8467 ]]
centroids_ant:  [[  1.21160610e-01   1.83586915e+00   0.00000000e+00]
 [  3.79197294e+00   2.02679035e+00   1.00000000e+00]
 [ -2.23969521e-03   2.07913458e+00   0.00000000e+00]
 [  3.82288647e+00   1.93674518e+00   1.00000000e+00]]
centroid:  [[ -2.23969521e-03   2.07913458e+00]
 [  3.82288647e+00   1.93674518e+00]]


  return_n_iter=True)


centroids_ant:  [[ 0.06936858  1.80274695  0.        ]
 [ 4.52875067  2.15073484  1.        ]
 [ 0.0952646   1.81930805  0.        ]
 [ 4.1603618   2.0887626   1.        ]]
centroid:  [[ 0.0952646   1.81930805]
 [ 4.1603618   2.0887626 ]]
centroids_ant:  [[ 0.87940581  1.75986322  0.        ]
 [ 5.16116667  2.16863231  1.        ]
 [ 0.4743872   1.78130509  0.        ]
 [ 4.84495867  2.15968357  1.        ]]
centroid:  [[ 0.4743872   1.78130509]
 [ 4.84495867  2.15968357]]
centroids_ant:  [[ 1.17134869  1.81385639  0.        ]
 [ 5.129996    1.86473621  1.        ]
 [ 1.02537725  1.78685981  0.        ]
 [ 5.14558133  2.01668426  1.        ]]
centroid:  [[ 1.02537725  1.78685981]
 [ 5.14558133  2.01668426]]
centroids_ant:  [[ 1.69150985  2.08340889  0.        ]
 [ 5.97828372  2.08492692  1.        ]
 [ 1.43142927  1.94863264  0.        ]
 [ 5.55413986  1.97483157  1.        ]]
centroid:  [[ 1.43142927  1.94863264]
 [ 5.55413986  1.97483157]]
centroids_ant:  [[ 1.62681805  2.16122923  0

centroids_ant:  [[ 14.14325875   1.84055849   0.        ]
 [ 18.04151282   1.911695     1.        ]
 [ 13.72288066   1.91693351   0.        ]
 [ 17.84441266   2.00018781   1.        ]]
centroid:  [[ 13.72288066   1.91693351]
 [ 17.84441266   2.00018781]]
centroids_ant:  [[ 13.95350617   1.68105537   0.        ]
 [ 17.80971429   2.12361427   1.        ]
 [ 14.04838246   1.76080693   0.        ]
 [ 17.92561355   2.01765464   1.        ]]
centroid:  [[ 14.04838246   1.76080693]
 [ 17.92561355   2.01765464]]
centroids_ant:  [[ 14.66893846   2.43789369   0.        ]
 [ 18.59976344   1.72476798   1.        ]
 [ 14.31122232   2.05947453   0.        ]
 [ 18.20473886   1.92419113   1.        ]]
centroid:  [[ 14.31122232   2.05947453]
 [ 18.20473886   1.92419113]]
centroids_ant:  [[ 14.85471429   2.25613879   0.        ]
 [ 19.00238636   2.34827431   1.        ]
 [ 14.76182637   2.34701624   0.        ]
 [ 18.8010749    2.03652114   1.        ]]
centroid:  [[ 14.76182637   2.34701624]
 [ 18.8010

centroids_ant:  [[ 12.52201389   1.97842888   0.        ]
 [ 16.752        2.06542743   1.        ]
 [ 12.72254591   1.77080836   0.        ]
 [ 16.93740123   1.96468331   1.        ]]
centroid:  [[ 12.72254591   1.77080836]
 [ 16.93740123   1.96468331]]
centroids_ant:  [[ 12.0375675    1.69960416   0.        ]
 [ 16.05307692   2.01654415   1.        ]
 [ 12.27979069   1.83901652   0.        ]
 [ 16.40253846   2.04098579   1.        ]]
centroid:  [[ 12.27979069   1.83901652]
 [ 16.40253846   2.04098579]]
centroids_ant:  [[ 11.8915358    2.10733544   0.        ]
 [ 16.28384416   2.02445055   1.        ]
 [ 11.96455165   1.9034698    0.        ]
 [ 16.16846054   2.02049735   1.        ]]
centroid:  [[ 11.96455165   1.9034698 ]
 [ 16.16846054   2.02049735]]
centroids_ant:  [[ 11.56365625   2.32644087   0.        ]
 [ 16.16773077   1.98591786   1.        ]
 [ 11.72759603   2.21688816   0.        ]
 [ 16.22578746   2.0051842    1.        ]]
centroid:  [[ 11.72759603   2.21688816]
 [ 16.2257

ValueError: too many values to unpack (expected 4)

In [None]:
import numpy as np
a = [[1, 2], [3, 4], [4, 5]]

b = 2 * np.array(a)
c = np.array([1, 4, 2, 1, 3])
d = (-c).argsort()[:2]
e = np.random.randint(2, size=1)
e