# Imports

In [6]:
import itertools
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
import one_class_learning as ocl
import pandas as pd
import numpy as np
import sys
import json
import logging 
import time
from datetime import datetime

from Diabolo import Diabolo

SyntaxError: invalid syntax (Diabolo.py, line 47)

# Definitions

In [2]:
# Registry that maps the algorithm names used in the experiment configuration
# to the scikit-learn estimator classes instantiated for them.
dict_algorithms = {
    'LocalOutlierFactor': LocalOutlierFactor,
    'OneClassSVM': OneClassSVM,
    'EllipticEnvelope': EllipticEnvelope,
    'IsolationForest': IsolationForest,
}


# Functions

In [3]:
def generate_parameters_list(parameters):
    """Expand a parameter grid into one dictionary per combination.

    Parameters
    ----------
    parameters : dict
        Maps each parameter name to an iterable of candidate values.

    Returns
    -------
    list of dict
        One ``{name: value}`` dictionary for every element of the Cartesian
        product of the candidate values, in ``itertools.product`` order.
        An empty grid yields ``[{}]``.
    """
    names = list(parameters.keys())
    candidate_values = [parameters[name] for name in names]
    return [
        dict(zip(names, chosen))
        for chosen in itertools.product(*candidate_values)
    ]
    

In [4]:
def log(message):
    """Append a timestamped entry to ``error.log`` in the working directory.

    Parameters
    ----------
    message : object
        Text to record. It is converted with ``str`` formatting, so an
        exception instance can be passed directly.
    """
    stamp = datetime.now().strftime("%Y/%m/%d, %H:%M:%S")
    # Explicit UTF-8 so messages with non-ASCII characters are written the
    # same way regardless of the platform's default encoding.
    with open('error.log', 'a', encoding='utf-8') as file:
        # A separating space keeps the timestamp from running into the message.
        file.write(f"{stamp} {message}\n")


In [5]:
def load_data(path):
    """Read a CSV file and split it into features and labels.

    The whole table is converted to a single NumPy array; every column except
    the last is returned as ``X`` and the last column is returned as ``y``.

    Parameters
    ----------
    path : str
        Location of the CSV file, passed to ``pandas.read_csv``.

    Returns
    -------
    tuple
        ``(X, y)`` as described above.
    """
    table = pd.read_csv(path).to_numpy()
    return table[:, :-1], table[:, -1]

# Test Area

In [6]:
# Full experiment configuration: dataset/result paths, validation settings and
# one parameter grid per algorithm. Each 'parameters' mapping lists candidate
# values per keyword; the main cell expands it with generate_parameters_list.
# NOTE(review): the next cell rebinds `config` to a LocalOutlierFactor-only
# variant, so this definition only takes effect if that cell is skipped.
# NOTE(review): both paths are absolute and machine-specific.
config = {
    'path_dataset': '/home/rafael/Downloads/iris.csv',
    'path_results': '/home/rafael/Área de Trabalho/Projetos/TextCategorizationToolPython/saida/resultados_teste.csv',
    'validation': {
        'number_trials': 10,
        'number_labeled_examples': [1, 5, 10, 20, 30],
        'split_type': 'random',
    },
    'algorithms': [
        {
            'name': 'LocalOutlierFactor',
            'parameters': {
                'n_neighbors' : [1, 5, 9, 13, 17, 21],
                'metric': ['cosine','euclidean'],
                'novelty': [True],
                'n_jobs': [4]
            }
        },
        {
            'name': 'OneClassSVM',
            'parameters': {
                'nu': [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8,  0.85, 0.9, 0.95],
                'gamma': ['scale','auto'],
                'kernel': ['linear', 'rbf'],
                'max_iter': [100]
            }
        },
        {
            'name': 'EllipticEnvelope',
            'parameters': {
                'assume_centered': [True, False],
                'contamination': [0.1, 0.2, 0.3, 0.4],
                'random_state' : [42]
            }
        },
        {
            'name': 'IsolationForest',
            'parameters': {
                'n_estimators' : [10,30,50,70,90],
                'n_jobs': [4],
                'random_state' : [42]
            }
        },
    ]
}

In [7]:
# Reduced configuration that lists only the LocalOutlierFactor grid. It rebinds
# `config`, replacing the four-algorithm configuration from the previous cell.
# NOTE(review): both paths are absolute and machine-specific.
config = {
    'path_dataset': '/home/rafael/Downloads/iris.csv',
    'path_results': '/home/rafael/Área de Trabalho/Projetos/TextCategorizationToolPython/saida/resultados_teste.csv',
    'validation': {
        'number_trials': 10,
        'number_labeled_examples': [1, 5, 10, 20, 30],
        'split_type': 'random',
    },
    'algorithms': [
        {
            'name': 'LocalOutlierFactor',
            'parameters': {
                'n_neighbors' : [1, 5, 9, 13, 17, 21],
                'metric': ['cosine','euclidean'],
                'novelty': [True],
                'n_jobs': [4]
            }
        },
        
    ]
}

In [8]:
# Display the configuration currently bound to `config`.
config

{'path_dataset': '/home/rafael/Downloads/iris.csv',
 'path_results': '/home/rafael/Área de Trabalho/Projetos/TextCategorizationToolPython/saida/resultados_teste.csv',
 'validation': {'number_trials': 10,
  'number_labeled_examples': [1, 5, 10, 20, 30],
  'split_type': 'random'},
 'algorithms': [{'name': 'LocalOutlierFactor',
   'parameters': {'n_neighbors': [1, 5, 9, 13, 17, 21],
    'metric': ['cosine', 'euclidean'],
    'novelty': [True],
    'n_jobs': [4]}}]}

In [9]:
# Serialise the active configuration to config_example.json in the current
# working directory, pretty-printed with a three-space indent.
with open('config_example.json', 'w') as file:
    file.write(json.dumps(config, indent=3))

In [10]:
# Location of the JSON configuration that the main cell loads.
# NOTE(review): absolute, machine-specific path, while the previous cell writes
# config_example.json relative to the working directory. Confirm both refer to
# the same file.
path = '/home/rafael/Área de Trabalho/Projetos/TextCategorizationToolPython/one_class_learning/config_example.json'

# Main

In [11]:
# Script entry point: to run this cell as a script, uncomment the two lines
# below, indent the rest of the cell under the guard, and pass the path of the
# JSON configuration as the first command-line argument. Keep them commented
# when running inside the notebook, where `path` is set by an earlier cell.
#if __name__ == '__main__':
#    path = sys.argv[1]

# Load the experiment configuration and the dataset it points to.
with open(path, 'r', encoding='utf-8') as file:
    config = json.load(file)

X, y = load_data(config['path_dataset'])

# Run the experiment once per algorithm and per combination of its grid.
config_algorithms = config['algorithms']
for algorithm in config_algorithms:
    algorithm_name = algorithm['name']
    parameters = algorithm['parameters']
    parameters_list = generate_parameters_list(parameters)
    # A separate loop name keeps the grid in `parameters` from being shadowed.
    for combination in parameters_list:
        try:
            # Built inside the try so an unknown algorithm name or a rejected
            # keyword is logged and skipped instead of aborting the sweep.
            classifier = dict_algorithms[algorithm_name](**combination)
            ocl.execute_exp(X, y, classifier, config)
        except Exception as error:
            # Best-effort sweep: record which run failed and why, then continue.
            log(f"{algorithm_name} {combination}: {type(error).__name__}: {error}")

{'n_neighbors': 1, 'metric': 'cosine', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 1, 'metric': 'euclidean', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 5, 'metric': 'cosine', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 5, 'metric': 'euclidean', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 9, 'metric': 'cosine', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 9, 'metric': 'euclidean', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 13, 'metric': 'cosine', 'novelty': True, 'n_jobs': 4}
Aquiiiiiiiiiiiiiiiiii
Number labeled examples: [1, 5, 10, 20, 30]
{'n_neighbors': 13, 'metric': 'euclidean', 'n

'2021/01/25, 18:36:45'