# Setup inicial

In [487]:
import numpy as np
import pandas as pd
import random

In [488]:
import warnings
warnings.filterwarnings('ignore')

In [489]:
#dataset = np.genfromtxt('sonar.all-data.csv', delimiter=',', dtype=None)
dataset = pd.read_csv('sonar.all-data.csv', sep=',', header=None)

In [490]:
labels = dataset[60]
data = dataset.drop([60], axis=1)

# Normalização

In [491]:
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

In [492]:
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

In [493]:
np.min(data[:,0]), np.max(data[:,0])

(0.0, 1.0)

In [494]:
print(Counter(labels))

Counter({'M': 111, 'R': 97})


# Ajustar labels para -1 e 1

In [495]:
from sklearn.preprocessing import LabelEncoder

In [496]:
encoder = LabelEncoder()
encoded_labels = pd.Series(encoder.fit_transform(labels))

In [497]:
encoded_labels[encoded_labels == 0] = -1

# Treinamento

In [498]:
# Modelos
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# K-fold CrossValidation
from sklearn.model_selection import KFold

# Métricas
from sklearn.metrics import accuracy_score, recall_score, precision_score

from sklearn.utils import resample
from sklearn.model_selection import train_test_split

from sklearn.utils import shuffle

## Funções para o treinamento

In [499]:
def select_samples(X,y,weights, size):
    selected_X = []
    selected_y = []
    
    for new_element in range(size):
        sorted_pivot = random.uniform(0, 1)
        accum_weight = 0
        selected_index = 0

        for eval_element in zip(X,y, weights):
            X_element = eval_element[0]
            y_element = eval_element[1]
            weight_element = eval_element[2]            
            accum_weight += weight_element
            
            if sorted_pivot <= accum_weight:
                selected_X.append(X_element)
                selected_y.append(y_element)
                break
    return selected_X, selected_y

In [573]:
def adjust_weights(sample_weights, correct_predictions, incorrect_predictions):
    #calcula beta
    total_error = np.sum(sample_weights[incorrect_predictions])
    beta = 0.5 * np.log( (1 - total_error) / float(total_error))
    
    #ajusta pesos
    sample_weights[incorrect_predictions] = sample_weights[incorrect_predictions] * np.exp(beta)
    sample_weights[correct_predictions] = sample_weights[correct_predictions] * np.exp(np.multiply(-1, beta))

    #normaliza pesos
    total_weight = np.sum(sample_weights)
    sample_weights = sample_weights / total_weight
    
    return sample_weights

In [586]:
models = {}
models[0] = DecisionTreeClassifier()
models[1] = MLPClassifier()
models[2] = MLPClassifier()

performance = {
    'acuracia': [],
    'recall': [],
    'precisao': []
}

kf = KFold(n_splits=10)
num_samples = 10
random_state=30

data, encoded_labels = shuffle(data, encoded_labels, random_state = random_state)

In [597]:
curr_fold = 1
for train_index, test_index in kf.split(data):
    predictions = []
    ensemble_predictions = []
    
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = encoded_labels[train_index], encoded_labels[test_index]
    
    print('Validação Cruzada - Fold {}'.format(curr_fold))
    print('Conjunto de treinamento - Dados {} - {}'.format(X_train.shape, y_train.shape))
    print('Conjunto de teste - Dados {} - {}'.format(X_test.shape, y_test.shape))
    print('____________________________________________________________________________')
    
    sample_X_train, sample_y_train = resample(X_train,
                                              y_train,
                                              n_samples = num_samples,
                                              random_state = random_state)

    #Inicializa pesos
    sample_weights = np.ones(num_samples) / num_samples
    
    for model in models:
        print(type(models[model]).__name__)
        correct_predictions = []
        incorrect_predictions = []
        selected_X_train = []
        selected_Y_train = []
        
        """
        if model == 0:
            selected_X_train = sample_X_train
            selected_y_train = sample_y_train
        else:
            selected_X_train, selected_y_train = select_samples(sample_X_train, 
                                                                sample_y_train, 
                                                                sample_weights,
                                                                 num_samples )
        """
        selected_X_train, selected_y_train = select_samples(sample_X_train, 
                                                                sample_y_train, 
                                                                sample_weights,
                                                                 num_samples )
        models[model].fit(selected_X_train, selected_y_train)
        prediction = models[model].predict(selected_X_train)
      
        #busca índices das predições corretas e incorretas
        for results in enumerate(zip(sample_y_train,prediction)):
            real_class = results[1][0]
            predicted_class = results[1][1]
            if real_class != predicted_class:
                incorrect_predictions.append(results[0])
            else:
                correct_predictions.append(results[0])
            
        sample_weights = adjust_weights(sample_weights, correct_predictions, incorrect_predictions)
        predictions.append(prediction)
        
    predictions = np.vstack(predictions)
    for col in range(predictions.shape[1]):
        cnt = Counter()
        
        votes = predictions[:,col]
        for vote in votes:
            cnt[vote] +=1
        
        ensemble_predictions.append(cnt.most_common()[0][0])
    #for col in range()
    
    # Cálculo de métricas
    ens_acc = accuracy_score(sample_y_train, ensemble_predictions)
    ens_recall = recall_score(sample_y_train, ensemble_predictions)
    ens_precisao = precision_score(sample_y_train, ensemble_predictions)
    
    print('Métricas - Fold {}'.format(curr_fold))
    print('Acurácia: {}'.format(ens_acc))
    print('Recall: {}'.format(ens_recall))
    print('Precision: {}'.format(ens_precisao))
    print('############################################################################')
    
    performance['acuracia'].append(ens_acc)
    performance['recall'].append(ens_recall)
    performance['precisao'].append(ens_precisao)
    
    curr_fold +=1
    
    print('############################################################################')
    

Validação Cruzada - Fold 1
Conjunto de treinamento - Dados (187, 60) - (187,)
Conjunto de teste - Dados (21, 60) - (21,)
____________________________________________________________________________
DecisionTreeClassifier
MLPClassifier
MLPClassifier
Métricas - Fold 1
Acurácia: 0.4
Recall: 0.0
Precision: 0.0
############################################################################
############################################################################
Validação Cruzada - Fold 2
Conjunto de treinamento - Dados (187, 60) - (187,)
Conjunto de teste - Dados (21, 60) - (21,)
____________________________________________________________________________
DecisionTreeClassifier
MLPClassifier
MLPClassifier
Métricas - Fold 2
Acurácia: 0.6
Recall: 0.3333333333333333
Precision: 0.3333333333333333
############################################################################
############################################################################
Validação Cruzada - Fold 3
Conjunto de treinam

# TESTES

In [603]:
X_train_test = ['A','B','C','D','E','F','G','H','I','J']

y_train_test = [1,1,1,1,1,1,1,1,1,1]

weights = np.array([0.14,0.03,0.07,0.21,0.13,0.04,0.1,0.1,0.15,0.03])

sel_X, sel_y = select_samples(X_train, y_train, weights, 10000)

print(sample_weights[0])

0.0339943342776204


In [598]:
weights = initialize_weights(sample_y_train, num_samples)
print(weights)

{37: 0.1, 165: 0.1, 173: 0.1, 45: 0.1, 140: 0.1, 151: 0.1, 130: 0.1, 53: 0.1, 145: 0.1, 174: 0.1}
