# TRABALHO 2 - Metaheuristics (VNS)

## Alunos:
- **Claudia Oliveira**
- **Matheus Oliveira**

# SETUP

In [1]:
import numpy as np
import copy
import pandas as pd
import random

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset = pd.read_csv('sonar.all-data.csv', sep=',', header=None)
labels = dataset[60]
data = dataset.drop([60], axis=1)

# NORMALIZAÇÃO

In [85]:
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

In [86]:
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
np.min(data[:,0]), np.max(data[:,0])

(0.0, 1.0)

## Ajusta labels

In [87]:
from sklearn.preprocessing import LabelEncoder

In [88]:
encoder = LabelEncoder()
encoded_labels = pd.Series(encoder.fit_transform(labels))
encoded_labels[encoded_labels == 0] = -1

# TREINAMENTO

In [47]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook
from sklearn.metrics import mean_squared_error, accuracy_score
from math import sqrt

## Funções

In [78]:
def vizinhanca_variavel(gamma, C, tamanho):
    
    gamma_copy = copy.copy(gamma)
    C_copy = copy.copy(C)
    
    fator_comp = tamanho / 2

    gamma = gamma + (tamanho * np.random.rand() - fator_comp) * gamma
    C = C + (tamanho * np.random.rand() - fator_comp) * C
    
    while gamma <= 0 or C <= 0:
        gamma = gamma_copy
        C = C_copy
        gamma = gamma + (tamanho * np.random.rand() - fator_comp) * gamma
        C = C + (tamanho * np.random.rand() - fator_comp) * C
        
    return gamma, C

In [119]:
def vizinhanca_local(gamma, C):
    
    gamma_copy = copy.copy(gamma)
    C_copy = copy.copy(C)
    
    gamma = gamma + (2 * np.random.rand() - 1) * gamma
    C = C + (2 * np.random.rand() - 1) * C
    
    while gamma <= 0 or C <= 0:
        gamma = gamma_copy
        C = C_copy
        gamma = gamma + (2 * np.random.rand() - 1) * gamma
        C = C + (2 * np.random.rand() - 1) * C
        
    return gamma, C

In [120]:
def f(y_true, y_pred):
    
    return sqrt(mean_squared_error(y_true, y_pred))

In [121]:
kf = KFold(n_splits=10)
random_state=42

In [122]:
def VNS(gamma_inicial, C_inicial, N_max, max_iter):
    curr_fold = 1

    for train_index, test_index in kf.split(data):

        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = encoded_labels[train_index], encoded_labels[test_index]

        print('Validação Cruzada - Fold {}'.format(curr_fold))
        print('Conjunto de treinamento - Dados {} - {}'.format(X_train.shape, y_train.shape))
        print('Conjunto de teste - Dados {} - {}'.format(X_test.shape, y_test.shape))
        print('____________________________________________________________________________')

        X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.3, random_state=random_state)

        gamma = gamma_inicial
        C = C_inicial
               
        svm_s = SVC(gamma=gamma, C=C)
        svm_s = svm_s.fit(X_train, y_train)
        y_pred_s = svm_s.predict(X_val)
        f_s = f(y_val, y_pred_s)

        melhor_f = f_s
        melhor_gamma = gamma
        melhor_C = C

        for i in range(max_iter):
            n = 1
            while n < N_max :
                gamma_s1, C_s1 = vizinhanca_variavel(melhor_gamma, melhor_C, n)
                gamma_s2, C_s2 = vizinhanca_local(gamma_s1, C_s1)
                
                svm_s2 = SVC(gamma=gamma_s2, C=C_s2)
                svm_s2 = svm_s2.fit(X_train, y_train)
                y_pred_s2 = svm_s2.predict(X_val)
                f_s2 = f(y_val, y_pred_s2)
                
                if f_s2 < melhor_f:
                    melhor_f = f_s2
                    melhor_gamma = gamma_s2
                    melhor_C = C_s2
                    n = 1
                else:                    
                    n += 1


        print('VNS terminado!\n')
        print('Melhor loss: {}'.format(melhor_f))
        print('Melhor gamma: {}'.format(melhor_gamma))
        print('Melhor C: {}'.format(melhor_C))

        print('Treinando modelo final....')
        final_svm = SVC(gamma=melhor_gamma, C=melhor_C)
        final_svm = final_svm.fit(X_train, y_train)
        print('Avaliando acurácia no conjunto de teste...')
        accuracy = final_svm.score(X_test, y_test)
        print('Acurácia: {}'.format(accuracy))

        curr_fold +=1

        print('############################################################################')
        
    return final_svm

In [124]:
final_svm = VNS(0.1,10,4,15)

Validação Cruzada - Fold 1
Conjunto de treinamento - Dados (187, 60) - (187,)
Conjunto de teste - Dados (21, 60) - (21,)
____________________________________________________________________________
VNS terminado!

Melhor loss: 1.0690449676496976
Melhor gamma: 0.15292456475141689
Melhor C: 16.96119911137867
Treinando modelo final....
Avaliando acurácia no conjunto de teste...
Acurácia: 0.42857142857142855
############################################################################
Validação Cruzada - Fold 2
Conjunto de treinamento - Dados (187, 60) - (187,)
Conjunto de teste - Dados (21, 60) - (21,)
____________________________________________________________________________
VNS terminado!

Melhor loss: 0.0
Melhor gamma: 0.06591415427435868
Melhor C: 25.932541806124064
Treinando modelo final....
Avaliando acurácia no conjunto de teste...
Acurácia: 0.7857142857142857
############################################################################
Validação Cruzada - Fold 3
Conjunto de treina