In [67]:
import numpy as np

from dataSetPreProcessing import train_validation_test_split
from sklearn.model_selection import train_test_split
from loadDataSet import loadMainDataSet,loadTesteDataSet,loadCompletDataSet,loadMainDataSetWithElevation
from tools import verifyArgs,findBalancedDataSet,pltResults,pltCorrelation, pltLossGraph,pltShow,plotXY,getMetrics,plotLeanrningCurve,getBalancedDataSetIndexRandomState

from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore", category = RuntimeWarning)
warnings.filterwarnings('ignore', 'Solver terminated early.*')

In [68]:
# Load the data set. The loader returns three values that become module-level
# names: later cells slice `dataSet` by column (dataSet[:, k]) and look up
# `target_names` by position when labelling plots.
dataSet,features_names,target_names = loadMainDataSetWithElevation()

    Latitude  Longitude  Elevation     Ca      Mg     Na     Cl
0 -14.875833 -39.901389   282.6006  120.0  141.00  172.0  660.0
1 -14.853333 -39.661111   184.0000    0.8    1.45    4.3    7.0


In [69]:
def getParamGrid():
    '''Build the two SVR hyper-parameter grids used by this notebook.

    Returns:
        tuple: ``(param_grid_half, param_grid_full)``. The reduced grid is
        the one used while searching for the train/test split seed; the full
        grid is the one used for the final tuning. Both grids share the same
        ``kernel``, ``epsilon``, ``max_iter`` and ``C`` candidates and differ
        only in the ``gamma`` and ``tol`` candidates.
    '''
    def build(gamma_values, tol_values):
        # Every call creates fresh lists, so the two grids never share state.
        return {
            'kernel': ['linear', 'rbf', 'sigmoid'],
            'gamma': gamma_values,
            'tol': tol_values,
            'epsilon': [1e-4],
            'max_iter': [-1],
            'C': [1.5, 10],
        }

    reduced_grid = build(['scale', 'auto'], [1e-5, 1e-4])
    complete_grid = build(['scale', 'auto', 1e-7, 1e-4], [1e-3, 1e-5, 1e-4, 1e-2])
    return reduced_grid, complete_grid


In [70]:
def tuningParameters(model,param_grid,X_train,y_train,verbose=0):
    '''Tune hyper-parameters with an exhaustive grid search and 10-fold cross-validation.

    Args:
        model: Estimator to tune (the notebook passes ``SVR()``).
        param_grid: Mapping of parameter names to lists of candidate values.
        X_train: Training features handed to ``GridSearchCV.fit``.
        y_train: Training targets handed to ``GridSearchCV.fit``.
        verbose: Verbosity level forwarded to ``GridSearchCV``.

    Returns:
        tuple: ``(best_estimator, best_params, best_score)`` read from the
        fitted search. The score is the cross-validated ``r2`` of the best
        parameter combination.
    '''
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError.
    reg = GridSearchCV(
        model,
        param_grid=param_grid,
        cv=10,
        scoring='r2',
        n_jobs=-1,
        verbose=verbose,
    )
    reg.fit(X_train,y_train)

    print("#Best score:",reg.best_score_)
    print("#Best params:",reg.best_params_)

    return reg.best_estimator_,reg.best_params_,reg.best_score_

In [71]:
def getBestSeed(X,y,faixa,verbose=0):
    '''Find the train/test split seed whose tuned model scores best in cross-validation.

    For every candidate seed the data is split 80/20, an ``SVR`` is tuned on
    the training part with the reduced parameter grid, and the seed with the
    highest best cross-validation score is kept.

    Args:
        X: Feature matrix.
        y: Target vector.
        faixa: Iterable of candidate ``random_state`` values.
        verbose: When truthy, print the winning score and seed.

    Returns:
        tuple: ``(seed, maior_score)`` - the winning seed and its score.

    Raises:
        ValueError: If ``faixa`` yields no candidate seeds.
    '''
    param_grid_half,_ = getParamGrid()

    # Start below any real score so a candidate always wins, even when every
    # R2 is <= 0 (the old `0` sentinel returned seed 0 in that case, a seed
    # that was never evaluated).
    maior_score = float("-inf")
    seed = None
    for i in faixa:
        X_train,_X_test,y_train,_y_test = train_test_split(X,y,test_size = 0.2,random_state=i)
        _model,_params,best_score = tuningParameters(SVR(),param_grid_half,X_train,y_train)
        # The comparison must run once per candidate; it used to sit outside
        # the loop, so only the last seed was ever considered.
        if seed is None or best_score>maior_score:
            maior_score = best_score
            seed = i

    if seed is None:
        raise ValueError("faixa must contain at least one candidate seed")

    if verbose:
        print("#Maior Score: ",maior_score)
        print("#Seed : ",seed)

    return seed,maior_score

In [72]:
def runTest(target,verbose=0):
    '''Search for the best split seed and hyper-parameters for one target column.

    Uses the first four columns of the module-level ``dataSet`` as features
    and column ``target`` as the label. The seed is chosen by ``getBestSeed``
    over ``range(1,10)``; the data is then split 80/20 with that seed and the
    training part is tuned with the full parameter grid.

    Args:
        target: Column index of ``dataSet`` used as the regression target.
        verbose: Verbosity forwarded to ``getBestSeed`` and ``tuningParameters``.

    Returns:
        None. The results are printed by the helpers and the chosen seed is
        printed at the end.
    '''
    features = dataSet[:,:4]
    labels = dataSet[:,target]

    seed,_ = getBestSeed(features,labels,range(1,10),verbose=verbose)

    # Only the training part of the split is needed for the final tuning.
    X_fit,_,y_fit,_ = train_test_split(features,labels,test_size = 0.2,random_state=seed)

    _,full_grid = getParamGrid()
    tuningParameters(SVR(),full_grid,X_fit,y_fit,verbose=verbose)

    print("#Best Seed:",seed)



In [73]:
def avaliateModel(model,X_train,X_val,X_test,y_train,y_val,y_test,param_key_loss,target,verbose=0,stepLoss=25):
    '''Fit ``model`` and report learning curves, metrics and scatter plots for every split.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train, X_val, X_test: Feature sets for the train/validation/test splits.
        y_train, y_val, y_test: Matching target vectors.
        param_key_loss: Parameter name forwarded to ``plotLeanrningCurve``
            (the callers in this notebook pass ``'max_iter'``).
        target: Position in the module-level ``target_names``; the looked-up
            name is passed to ``plotXY``.
        verbose: Verbosity forwarded to ``plotLeanrningCurve``.
        stepLoss: Value forwarded as ``step`` to ``plotLeanrningCurve``.
            Presumably the increment of the learning-curve sweep - confirm
            against ``tools.plotLeanrningCurve``.

    Returns:
        None. Output is printed/plotted through the ``tools`` helpers.
    '''
    model.fit(X_train,y_train)

    # Train-vs-validation and train-vs-test learning curves. `stepLoss` is now
    # forwarded; it used to be ignored in favour of a hard-coded step of 25.
    for X_other,y_other,legend in ((X_val,y_val,"Validação"),(X_test,y_test,"Teste")):
        plotLeanrningCurve(X_train,X_other,y_train,y_other,model,param_key_loss,'mean_squared_error',legend_1="Treino",legend_2=legend,verbose=verbose,step=stepLoss)

    # One report per split: (printed header, features, targets, axis label, plot title).
    reports = (
        ("#Metricas para os dados de treino",X_train,y_train,"Treino","Treino X Predito"),
        ("#Metricas para os dados de validação",X_val,y_val,"Validação","Validação X Predito"),
        ("#Metricas para os dados de teste",X_test,y_test,"Teste","Teste X Predito"),
    )
    for header,X_part,y_part,axis_label,title in reports:
        y_pred = model.predict(X_part)
        print(header)
        getMetrics(y_part,y_pred,verbose=1)
        plotXY(y_part,y_pred,axis_label,"Predito",target_names[target],title,midle_line=True)

    pltShow()
    

In [74]:
def MELHOR_MG(verbose=0):
    '''Evaluate the SVR configuration selected for the Mg target (column 4 of ``dataSet``).

    The first four columns of ``dataSet`` are the features. 20% of the rows
    are held out as the test split (``random_state=9``), then 20% of the
    remainder becomes the validation split (``random_state=7``). The model is
    handed to ``avaliateModel`` with ``'max_iter'`` as the learning-curve
    parameter key and target position 0.

    Args:
        verbose: Verbosity forwarded to ``avaliateModel``.
    '''
    features = dataSet[:,:4]
    magnesium = dataSet[:,4]

    # NOTE(review): the recorded run below reports "Best Seed: 5" while this
    # split uses random_state=9 - confirm which seed is intended.
    X_fit,X_test,y_fit,y_test = train_test_split(features,magnesium,test_size = 0.2,random_state=9)
    X_fit,X_val,y_fit,y_val = train_test_split(X_fit,y_fit,test_size = 0.2,random_state=7)

    # Same values as the recorded best params, except max_iter (2000 here, -1 there).
    regressor = SVR(C=1.5,epsilon=0.0001,gamma='scale',kernel='rbf',max_iter=2000,tol=0.001)
    avaliateModel(regressor,X_fit,X_val,X_test,y_fit,y_val,y_test,'max_iter',0,verbose=verbose,stepLoss=25)

    # Recorded output of a previous run:
    #Best score: 0.7773431551951668
    #Best params: {'C': 1.5, 'epsilon': 0.0001, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'tol': 0.001}
    #Best Seed: 5
    #Metrics for the training data
    #R-squared: 0.83808
    #Mean Squared Error: 0.00422
    #Metrics for the validation data
    #R-squared: 0.82290
    #Mean Squared Error: 0.00327
    #Metrics for the test data
    #R-squared: 0.81450
    #Mean Squared Error: 0.00295

In [75]:
def MELHOR_NA(verbose=0):
    '''Evaluate the SVR configuration selected for the Na target (column 5 of ``dataSet``).

    The first four columns of ``dataSet`` are the features. 20% of the rows
    are held out as the test split (``random_state=2``), then 20% of the
    remainder becomes the validation split (``random_state=0``). The model is
    handed to ``avaliateModel`` with ``'max_iter'`` as the learning-curve
    parameter key and target position 1.

    Args:
        verbose: Verbosity forwarded to ``avaliateModel``.
    '''
    features = dataSet[:,:4]
    sodium = dataSet[:,5]

    X_fit,X_test,y_fit,y_test = train_test_split(features,sodium,test_size = 0.2,random_state=2)
    X_fit,X_val,y_fit,y_val = train_test_split(X_fit,y_fit,test_size = 0.2,random_state=0)

    # Same values as the recorded best params, except max_iter (2000 here, -1 there).
    regressor = SVR(C=10,epsilon=0.0001,gamma='scale',kernel='rbf',max_iter=2000,tol=0.001)
    avaliateModel(regressor,X_fit,X_val,X_test,y_fit,y_val,y_test,'max_iter',1,verbose=verbose,stepLoss=25)

    # Recorded output of a previous run:
    #Best score: 0.6511186214845505
    #Best params: {'C': 10, 'epsilon': 0.0001, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'tol': 0.001}
    #Best Seed: 2
    #Metrics for the training data
    #R-squared: 0.73896
    #Mean Squared Error: 0.00563
    #Metrics for the validation data
    #R-squared: 0.86552
    #Mean Squared Error: 0.00282
    #Metrics for the test data
    #R-squared: 0.74592
    #Mean Squared Error: 0.00848

In [76]:
def MELHOR_K(verbose=0):
    '''Evaluate the SVR configuration selected for the target stored in column 6 of ``dataSet``.

    The first four columns of ``dataSet`` are the features. 20% of the rows
    are held out as the test split (``random_state=3``), then 20% of the
    remainder becomes the validation split (``random_state=0``). The model is
    handed to ``avaliateModel`` with ``'max_iter'`` as the learning-curve
    parameter key and target position 2.

    Args:
        verbose: Verbosity forwarded to ``avaliateModel``.
    '''
    features = dataSet[:,:4]
    # NOTE(review): the function name says K, but the data preview printed by
    # the loading cell labels column 6 as "Cl" - confirm the intended target.
    column_six = dataSet[:,6]

    X_fit,X_test,y_fit,y_test = train_test_split(features,column_six,test_size = 0.2,random_state=3)
    X_fit,X_val,y_fit,y_val = train_test_split(X_fit,y_fit,test_size = 0.2,random_state=0)

    # Same values as the recorded best params, except max_iter (2000 here, -1 there).
    regressor = SVR(C=1.5,epsilon=0.0001,gamma='scale',kernel='rbf',max_iter=2000,tol=0.001)
    avaliateModel(regressor,X_fit,X_val,X_test,y_fit,y_val,y_test,'max_iter',2,verbose=verbose,stepLoss=25)

    # Recorded output of a previous run:
    #Best score: 0.7409384454642771
    #Best params: {'C': 1.5, 'epsilon': 0.0001, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'tol': 0.001}
    #Best Seed: 3
    #Metrics for the training data
    #R-squared: 0.88285
    #Mean Squared Error: 0.00241
    #Metrics for the validation data
    #R-squared: 0.67761
    #Mean Squared Error: 0.01170
    #Metrics for the test data
    #R-squared: 0.74512
    #Mean Squared Error: 0.00617


In [77]:
# Run the evaluation for the configuration defined in MELHOR_MG (dataSet column 4).
MELHOR_MG()

TypeError: must be real number, not str

In [65]:
# Run the evaluation for the configuration defined in MELHOR_NA (dataSet column 5).
MELHOR_NA()

TypeError: must be real number, not str

In [66]:
# Run the evaluation for the configuration defined in MELHOR_K (dataSet column 6).
MELHOR_K()

TypeError: must be real number, not str