# Rede Neural de Múltiplas Camadas para Regressão Simbólica

In [1]:
import numpy as np
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LassoCV
from sklearn.linear_model import LassoLarsCV

# NaN results produced by the transformation functions are ignored here
# because they are zeroed afterwards, so NumPy's "invalid value" warnings
# are silenced globally.
np.seterr(invalid='ignore')

{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

- carrega os dados do arquivo fname e retorna X, y

In [2]:
def importaDados(fname):
    """Load a comma-separated numeric file and split it into features/target.

    Parameters
    ----------
    fname : str, path-like or file-like
        Source handed directly to ``np.loadtxt`` (delimiter ``","``).

    Returns
    -------
    tuple of (X, y)
        ``X`` is the 2-D array with every column except the last one and
        ``y`` is the 1-D array holding the last column.
    """
    # ndmin=2 keeps a single-row file as a (1, n_cols) matrix; without it
    # loadtxt squeezes the result to 1-D and the column slicing below raises
    # an IndexError.
    dataset = np.loadtxt(fname, delimiter=",", ndmin=2)
    X = dataset[:, :-1]
    y = dataset[:, -1]
    return (X, y)

- recebe uma matriz X e uma rede de expoentes
- retorna os dados transformados
- substitui todos os valores NaN e Inf por 0

In [3]:
def transformData(X, rede):
    """Expand ``X`` into interaction features defined by an exponent matrix.

    For every column ``j`` of ``rede`` the interaction term
    ``prod_k X[:, k] ** rede[k, j]`` is computed and three output columns are
    produced, in this order: the term itself (identity), its cosine and its
    square root.

    Parameters
    ----------
    X : np.ndarray
        2-D data matrix, one sample per row.
    rede : np.ndarray
        2-D exponent matrix; ``rede[:, j]`` is broadcast against the columns
        of ``X``, so it needs one row per column of ``X``.

    Returns
    -------
    np.ndarray
        Float array with ``X.shape[0]`` rows and ``3 * rede.shape[1]``
        columns. Any column that contains at least one non-finite value
        (NaN, +Inf or -Inf) is set entirely to 0.
    """
    n_rows = X.shape[0]
    n_inter = rede.shape[1]
    layers = np.empty((n_rows, 3 * n_inter))

    # cos(inf) and sqrt(negative) legitimately yield NaN here; those columns
    # are zeroed below, so the "invalid value" warning is silenced locally
    # instead of relying on a global np.seterr call.
    with np.errstate(invalid='ignore'):
        for j in range(n_inter):
            first = 3 * j
            layers[:, first] = np.prod(X ** rede[:, j], axis=1)   # id
            layers[:, first + 1] = np.cos(layers[:, first])       # cos
            layers[:, first + 2] = np.sqrt(layers[:, first])      # sqrt

    # isfinite catches Inf as well as NaN; the previous isnan-only check let
    # infinite values through to the downstream regressors.
    cols = np.any(~np.isfinite(layers), axis=0)
    layers[:, cols] = 0
    return layers

- Cria a camada de expoentes da rede com n_inter neurônios
- Aplica a função transformData em X_train utilizando essa rede
- Divida a base entre treino e validação
- Aplique o LassoCV e LassoLarsCV, verifique na validação o que retorna o menor erro
- Retorne a rede e o modelo de menor erro
- Os expoentes são inteiros aleatórios de 0 a 2

In [4]:
def fit(X_train, y_train, n_inter, random_state=None, verbose=True):
    """Draw a random exponent layer and pick the best Lasso variant for it.

    The training data is transformed with ``transformData`` using a random
    integer exponent matrix (values 0, 1 or 2), split 70/30 into fitting and
    validation subsets, and both ``LassoCV`` and ``LassoLarsCV`` are fitted.
    The model with the lowest validation RMSE is returned.

    Parameters
    ----------
    X_train : np.ndarray
        2-D training matrix, one sample per row.
    y_train : np.ndarray
        Training targets.
    n_inter : int
        Number of interaction neurons (columns of the exponent matrix).
    random_state : int or None, optional
        Seed for the exponent draw. ``None`` (default) uses NumPy's global
        random state, exactly as before.
    verbose : bool, optional
        When True (default) print each model's validation RMSE and the name
        of the selected model.

    Returns
    -------
    tuple of (exponents, model)
        The exponent matrix of shape ``(X_train.shape[1], n_inter)`` and the
        fitted estimator with the smallest validation RMSE. The estimator is
        fitted on the 70% fitting subset only.
    """
    n_inputs = X_train.shape[1]
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    exponents = rng.randint(0, 3, size=(n_inputs, n_inter))
    X_transf = transformData(X_train, exponents)

    # Distinct names for the split: the held-out 30% is a validation set and
    # the function arguments are no longer shadowed.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_transf, y_train, test_size=0.3, random_state=1)

    # max_iter must be an integer: recent scikit-learn releases validate the
    # parameter type and reject the float 5e4.
    max_iter = 50000
    models = {'lassoCV': LassoCV(max_iter=max_iter, cv=3),
              'lassoLarsCV': LassoLarsCV(max_iter=max_iter, cv=3)}

    rmse = {}
    for key, model in models.items():
        model.fit(X_fit, y_fit)
        y_pred = model.predict(X_val)
        rmse[key] = np.sqrt(mean_squared_error(y_val, y_pred))
        if verbose:
            print(key, rmse[key])

    best = min(rmse, key=rmse.get)
    if verbose:
        print('min', best)
    return exponents, models[best]

- Aplica transformData em X_test usando a rede
- Aplique o método predict de modelo na base transformada e armazena a saída em y_hat
- Retorna y_hat

In [5]:
def predict(X_test, rede, modelo):
    """Predict targets for ``X_test`` with a fitted exponent layer and model.

    ``X_test`` is first expanded by ``transformData`` using the exponent
    matrix ``rede``; the result is then passed to ``modelo.predict`` and its
    output is returned unchanged.
    """
    return modelo.predict(transformData(X_test, rede))

- main

In [6]:
# Number of interaction neurons used for every dataset.
NINTER = 5
print("n inter", NINTER)

# Datasets listed separately from the run; the notebook text says these two
# fail with a ValueError.
basesquebugam = ['bioavailability', 'ppb']
bases = [
    'airfoil',
    'concrete',
    'cpu',
    'energyCooling',
    'energyHeating',
    'forestfires',
    'towerData',
    'wineRed',
    'wineWhite',
    'yacht',
]

# For each dataset: fit on fold 0 of the training file, then report the RMSE
# obtained on the matching test file.
for base in bases:
    caminho = 'datasets/' + base

    fileTrain = caminho + '-train-0.dat'
    X_train, y_train = importaDados(fileTrain)
    rede, modelo = fit(X_train, y_train, NINTER)

    fileTest = caminho + '-test-0.dat'
    X_test, y_test = importaDados(fileTest)
    y_pred = predict(X_test, rede, modelo)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print('rmse', base, rmse, '\n')

n inter 5
lassoCV 6.40009411517889
lassoLarsCV 5.2220226976922905
min lassoLarsCV
rmse airfoil 5.357374009772064 

lassoCV 15.477505972484359
lassoLarsCV 14.45235554319056
min lassoLarsCV
rmse concrete 14.66909108429513 

lassoCV 559.8124571040972
lassoLarsCV 290.97999979923344
min lassoLarsCV
rmse cpu 70.06103712444842 

lassoCV 9.739216398637595
lassoLarsCV 5.137992183801275
min lassoLarsCV
rmse energyCooling 5.5709338418284835 

lassoCV 8.666101347456971
lassoLarsCV 4.081405742994017
min lassoLarsCV
rmse energyHeating 4.5585815006219 

lassoCV 21.61956429670779
lassoLarsCV 19.3531125546516
min lassoLarsCV
rmse forestfires 82.25011717728836 

lassoCV 75.86265996768928
lassoLarsCV 61.75929799072116
min lassoLarsCV
rmse towerData 66.07240206253448 

lassoCV 0.7460803807618895
lassoLarsCV 0.6975787410971579
min lassoLarsCV
rmse wineRed 0.7330427458252987 

lassoCV 0.8822215317407687
lassoLarsCV 0.8595346002337939
min lassoLarsCV
rmse wineWhite 0.8663455024818917 

lassoCV 10.56808504550

### testes

Eu testei com as mesmas bases de dados do programa em C e houve duas que bugam (bioavailability e ppb):

```
File "/home/ufabc/anaconda3/lib/python3.6/site-packages/scipy/interpolate/interpolate.py", line 528, in __init__
    "least %d entries" % minval)
ValueError: x and y arrays must have at least 2 entries
```
O restante deu RMSE bom; em todas as bases o modelo selecionado foi o `lassoLarsCV`.

Seus resultados eram:

```
| dataset         | MAE_MLP        | RMSE_MLP       | MAE_XG         | RMSE_XG       |
|-----------------|----------------|----------------|----------------|---------------|
| airfoil         | 6.18002242479  | 7.67184518627  | 1.11938559178  |1.8309682719   |
| bioavailability | 20.5488266959  | 25.3781544225  | 24.4211684128  |31.7162529705  |
| concrete        | 8.29282257088  | 10.4776342927  | 2.87123275074  |4.0740942837   |
| cpu             | 80.5331905948  | 222.712701917  | 22.7338762755  |77.5892865368  |
| energyCooling   | 2.52266654687  | 3.53526528505  | 0.361799431505 |0.514431391528 |
| energyHeating   | 2.54868218149  | 3.42836981878  | 0.222104601588 |0.338121978888 |
| forestfires     | 22.7982665304  | 107.530502684  | 29.0364162179  |116.625945888  |
| ppb             | 28.6142294122  | 33.5858995656  | 31.8702452249  |40.1568089435  |
| towerData       | 18.8099924172  | 25.4063703414  | 11.8206282622  |17.0157431156  |
| wineRed         | 0.479160333719 | 0.626380925203 | 0.378293441273 |0.591409131066 |
| wineWhite       | 0.59194468634  | 0.76440593106  | 0.452436536478 |0.678575144104 |
| yacht           | 7.31679187865  | 9.67918507725  | 0.392170667968 |0.849322392096 |
```