In [113]:
EJECUCIONES = 5

# ACTIVACION = 'tanh'
# ACTIVACION = 'sigmoid'
# ACTIVACION = 'relu'
ACTIVACION = LeakyReLU() #es la única función de activación que no la llamamos con un string, ver el resto:

# OPTIMIZADOR = 'sgd'
# OPTIMIZADOR = 'rmsprop'
OPTIMIZADOR = 'adam'

In [114]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
import chardet
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten, Input, LeakyReLU

nombre_archivo = '../Datos/AUTOS.csv'

with open(nombre_archivo, 'rb') as f:
    result = chardet.detect(f.read())  # or readline if the file is large

df= pd.read_csv(nombre_archivo, encoding=result['encoding'])

#-- seleccionar los atributos numéricos --
df = df.select_dtypes(include = ["int16", "int32", "int64", "float16", "float32", "float64"])
df.head()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   normalized-losses  164 non-null    float64
 1   wheel-base         205 non-null    float64
 2   length             205 non-null    float64
 3   width              205 non-null    float64
 4   height             205 non-null    float64
 5   curb-weight        205 non-null    int64  
 6   engine-size        205 non-null    int64  
 7   bore               201 non-null    float64
 8   stroke             201 non-null    float64
 9   compression-ratio  205 non-null    float64
 10  horsepower         203 non-null    float64
 11  peak-rpm           203 non-null    float64
 12  city-mpg           205 non-null    int64  
 13  highway-mpg        205 non-null    int64  
 14  price              201 non-null    float64
 15  symboling          205 non-null    int64  
dtypes: float64(11), int64(5)
m

In [115]:
# muestra cantidad de valores faltantes o nulos por atributo/característica
print(df.isnull().sum())

values = {}
for column in df.columns:
    values[column] = df[column].mean()
print(values)

# reemplaza valores nulos por la media de cada variable
df = df.fillna(value=values)

normalized-losses    41
wheel-base            0
length                0
width                 0
height                0
curb-weight           0
engine-size           0
bore                  4
stroke                4
compression-ratio     0
horsepower            2
peak-rpm              2
city-mpg              0
highway-mpg           0
price                 4
symboling             0
dtype: int64
{'normalized-losses': 122.0, 'wheel-base': 98.75658536585367, 'length': 174.04926829268288, 'width': 65.90780487804878, 'height': 53.72487804878049, 'curb-weight': 2555.5658536585365, 'engine-size': 126.90731707317073, 'bore': 3.3297512437810943, 'stroke': 3.255422885572139, 'compression-ratio': 10.142536585365855, 'horsepower': 104.25615763546799, 'peak-rpm': 5125.369458128079, 'city-mpg': 25.21951219512195, 'highway-mpg': 30.75121951219512, 'price': 13207.129353233831, 'symboling': 0.8341463414634146}


# queremos predecir dos variables, por eso ponemos 2 directamnte en el T, luego tendremos 2 neuronas al final de la red

In [116]:
datos = np.array(df)
print(datos)

attPred = [12,14] #12 mpg-highway y 14 precio  #-- nro de atributos a predecir
T = datos[:, attPred] # queremos predecir dos variables, por eso ponemos 2 directamnte en el T, luego tendremos 2 neuronas al final de la red
X = np.delete(datos, attPred, 1) # elimina columnas de atributos a predecir

data_scaler, target_scaler = StandardScaler(), StandardScaler()
X = data_scaler.fit_transform(X)
T = target_scaler.fit_transform(T)

[[ 1.2200e+02  8.8600e+01  1.6880e+02 ...  2.7000e+01  1.3495e+04
   3.0000e+00]
 [ 1.2200e+02  8.8600e+01  1.6880e+02 ...  2.7000e+01  1.6500e+04
   3.0000e+00]
 [ 1.2200e+02  9.4500e+01  1.7120e+02 ...  2.6000e+01  1.6500e+04
   1.0000e+00]
 ...
 [ 9.5000e+01  1.0910e+02  1.8880e+02 ...  2.3000e+01  2.1485e+04
  -1.0000e+00]
 [ 9.5000e+01  1.0910e+02  1.8880e+02 ...  2.7000e+01  2.2470e+04
  -1.0000e+00]
 [ 9.5000e+01  1.0910e+02  1.8880e+02 ...  2.5000e+01  2.2625e+04
  -1.0000e+00]]


In [117]:
EPOCAS = 1000 #lo que antes llamábamos max_iteraciones
TAM_LOTE = 50 #por default es 32

ENTRADAS = X.shape[1]
SALIDAS = len(attPred)#2

PACIENCIA = 15

model = Sequential()
model.add(Input(shape=(ENTRADAS,)))
model.add(Dense(6, activation=ACTIVACION))
model.add(Dense(3, activation= ACTIVACION))
model.add(Dense(SALIDAS))

model.summary()

# obtiene la arquitectura para el modelo y lo compila
model.compile(optimizer=OPTIMIZADOR, loss='mae', metrics = ['accuracy', 'mae', 'mse'])

In [118]:
cant_epocas = 0
ECMs = 0

for i in range(EJECUCIONES):
    #entreno
    x_train, x_val, y_train, y_val = train_test_split(X, T, test_size = 0.2,  shuffle = True)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=PACIENCIA)
    history = model.fit(x = x_train, y = y_train, batch_size = TAM_LOTE, epochs = EPOCAS,
                  validation_data = (x_val, y_val), callbacks=[early_stop])
    
    #veo resultados con el train set
    pred = model.evaluate(x_train, y_train, verbose=0)
    ECMs += pred[2]
    cant_epocas += len(history.epoch)    

Epoch 1/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 86ms/step - accuracy: 0.6545 - loss: 0.8568 - mae: 0.8568 - mse: 1.2449 - val_accuracy: 0.7317 - val_loss: 0.8562 - val_mae: 0.8562 - val_mse: 1.2763
Epoch 2/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6405 - loss: 0.8340 - mae: 0.8340 - mse: 1.2029 - val_accuracy: 0.7073 - val_loss: 0.8305 - val_mae: 0.8305 - val_mse: 1.2149
Epoch 3/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.6430 - loss: 0.8348 - mae: 0.8348 - mse: 1.1912 - val_accuracy: 0.7073 - val_loss: 0.8056 - val_mae: 0.8056 - val_mse: 1.1578
Epoch 4/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6945 - loss: 0.7743 - mae: 0.7743 - mse: 1.0395 - val_accuracy: 0.7073 - val_loss: 0.7820 - val_mae: 0.7820 - val_mse: 1.1052
Epoch 5/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9247 - loss: 0.3394 - mae: 0.3394 - mse: 0.3577 - val_accuracy: 0.9512 - val_loss: 0.4043 - val_mae: 0.4043 - val_mse: 0.4459
Epoch 74/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9274 - loss: 0.3414 - mae: 0.3414 - mse: 0.3659 - val_accuracy: 0.9512 - val_loss: 0.4031 - val_mae: 0.4031 - val_mse: 0.4444
Epoch 75/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9067 - loss: 0.3502 - mae: 0.3502 - mse: 0.3796 - val_accuracy: 0.9512 - val_loss: 0.4023 - val_mae: 0.4023 - val_mse: 0.4437
Epoch 76/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9321 - loss: 0.3301 - mae: 0.3301 - mse: 0.3397 - val_accuracy: 0.9512 - val_loss: 0.4011 - val_mae: 0.4011 - val_mse: 0.4427
Epoch 77/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9414 - los

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9458 - loss: 0.2785 - mae: 0.2785 - mse: 0.2313 - val_accuracy: 0.9268 - val_loss: 0.3462 - val_mae: 0.3462 - val_mse: 0.3117
Epoch 146/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9363 - loss: 0.2731 - mae: 0.2731 - mse: 0.2359 - val_accuracy: 0.9268 - val_loss: 0.3461 - val_mae: 0.3461 - val_mse: 0.3101
Epoch 147/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9276 - loss: 0.2633 - mae: 0.2633 - mse: 0.2056 - val_accuracy: 0.9268 - val_loss: 0.3465 - val_mae: 0.3465 - val_mse: 0.3100
Epoch 148/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9536 - loss: 0.2839 - mae: 0.2839 - mse: 0.2670 - val_accuracy: 0.9268 - val_loss: 0.3464 - val_mae: 0.3464 - val_mse: 0.3091
Epoch 149/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9303 -

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9305 - loss: 0.2385 - mae: 0.2385 - mse: 0.1739 - val_accuracy: 0.9268 - val_loss: 0.3140 - val_mae: 0.3140 - val_mse: 0.2304
Epoch 218/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.9405 - loss: 0.2357 - mae: 0.2357 - mse: 0.1730 - val_accuracy: 0.9268 - val_loss: 0.3142 - val_mae: 0.3142 - val_mse: 0.2303
Epoch 219/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9165 - loss: 0.2366 - mae: 0.2366 - mse: 0.1700 - val_accuracy: 0.9268 - val_loss: 0.3144 - val_mae: 0.3144 - val_mse: 0.2306
Epoch 220/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.9405 - loss: 0.2395 - mae: 0.2395 - mse: 0.1783 - val_accuracy: 0.9268 - val_loss: 0.3142 - val_mae: 0.3142 - val_mse: 0.2299
Epoch 221/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9449 -

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9403 - loss: 0.2259 - mae: 0.2259 - mse: 0.1900 - val_accuracy: 0.9268 - val_loss: 0.2920 - val_mae: 0.2920 - val_mse: 0.1968
Epoch 290/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9465 - loss: 0.2092 - mae: 0.2092 - mse: 0.1520 - val_accuracy: 0.9268 - val_loss: 0.2920 - val_mae: 0.2920 - val_mse: 0.1964
Epoch 291/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.9185 - loss: 0.2184 - mae: 0.2184 - mse: 0.1764 - val_accuracy: 0.9268 - val_loss: 0.2916 - val_mae: 0.2916 - val_mse: 0.1962
Epoch 292/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9292 - loss: 0.2150 - mae: 0.2150 - mse: 0.1740 - val_accuracy: 0.9268 - val_loss: 0.2912 - val_mae: 0.2912 - val_mse: 0.1950
Epoch 293/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9338 -

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9316 - loss: 0.2342 - mae: 0.2342 - mse: 0.1984 - val_accuracy: 0.9756 - val_loss: 0.2042 - val_mae: 0.2042 - val_mse: 0.1488
Epoch 5/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9276 - loss: 0.2298 - mae: 0.2298 - mse: 0.1985 - val_accuracy: 0.9756 - val_loss: 0.2045 - val_mae: 0.2045 - val_mse: 0.1494
Epoch 6/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9430 - loss: 0.2178 - mae: 0.2178 - mse: 0.1636 - val_accuracy: 0.9756 - val_loss: 0.2049 - val_mae: 0.2049 - val_mse: 0.1500
Epoch 7/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.9216 - loss: 0.2043 - mae: 0.2043 - mse: 0.1371 - val_accuracy: 0.9756 - val_loss: 0.2053 - val_mae: 0.2053 - val_mse: 0.1504
Epoch 8/1000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.9003 - loss: 0

In [119]:
print("Epocas promedio: ", cant_epocas/EJECUCIONES)
print("ECM promedio: ", ECMs/EJECUCIONES)

Epocas promedio:  81.0
ECM promedio:  0.21013956964015962


# Tabla de resultados, haciendo 5 ejecuciones para cada caso

|optimizador|func act|epocas prom|ECM prom|
|---|---|---|---|
|sgd|tanh| 125.4| 0.308|
|sgd|sigmoid| 294.2| 0.315|
|sgd|relu|177 |0.243 |
|sgd|leaky relu|119 | 0.23|
|rms|tanh| 67.4| 0.235|
|rms|sigmoid| 112|0.304 |
|rms|relu| 101.2| 0.259|
|rms|leaky relu| 80.8|0.203|
|adam|tanh|94.2 | 0.223|
|adam|sigmoid| 108.2|0.254 |
|adam|relu| 121|0.217 |
|adam|leaky relu|81 |0.21 |

# Conclusiones:
Tenemos una red neuronal con 3 capas, donde cada una tiene 6, 3 y 2 neuronas respectivamente. Recordar que el sgd con momento es una buena forma para reducir las osiclaciones del entrenamiento, bajando la cantidad de épocas necesarias y reduciendo el error. Lo mismo hacen las optimizaciones con rms y adam, solo que estas son más complejas y pueden tardar más y dar los mismos resultados que sgd con momento si es que estamos ante un modelo pequeño.
En base a lo dicho anteriormente, y notando que las optimizaciones de rms y adam dieron resultados similares, podemos concluir que el modelo hecho es intermedio. Como es intermedio el optimizador con momento no es suficiente, mientras que las otras dos alternativas son más complejas y eficaces.