<h1><center>Predicción de niveles de contaminación por partículas en Madrid mediante técnicas de aprendizaje profundo</center></h1>

<div style="text-align: center">Trabajo Fin de Máster</div>
<div style="text-align: center">Máster en Inteligencia Artificial Aplicada (UC3M)</div>
<div style="text-align: center">Laura Lillo Collado</div>

In [None]:
import json
import math
import os
import pickle
import time
import warnings
import zipfile
from datetime import datetime

import contextily as ctx
import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import openmeteo_requests
import pandas as pd
import requests_cache
import seaborn as sns
from retry_requests import retry
from scipy.stats import normaltest, zscore
from shapely.geometry import Point
from sklearn.impute import KNNImputer
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import *
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from keras.layers import Conv1D, Dense, Flatten, MaxPooling1D

# Remove pandas' display limits so that frames are shown with all their rows and columns.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

## Metodología

1. [Comprensión de datos](#1.)

    1.1. [Extracción de datos](#1.1.)
    <br><br>
    1.2. [Integración de datos](#1.2.)
    <br><br>
    1.3. [Estudio preliminar de datos](#1.3.)
    <br><br>
2. [Preparación de datos](#2.)
<br><br>
3. [Modelado y Evaluación](#3.)

    3.1. [Modelos Naive](#3.1.)
    <br><br>
    3.2. [Estación 8](#3.2.)
    <br><br>
    3.3. [Estación 18](#3.3.)
    <br><br>
    3.4. [Estación 24](#3.4.)
    <br><br>
    3.5. [Estación 36](#3.5.)
    <br><br>
    3.6. [Estación 38](#3.6.)
    <br><br>
    3.7. [Estación 40](#3.7.)
    <br><br>
    3.8. [Estación 47](#3.8.)
    <br><br>
    3.9. [Estación 48](#3.9.)
    <br><br>
    3.10. [Estación 50](#3.10.)
    <br><br>
    3.11. [Estación 55](#3.11.)
    <br><br>
    3.12. [Estación 57](#3.12.)
    <br><br>
    3.13. [Estación 60](#3.13.)
    <br>
4. [Análisis de resultados](#4.)

### 1. Comprensión de datos <a class="anchor" id="1."></a> 

#### 1.1. Extracción de datos <a class="anchor" id="1.1."></a> 

In [None]:
# Read the metadata of the air-quality monitoring stations.
estaciones_medicion = pd.read_csv('informacion_estaciones_red_calidad_aire.csv', sep=';')

# Keep the rows whose PM10 column is 'X' and only the identifying/location columns.
# Selecting with a single .loc and chaining rename/astype yields a fresh frame, so the
# renaming and casting never write into a slice of `estaciones_medicion`.
info_estaciones = (
    estaciones_medicion
    .loc[estaciones_medicion['PM10'] == 'X', ['ESTACION', 'CODIGO_CORTO', 'LONGITUD', 'LATITUD']]
    .rename(columns={
        'ESTACION': 'NombreEstacion',
        'CODIGO_CORTO': 'CodigoEstacion',
        'LONGITUD': 'Longitud',
        'LATITUD': 'Latitud',
    })
    .astype({
        'NombreEstacion': str,
        'CodigoEstacion': str,
        'Longitud': float,
        'Latitud': float,
    })
)

# Creación de dataframe con datos sobre calidad del aire
"""
directorio_zips = 'C:\\Users\\Laura Lillo\\OneDrive\\Master\\TFM\\Datos_CalidadAire'
archivo_salida_calidad_aire = 'datos_calidad_aire_pm10.txt'
datos_calidad_aire_recopilados = []
for archivo_zip in os.listdir(directorio_zips):
    if archivo_zip.endswith('.zip'):
        anio = int(archivo_zip[4:8])
        ruta_zip = os.path.join(directorio_zips, archivo_zip)
        with zipfile.ZipFile(ruta_zip, 'r') as zip_ref:
            for nombre_archivo in zip_ref.namelist():
                if nombre_archivo.endswith('.txt'):
                    with zip_ref.open(nombre_archivo) as archivo_txt:
                        for linea in archivo_txt:
                            datos = linea.decode('utf-8').split('\n')[0].split('\r')[0].split(',')                        
                            datos_calidad_aire_recopilados.append(datos)

columnas_datos_zip = ["Provincia", "Municipio", "CodigoEstacion", "Magnitud", "Tecnica", "PeriodoAnalisis", "Anyo", "Mes", "Dia"] 
columnas_datos_zip_horas = [f"H{i:02d}" for i in range(1, 25)]
columnas_datos_zip_v = [f"V{i:02d}" for i in range(1, 25)]
columnas_datos_zip_intercaladas = [col for pair in zip(columnas_datos_zip_horas, columnas_datos_zip_v) for col in pair]
columnas_datos_zip += columnas_datos_zip_intercaladas
datos_calidad_aire = pd.DataFrame(datos_calidad_aire_recopilados, columns=columnas_datos_zip)
datos_pm10 = datos_calidad_aire[datos_calidad_aire['Magnitud']=='10']

warnings.filterwarnings("ignore")
datos_pm10['Provincia'] = datos_pm10['Provincia'].astype(str)
datos_pm10['Municipio'] = datos_pm10['Municipio'].astype(str)
datos_pm10['CodigoEstacion'] = datos_pm10['CodigoEstacion'].astype(int).astype(str)
datos_pm10['Magnitud'] = datos_pm10['Magnitud'].astype(str)
datos_pm10['Tecnica'] = datos_pm10['Tecnica'].astype(str)
datos_pm10['PeriodoAnalisis'] = datos_pm10['PeriodoAnalisis'].astype(str)

datos_pm10[columnas_datos_zip_horas] = datos_pm10[columnas_datos_zip_horas].astype(float)
datos_pm10[columnas_datos_zip_v] = datos_pm10[columnas_datos_zip_v].applymap(lambda x: True if x == 'V' else False)

if len(datos_pm10['Provincia'].unique())==1:
    datos_pm10.drop('Provincia', axis=1, inplace=True)
if len(datos_pm10['Municipio'].unique())==1:
    datos_pm10.drop('Municipio', axis=1, inplace=True)
if len(datos_pm10['CodigoEstacion'].unique())==1:
    datos_pm10.drop('CodigoEstacion', axis=1, inplace=True)
if len(datos_pm10['Magnitud'].unique())==1:
    datos_pm10.drop('Magnitud', axis=1, inplace=True)
if len(datos_pm10['Tecnica'].unique())==1:
    datos_pm10.drop('Tecnica', axis=1, inplace=True)
if len(datos_pm10['PeriodoAnalisis'].unique())==1:
    datos_pm10.drop('PeriodoAnalisis', axis=1, inplace=True)
warnings.filterwarnings("default")

pm10datoss = datos_pm10.copy()
pm10datoss['CodigoEstacion'] = pm10datoss['CodigoEstacion'].astype(str)
pm10datoss['aux'] = pm10datoss['Anyo'].astype(str) + '-' + pm10datoss['Mes'].astype(str) + '-' + pm10datoss['Dia'].astype(str)
pm10datoss['Fecha'] = pd.to_datetime(pm10datoss['aux'], format='%Y-%m-%d')
pm10datoss.drop(['aux'], axis=1, inplace=True)
pm10datoss.index = pm10datoss['Fecha']
#pm10datoss.drop(['Fecha'], axis=1, inplace=True)
pm10datoss.drop(['Anyo'], axis=1, inplace=True)
pm10datoss.drop(['Mes'], axis=1, inplace=True)
pm10datoss.drop(['Dia'], axis=1, inplace=True)
for i in range(1, 25):
    hora_col = f'H{i:02d}'
    val_col = f'V{i:02d}'
    pm10datoss.loc[~pm10datoss[val_col], hora_col] = float('NaN')
    pm10datoss.drop([val_col], axis=1, inplace=True)
pm10datoss.to_csv('datosPM10.csv', index=False)
"""
# Load the PM10 table, type its key columns and index it by date (ascending).
pm10 = pd.read_csv('datosPM10.csv')
pm10 = (
    pm10
    .assign(
        CodigoEstacion=pm10['CodigoEstacion'].astype(str),
        Fecha=pd.to_datetime(pm10['Fecha'], format='%Y-%m-%d'),
    )
    .sort_values(by=['Fecha'])
    .set_index('Fecha')
)
# Keep only the stations present in the station metadata.
codigos_validos = info_estaciones['CodigoEstacion'].unique()
pm10 = pm10[pm10['CodigoEstacion'].isin(codigos_validos)]

"""
todo_meteorologia = pd.DataFrame()
for fila in range(len(info_estaciones)):
    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    # Make sure all required weather variables are listed here
    # The order of variables in hourly or daily is important to assign them correctly below
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": info_estaciones.iloc[fila]['Latitud'],
        "longitude": info_estaciones.iloc[fila]['Longitud'],
        "start_date": "2010-01-01",
        "end_date": "2023-12-31",
        "hourly":  ["temperature_2m", 
                    "relative_humidity_2m", 
                    "precipitation", 
                    "rain", 
                    "surface_pressure", 
                    "cloud_cover", 
                    "cloud_cover_low", 
                    "cloud_cover_mid", 
                    "cloud_cover_high", 
                    "wind_speed_10m", 
                    "wind_direction_10m", 
                    "is_day"]
    }
    #responses = openmeteo.weather_api(url, params=params)
    try:
        time.sleep(5)
        responses = openmeteo.weather_api(url, params=params)
    except Exception as e:
        if 'Minutely API request limit exceeded' in str(e):
            print('Rate limit exceeded. Waiting for 1 minute...')
            time.sleep(60)
            # Retry the API request
            responses = openmeteo.weather_api(url, params=params)
        elif 'Hourly API request limit exceeded' in str(e):
            print('Rate limit exceeded. Waiting for 1 hour...')
            time.sleep(60*60)
            # Retry the API request
            responses = openmeteo.weather_api(url, params=params)
        else:
            raise  # Propagate other errors
    # Process first location. Add a for-loop for multiple locations or weather models
    response = responses[0]

    # Process hourly data. The order of variables needs to be the same as requested.
    hourly = response.Hourly()
    hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
    hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
    hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
    hourly_rain = hourly.Variables(3).ValuesAsNumpy()
    hourly_surface_pressure = hourly.Variables(4).ValuesAsNumpy()
    hourly_cloud_cover = hourly.Variables(5).ValuesAsNumpy()
    hourly_cloud_cover_low = hourly.Variables(6).ValuesAsNumpy()
    hourly_cloud_cover_mid = hourly.Variables(7).ValuesAsNumpy()
    hourly_cloud_cover_high = hourly.Variables(8).ValuesAsNumpy()
    hourly_wind_speed_10m = hourly.Variables(9).ValuesAsNumpy()
    hourly_wind_direction_10m = hourly.Variables(10).ValuesAsNumpy()
    hourly_is_day = hourly.Variables(11).ValuesAsNumpy()
    hourly_data = {"FechaHora": pd.date_range(
        start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
        end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
        freq = pd.Timedelta(seconds = hourly.Interval()),
        inclusive = "left"
    )}
    hourly_data["temperature_2m"] = hourly_temperature_2m
    hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
    hourly_data["precipitation"] = hourly_precipitation
    hourly_data["rain"] = hourly_rain
    hourly_data["surface_pressure"] = hourly_surface_pressure
    hourly_data["cloud_cover"] = hourly_cloud_cover
    hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
    hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
    hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
    hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
    hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
    hourly_data["is_day"] = hourly_is_day
    hourly_data['CodigoEstacion'] = info_estaciones.iloc[fila]['CodigoEstacion']
    hourly_dataframe = pd.DataFrame(data = hourly_data)
    todo_meteorologia = pd.concat([todo_meteorologia, hourly_dataframe])
todo_meteorologia['FechaHora'] = todo_meteorologia['FechaHora'].dt.tz_localize(None)
todo_meteorologia.to_csv('meteorologia_v2.csv', index=False)
"""

# Load the weather data and type its two key columns.
meteorologia = pd.read_csv('meteorologia_v2.csv')
meteorologia = meteorologia.assign(
    CodigoEstacion=meteorologia['CodigoEstacion'].astype(str),
    FechaHora=pd.to_datetime(meteorologia['FechaHora'], format='%Y-%m-%d %H:%M:%S'),
)

# Load the local and regional holiday calendars.
with open("festivos_locales_historicos.json", 'r', encoding='utf-8') as archivo:
    locales_json = json.load(archivo)
with open("festivos_regionales_historicos.json", 'r', encoding='utf-8') as archivo:
    regionales_json = json.load(archivo)

festivos_locales = pd.DataFrame(locales_json['data'])
festivos_regionales = pd.DataFrame(regionales_json['data'])
for festivos in (festivos_locales, festivos_regionales):
    # Holidays are compared by calendar day, so keep plain `date` objects.
    festivos['fecha_festivo'] = pd.to_datetime(festivos['fecha_festivo']).dt.date

# Study period (both ends inclusive).
primer_dia = pd.to_datetime('2010-01-01').date()
ultimo_dia = pd.to_datetime('2023-12-31').date()

locales = festivos_locales[festivos_locales['municipio_nombre'] == 'Madrid']
locales = locales[locales['fecha_festivo'].between(primer_dia, ultimo_dia)]
regionales = festivos_regionales[festivos_regionales['fecha_festivo'].between(primer_dia, ultimo_dia)]
fechas_festivos = locales['fecha_festivo'].tolist() + regionales['fecha_festivo'].tolist()

#### 1.2. Integración de datos <a class="anchor" id="1.2."></a> 

In [None]:
def informar_cobertura_fechas(datos):
    """Print, for every station in ``datos``, whether its date range has gaps.

    Args:
        datos: frame indexed by date (DatetimeIndex) with a 'CodigoEstacion' column.

    A station is reported as complete when the number of distinct days it has
    equals the number of calendar days between its first and last date.
    """
    for e in datos['CodigoEstacion'].unique():
        print(f'Estación {e}')
        dias = datos.index[(datos['CodigoEstacion'] == e).to_numpy()].normalize()
        # A gap-free range spanning N+1 calendar days has exactly N+1 distinct dates.
        if (dias.max() - dias.min()).days == dias.nunique() - 1:
            print("\tTodos los días están presentes en el rango de fechas.")
        else:
            print("\tFaltan días en el rango de fechas.")


print('FECHAS FALTANTES')
print('================')
informar_cobertura_fechas(pm10)
print()
print('IMPUTACIÓN DE FECHAS FALTANTES')
print('==============================')
start_date = "2010-01-01"
end_date = "2023-12-31"
freq = "D"
all_timestamps = pd.date_range(start=start_date, end=end_date, freq=freq)
# Collect one all-NaN filler frame per station and concatenate once at the end,
# instead of growing `pm10` one row at a time.
filas_faltantes = []
for e in pm10['CodigoEstacion'].unique():
    print(e)
    pm10_estacion = pm10[pm10['CodigoEstacion']==e]
    missing_timestamps = all_timestamps.difference(pm10_estacion.index)
    if len(missing_timestamps) == 0:
        continue
    relleno = pd.DataFrame(np.nan, index=missing_timestamps, columns=pm10.columns)
    relleno['CodigoEstacion'] = e
    filas_faltantes.append(relleno)
if filas_faltantes:
    pm10 = pd.concat([pm10] + filas_faltantes)
print()
print('FECHAS FALTANTE TRAS IMPUTACIÓN')
print('===============================')
informar_cobertura_fechas(pm10)

pm10 = pm10.sort_index()

In [None]:
from datetime import datetime
def serie_temporal_estacion(estacion):
    """Reshape one station's daily rows (one column per hour) into an hourly series.

    Reads the module-level ``pm10`` frame, which is indexed by date and holds a
    'CodigoEstacion' column plus one column per hour named ``Hnn``.

    Args:
        estacion: station code, compared against ``pm10['CodigoEstacion']``.

    Returns:
        DataFrame with columns 'CodigoEstacion', 'FechaHora' and 'NivelesPM10',
        sorted by station and timestamp. The value of column ``Hnn`` is placed at
        hour ``nn - 1`` of the row's date (H01 -> 00:00, ..., H24 -> 23:00).
    """
    columnas_horas = pm10.columns.drop('CodigoEstacion')
    datos_estacion = pm10[pm10['CodigoEstacion'] == estacion]
    n_dias, n_horas = len(datos_estacion), len(columnas_horas)

    # Hour offset of every Hnn column relative to midnight of its day.
    desfases = pd.to_timedelta(
        [int(ch.split('H')[1]) - 1 for ch in columnas_horas], unit='h'
    ).to_numpy()
    dias = datos_estacion.index.normalize().to_numpy()

    # Row-major flattening: all hours of the first day, then of the second, ...
    st = pd.DataFrame({
        'CodigoEstacion': np.repeat(datos_estacion['CodigoEstacion'].to_numpy(), n_horas),
        'FechaHora': np.repeat(dias, n_horas) + np.tile(desfases, n_dias),
        'NivelesPM10': datos_estacion[columnas_horas].to_numpy().ravel(),
    })
    st = st.sort_values(by=['CodigoEstacion', 'FechaHora'])
    return st

# Hourly PM10 series of every station, stacked with a single concatenation.
pm10_st = pd.concat([serie_temporal_estacion(e) for e in pm10['CodigoEstacion'].unique()])

# Attach the weather variables (inner join on timestamp and station).
df_merged = pd.merge(pm10_st, meteorologia, on=['FechaHora', 'CodigoEstacion'])

# Holiday flag: 1 when the calendar day of the observation is a holiday.
# Vectorised membership test instead of a per-row Python lookup in a list.
dias_festivos = pd.to_datetime(fechas_festivos)
df_merged['Festivo'] = df_merged['FechaHora'].dt.normalize().isin(dias_festivos).astype('int64')

# Lockdown flag: 1 for observations inside the interval below (both ends inclusive).
inicio_confinamiento = pd.to_datetime('2020-03-14 00:00:00')
fin_confinamiento = pd.to_datetime('2020-05-02 23:59:59')
df_merged['ConfinamientoPandemia'] = ((df_merged['FechaHora'] >= inicio_confinamiento) & (df_merged['FechaHora'] <= fin_confinamiento)).astype(int)

#### 1.3. Estudio preliminar de los datos <a class="anchor" id="1.3."></a> 

Datos faltantes

In [None]:
# Number of missing values in each column of the merged dataset.
df_merged.isna().sum()

In [None]:
# Missing PM10 data per station: percentage of missing hourly values and a
# day-by-day plot marking the days with at least one missing hour.
df = pm10.copy()
codigos_estaciones = df['CodigoEstacion'].unique()
# Only the hourly columns can hold missing measurements; the station code must not
# be counted in the denominator of the percentage.
columnas_horarias = [c for c in df.columns if c != 'CodigoEstacion']
# squeeze=False keeps `axs` two-dimensional even when there is a single station.
fig, axs = plt.subplots(len(codigos_estaciones), 1, figsize=(12, 4*len(codigos_estaciones)), squeeze=False)
for i, estacion in enumerate(codigos_estaciones):
    df_estacion = df[df['CodigoEstacion'] == estacion]
    nulos_horarios = df_estacion[columnas_horarias].isnull()
    datos_faltantes = nulos_horarios.any(axis=1)
    porcentaje_faltantes_e = nulos_horarios.sum().sum() / nulos_horarios.size * 100
    print(f"Porcentaje de observaciones faltantes estacion {estacion}:", porcentaje_faltantes_e)
    ax = axs[i, 0]
    ax.plot(df_estacion.index, datos_faltantes.astype(int), color='lightgrey', linewidth=1)
    ax.fill_between(df_estacion.index, datos_faltantes.astype(int), color='red', alpha=0.3)
    ax.set_title(f'Datos faltantes - Estación {estacion}')
    ax.set_ylabel('Datos faltantes')
    ax.set_yticks([0, 1])
    ax.set_yticklabels(['No faltante', 'Faltante'])
    ax.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Remove station 56 from the merged dataset.
estacion_a_eliminar = '56'
filas_a_conservar = df_merged['CodigoEstacion'] != estacion_a_eliminar
df_merged = df_merged.loc[filas_a_conservar].copy()

In [None]:
# Station codes present in the merged dataset.
df_merged['CodigoEstacion'].unique()

In [None]:
# Missing values per column of the merged dataset at this point of the notebook.
df_merged.isna().sum()

Imputación datos faltantes

In [None]:
# Impute missing values station by station with a KNN imputer working on
# standardised numeric columns; results are written back in the original scale.
pm10_imputado = df_merged.copy()
for e in df_merged['CodigoEstacion'].unique():
    filas_estacion = df_merged['CodigoEstacion'] == e
    x = df_merged.loc[filas_estacion].select_dtypes(include='number')
    num_cols = x.columns.tolist()

    scaler = StandardScaler()
    imputer = KNNImputer()
    # Standardise -> impute -> undo the standardisation.
    imputed_data_original_scale = scaler.inverse_transform(
        imputer.fit_transform(scaler.fit_transform(x))
    )

    pm10_imputado.loc[pm10_imputado['CodigoEstacion'] == e, num_cols] = imputed_data_original_scale

In [None]:
# Missing values per column after the imputation.
pm10_imputado.isna().sum()

In [None]:
# New column labels, assigned by position (the order must match the current columns).
nombres_columnas = [
    'CodigoEstacion', 'FechaHora', 'NivelesPM10',
    'Temperatura', 'HumedadRelativa', 'Precipitacion', 'LLuvia', 'PresionSuperficie',
    'NubosidadTotal', 'NubosidadBaja', 'NubosidadMedia', 'NubosidadAlta',
    'VelocidadViento', 'DireccionViento', 'DiaNoche',
    'Festivo', 'ConfinamientoPandemia',
]
pm10_imputado.columns = nombres_columnas

In [None]:
# Preview the first rows of the imputed, renamed dataset.
pm10_imputado.head()

Datos atípicos

In [None]:
# One row of panels per station: histogram, time evolution and summary statistics of PM10.
df = pm10_imputado.copy()
estaciones_unicas = df['CodigoEstacion'].unique()
n_estaciones = len(estaciones_unicas)

fig, axs = plt.subplots(n_estaciones, 3, figsize=(18, 4*n_estaciones))

for i, estacion in enumerate(estaciones_unicas):
    df_estacion = df[df['CodigoEstacion'] == estacion]
    niveles = df_estacion['NivelesPM10']
    ax_hist, ax_evol, ax_texto = axs[i, 0], axs[i, 1], axs[i, 2]

    # Histogram
    ax_hist.hist(niveles, bins=20, color='skyblue', edgecolor='black')
    ax_hist.set(title=f'Histograma de PM10 - Estación {estacion}',
                xlabel='Niveles de PM10',
                ylabel='Frecuencia')
    ax_hist.grid(True)

    # Evolution over time
    ax_evol.plot(df_estacion['FechaHora'], niveles, color='salmon')
    ax_evol.set(title=f'Evolución de PM10 - Estación {estacion}',
                xlabel='Fecha y Hora',
                ylabel='Niveles de PM10')
    ax_evol.grid(True)

    # Summary statistics
    min_val, max_val = np.min(niveles), np.max(niveles)
    mean_val, median_val = np.mean(niveles), np.median(niveles)
    std_val = np.std(niveles)

    # Text panel with the statistics
    lineas_texto = [
        f'Mínimo: {min_val}',
        f'Medio: {mean_val}',
        f'Mediano: {median_val}',
        f'Máximo: {max_val}',
        f'Desviación Típica: {std_val}',
    ]
    textstr = '\n'.join(lineas_texto)
    ax_texto.text(0.1, 0.5, textstr, fontsize=10, verticalalignment='center')
    ax_texto.axis('off')

plt.tight_layout()
plt.show()

### 2. Preparación de datos <a class="anchor" id="2."></a> 

In [None]:
# Standardise the continuous predictors; the target, identifiers and binary flags are left untouched.
columnas_sin_escalar = ['NivelesPM10', 'CodigoEstacion', 'FechaHora', 'DiaNoche', 'Festivo', 'ConfinamientoPandemia']
columns = pm10_imputado.columns.drop(columnas_sin_escalar)

# NOTE(review): the scaler is fitted on every row, including the years that are later
# used for validation and test — confirm this is intended.
scaler = StandardScaler()
pm10_scaled = pm10_imputado.copy()
pm10_scaled[columns] = scaler.fit_transform(pm10_scaled[columns])

# Index the scaled data by timestamp, in chronological order.
pm10_scaled = pm10_scaled.set_index('FechaHora').sort_index()

In [None]:
# Persist the scaled dataset.
# NOTE(review): index=False omits the index from the file, and the index is where
# 'FechaHora' lives at this point — confirm that dropping the timestamps is intended.
ruta_datos_escalados = 'datosPM10escalados.csv'
pm10_scaled.to_csv(ruta_datos_escalados, index=False)

In [None]:
# Station code -> station name shown in the plot titles (plotted in this order).
nombres_estaciones_pm10 = {
    '8': 'Escuelas Aguirre',
    '18': 'Farolillo',
    '24': 'Casa de Campo',
    '36': 'Moratalaz',
    '38': 'Cuatro Caminos',
    '40': 'Vallecas',
    '47': 'Méndez Álvaro',
    '48': 'Castellana',
    '50': 'Plaza Castilla',
    '55': 'Urb. Embajada',
    '57': 'Sanchinarro',
    '60': 'Tres Olivos',
}

# One figure per station with its PM10 time series.
for codigo_estacion, nombre_estacion in nombres_estaciones_pm10.items():
    plt.figure(figsize=(10,4))
    p = pm10_scaled[pm10_scaled['CodigoEstacion']==codigo_estacion]
    plt.plot(p.index, p['NivelesPM10'], color='#104861')
    plt.title(f'Contaminación por partículas PM10 (estación de control {nombre_estacion} ({codigo_estacion}))')
    plt.ylabel('Niveles PM10')
    plt.xlabel('Hora')
    plt.show()

### 3. Modelado y Evaluación <a class="anchor" id="3."></a> 

In [None]:
def create_X_y_datasets(dataset, look_back=1, window=48):
    """Build sliding-window inputs and targets from a time-ordered frame.

    Args:
        dataset: DataFrame whose rows are consecutive time steps and whose first
            column is the target variable.
        look_back: number of steps by which the target window is shifted with
            respect to the start of the input window.
        window: length, in rows, of both the input and the target windows
            (defaults to 48, the value previously hard-coded).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. Sample ``i`` has
        ``X[i] = dataset[i : i + window]`` (all columns) and
        ``y[i] = dataset[i + look_back : i + look_back + window]`` (first column).
        ``len(dataset) - look_back - window`` samples are produced; when that is
        not positive both arrays are empty.

    Note:
        The input window includes the target column, so whenever
        ``look_back < window`` the target window overlaps values already present
        in the input window.
    """
    # Convert once to NumPy: slicing arrays is far cheaper than one .iloc per sample.
    values = dataset.to_numpy()
    target = dataset.iloc[:, 0].to_numpy()
    n_samples = len(dataset) - look_back - window

    X = [values[i:i + window] for i in range(n_samples)]
    y = [target[i + look_back:i + look_back + window] for i in range(n_samples)]
    return np.array(X), np.array(y)

In [None]:
e8 = '8'
df_e8 = pm10_scaled[pm10_scaled['CodigoEstacion']==e8]
df_e8 = df_e8.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e8 = pd.DataFrame()

X_e8, y_e8 = create_X_y_datasets(df_e8, look_back=1)

indices_train_e8 = df_e8[(df_e8.index.year >= 2010) & (df_e8.index.year <= 2021)].index
indices_val_e8 = df_e8[df_e8.index.year == 2022].index
indices_test_e8 = df_e8[df_e8.index.year == 2023].index

idx_train_e8 = [i for i, date in enumerate(df_e8.index[:-49]) if date in indices_train_e8]
idx_val_e8 = [i for i, date in enumerate(df_e8.index[:-49]) if date in indices_val_e8]
idx_test_e8 = [i for i, date in enumerate(df_e8.index[:-49]) if date in indices_test_e8]

X_train_e8, y_train_e8 = X_e8[idx_train_e8], y_e8[idx_train_e8]
X_val_e8, y_val_e8 = X_e8[idx_val_e8], y_e8[idx_val_e8]
X_test_e8, y_test_e8 = X_e8[idx_test_e8], y_e8[idx_test_e8]

e55 = '55'
df_e55 = pm10_scaled[pm10_scaled['CodigoEstacion']==e55]
df_e55 = df_e55.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e55 = pd.DataFrame()

X_e55, y_e55 = create_X_y_datasets(df_e55, look_back=1)

indices_train_e55 = df_e55[(df_e55.index.year >= 2010) & (df_e55.index.year <= 2021)].index
indices_val_e55 = df_e55[df_e55.index.year == 2022].index
indices_test_e55 = df_e55[df_e55.index.year == 2023].index

idx_train_e55 = [i for i, date in enumerate(df_e55.index[:-49]) if date in indices_train_e55]
idx_val_e55 = [i for i, date in enumerate(df_e55.index[:-49]) if date in indices_val_e55]
idx_test_e55 = [i for i, date in enumerate(df_e55.index[:-49]) if date in indices_test_e55]

X_train_e55, y_train_e55 = X_e55[idx_train_e55], y_e55[idx_train_e55]
X_val_e55, y_val_e55 = X_e55[idx_val_e55], y_e55[idx_val_e55]
X_test_e55, y_test_e55 = X_e55[idx_test_e55], y_e55[idx_test_e55]

e36 = '36'
df_e36 = pm10_scaled[pm10_scaled['CodigoEstacion']==e36]
df_e36 = df_e36.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e36 = pd.DataFrame()

X_e36, y_e36 = create_X_y_datasets(df_e36, look_back=1)

indices_train_e36 = df_e36[(df_e36.index.year >= 2010) & (df_e36.index.year <= 2021)].index
indices_val_e36 = df_e36[df_e36.index.year == 2022].index
indices_test_e36 = df_e36[df_e36.index.year == 2023].index

idx_train_e36 = [i for i, date in enumerate(df_e36.index[:-49]) if date in indices_train_e36]
idx_val_e36 = [i for i, date in enumerate(df_e36.index[:-49]) if date in indices_val_e36]
idx_test_e36 = [i for i, date in enumerate(df_e36.index[:-49]) if date in indices_test_e36]

X_train_e36, y_train_e36 = X_e36[idx_train_e36], y_e36[idx_train_e36]
X_val_e36, y_val_e36 = X_e36[idx_val_e36], y_e36[idx_val_e36]
X_test_e36, y_test_e36 = X_e36[idx_test_e36], y_e36[idx_test_e36]

e38 = '38'
df_e38 = pm10_scaled[pm10_scaled['CodigoEstacion']==e38]
df_e38 = df_e38.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e38 = pd.DataFrame()

X_e38, y_e38 = create_X_y_datasets(df_e38, look_back=1)

indices_train_e38 = df_e38[(df_e38.index.year >= 2010) & (df_e38.index.year <= 2021)].index
indices_val_e38 = df_e38[df_e38.index.year == 2022].index
indices_test_e38 = df_e38[df_e38.index.year == 2023].index

idx_train_e38 = [i for i, date in enumerate(df_e38.index[:-49]) if date in indices_train_e38]
idx_val_e38 = [i for i, date in enumerate(df_e38.index[:-49]) if date in indices_val_e38]
idx_test_e38 = [i for i, date in enumerate(df_e38.index[:-49]) if date in indices_test_e38]

X_train_e38, y_train_e38 = X_e38[idx_train_e38], y_e38[idx_train_e38]
X_val_e38, y_val_e38 = X_e38[idx_val_e38], y_e38[idx_val_e38]
X_test_e38, y_test_e38 = X_e38[idx_test_e38], y_e38[idx_test_e38]

e57 = '57'
df_e57 = pm10_scaled[pm10_scaled['CodigoEstacion']==e57]
df_e57 = df_e57.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e57 = pd.DataFrame()

X_e57, y_e57 = create_X_y_datasets(df_e57, look_back=1)

indices_train_e57 = df_e57[(df_e57.index.year >= 2010) & (df_e57.index.year <= 2021)].index
indices_val_e57 = df_e57[df_e57.index.year == 2022].index
indices_test_e57 = df_e57[df_e57.index.year == 2023].index

idx_train_e57 = [i for i, date in enumerate(df_e57.index[:-49]) if date in indices_train_e57]
idx_val_e57 = [i for i, date in enumerate(df_e57.index[:-49]) if date in indices_val_e57]
idx_test_e57 = [i for i, date in enumerate(df_e57.index[:-49]) if date in indices_test_e57]

X_train_e57, y_train_e57 = X_e57[idx_train_e57], y_e57[idx_train_e57]
X_val_e57, y_val_e57 = X_e57[idx_val_e57], y_e57[idx_val_e57]
X_test_e57, y_test_e57 = X_e57[idx_test_e57], y_e57[idx_test_e57]

e60 = '60'
df_e60 = pm10_scaled[pm10_scaled['CodigoEstacion']==e60]
df_e60 = df_e60.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e60 = pd.DataFrame()

X_e60, y_e60 = create_X_y_datasets(df_e60, look_back=1)

indices_train_e60 = df_e60[(df_e60.index.year >= 2010) & (df_e60.index.year <= 2021)].index
indices_val_e60 = df_e60[df_e60.index.year == 2022].index
indices_test_e60 = df_e60[df_e60.index.year == 2023].index

idx_train_e60 = [i for i, date in enumerate(df_e60.index[:-49]) if date in indices_train_e60]
idx_val_e60 = [i for i, date in enumerate(df_e60.index[:-49]) if date in indices_val_e60]
idx_test_e60 = [i for i, date in enumerate(df_e60.index[:-49]) if date in indices_test_e60]

X_train_e60, y_train_e60 = X_e60[idx_train_e60], y_e60[idx_train_e60]
X_val_e60, y_val_e60 = X_e60[idx_val_e60], y_e60[idx_val_e60]
X_test_e60, y_test_e60 = X_e60[idx_test_e60], y_e60[idx_test_e60]

e48 = '48'
df_e48 = pm10_scaled[pm10_scaled['CodigoEstacion']==e48]
df_e48 = df_e48.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e48 = pd.DataFrame()

X_e48, y_e48 = create_X_y_datasets(df_e48, look_back=1)

indices_train_e48 = df_e48[(df_e48.index.year >= 2010) & (df_e48.index.year <= 2021)].index
indices_val_e48 = df_e48[df_e48.index.year == 2022].index
indices_test_e48 = df_e48[df_e48.index.year == 2023].index

idx_train_e48 = [i for i, date in enumerate(df_e48.index[:-49]) if date in indices_train_e48]
idx_val_e48 = [i for i, date in enumerate(df_e48.index[:-49]) if date in indices_val_e48]
idx_test_e48 = [i for i, date in enumerate(df_e48.index[:-49]) if date in indices_test_e48]

X_train_e48, y_train_e48 = X_e48[idx_train_e48], y_e48[idx_train_e48]
X_val_e48, y_val_e48 = X_e48[idx_val_e48], y_e48[idx_val_e48]
X_test_e48, y_test_e48 = X_e48[idx_test_e48], y_e48[idx_test_e48]

e40 = '40'
df_e40 = pm10_scaled[pm10_scaled['CodigoEstacion']==e40]
df_e40 = df_e40.drop(['CodigoEstacion'], axis=1, inplace=False)

evaluacion_e40 = pd.DataFrame()

X_e40, y_e40 = create_X_y_datasets(df_e40, look_back=1)

indices_train_e40 = df_e40[(df_e40.index.year >= 2010) & (df_e40.index.year <= 2021)].index
indices_val_e40 = df_e40[df_e40.index.year == 2022].index
indices_test_e40 = df_e40[df_e40.index.year == 2023].index

idx_train_e40 = [i for i, date in enumerate(df_e40.index[:-49]) if date in indices_train_e40]
idx_val_e40 = [i for i, date in enumerate(df_e40.index[:-49]) if date in indices_val_e40]
idx_test_e40 = [i for i, date in enumerate(df_e40.index[:-49]) if date in indices_test_e40]

X_train_e40, y_train_e40 = X_e40[idx_train_e40], y_e40[idx_train_e40]
X_val_e40, y_val_e40 = X_e40[idx_val_e40], y_e40[idx_val_e40]
X_test_e40, y_test_e40 = X_e40[idx_test_e40], y_e40[idx_test_e40]

# Station 50: keep only its rows of the scaled PM10 frame; the station-code
# column is constant after filtering, so it is dropped.
e50 = '50'
df_e50 = pm10_scaled.loc[pm10_scaled['CodigoEstacion'] == e50].drop(columns='CodigoEstacion')

evaluacion_e50 = pd.DataFrame()

# Inputs/targets built by the notebook's helper with a one-step look-back.
X_e50, y_e50 = create_X_y_datasets(df_e50, look_back=1)

# Split by calendar year: 2010-2021 train, 2022 validation, 2023 test.
anios_e50 = df_e50.index.year
indices_train_e50 = df_e50.index[(anios_e50 >= 2010) & (anios_e50 <= 2021)]
indices_val_e50 = df_e50.index[anios_e50 == 2022]
indices_test_e50 = df_e50.index[anios_e50 == 2023]

# Integer positions of each split among all timestamps except the last 49.
# NOTE(review): 49 presumably equals look_back + forecast horizon (rows that
# cannot start a complete sample) — confirm against create_X_y_datasets.
candidatas_e50 = df_e50.index[:-49]
idx_train_e50 = np.flatnonzero(candidatas_e50.isin(indices_train_e50)).tolist()
idx_val_e50 = np.flatnonzero(candidatas_e50.isin(indices_val_e50)).tolist()
idx_test_e50 = np.flatnonzero(candidatas_e50.isin(indices_test_e50)).tolist()

X_train_e50 = X_e50[idx_train_e50]
y_train_e50 = y_e50[idx_train_e50]
X_val_e50 = X_e50[idx_val_e50]
y_val_e50 = y_e50[idx_val_e50]
X_test_e50 = X_e50[idx_test_e50]
y_test_e50 = y_e50[idx_test_e50]

# Station 18: keep only its rows of the scaled PM10 frame; the station-code
# column is constant after filtering, so it is dropped.
e18 = '18'
df_e18 = pm10_scaled.loc[pm10_scaled['CodigoEstacion'] == e18].drop(columns='CodigoEstacion')

evaluacion_e18 = pd.DataFrame()

# Inputs/targets built by the notebook's helper with a one-step look-back.
X_e18, y_e18 = create_X_y_datasets(df_e18, look_back=1)

# Split by calendar year: 2010-2021 train, 2022 validation, 2023 test.
anios_e18 = df_e18.index.year
indices_train_e18 = df_e18.index[(anios_e18 >= 2010) & (anios_e18 <= 2021)]
indices_val_e18 = df_e18.index[anios_e18 == 2022]
indices_test_e18 = df_e18.index[anios_e18 == 2023]

# Integer positions of each split among all timestamps except the last 49.
# NOTE(review): 49 presumably equals look_back + forecast horizon (rows that
# cannot start a complete sample) — confirm against create_X_y_datasets.
candidatas_e18 = df_e18.index[:-49]
idx_train_e18 = np.flatnonzero(candidatas_e18.isin(indices_train_e18)).tolist()
idx_val_e18 = np.flatnonzero(candidatas_e18.isin(indices_val_e18)).tolist()
idx_test_e18 = np.flatnonzero(candidatas_e18.isin(indices_test_e18)).tolist()

X_train_e18 = X_e18[idx_train_e18]
y_train_e18 = y_e18[idx_train_e18]
X_val_e18 = X_e18[idx_val_e18]
y_val_e18 = y_e18[idx_val_e18]
X_test_e18 = X_e18[idx_test_e18]
y_test_e18 = y_e18[idx_test_e18]

# Station 47: keep only its rows of the scaled PM10 frame; the station-code
# column is constant after filtering, so it is dropped.
e47 = '47'
df_e47 = pm10_scaled.loc[pm10_scaled['CodigoEstacion'] == e47].drop(columns='CodigoEstacion')

evaluacion_e47 = pd.DataFrame()

# Inputs/targets built by the notebook's helper with a one-step look-back.
X_e47, y_e47 = create_X_y_datasets(df_e47, look_back=1)

# Split by calendar year: 2010-2021 train, 2022 validation, 2023 test.
anios_e47 = df_e47.index.year
indices_train_e47 = df_e47.index[(anios_e47 >= 2010) & (anios_e47 <= 2021)]
indices_val_e47 = df_e47.index[anios_e47 == 2022]
indices_test_e47 = df_e47.index[anios_e47 == 2023]

# Integer positions of each split among all timestamps except the last 49.
# NOTE(review): 49 presumably equals look_back + forecast horizon (rows that
# cannot start a complete sample) — confirm against create_X_y_datasets.
candidatas_e47 = df_e47.index[:-49]
idx_train_e47 = np.flatnonzero(candidatas_e47.isin(indices_train_e47)).tolist()
idx_val_e47 = np.flatnonzero(candidatas_e47.isin(indices_val_e47)).tolist()
idx_test_e47 = np.flatnonzero(candidatas_e47.isin(indices_test_e47)).tolist()

X_train_e47 = X_e47[idx_train_e47]
y_train_e47 = y_e47[idx_train_e47]
X_val_e47 = X_e47[idx_val_e47]
y_val_e47 = y_e47[idx_val_e47]
X_test_e47 = X_e47[idx_test_e47]
y_test_e47 = y_e47[idx_test_e47]

# Station 24: keep only its rows of the scaled PM10 frame; the station-code
# column is constant after filtering, so it is dropped.
e24 = '24'
df_e24 = pm10_scaled.loc[pm10_scaled['CodigoEstacion'] == e24].drop(columns='CodigoEstacion')

evaluacion_e24 = pd.DataFrame()

# Inputs/targets built by the notebook's helper with a one-step look-back.
X_e24, y_e24 = create_X_y_datasets(df_e24, look_back=1)

# Split by calendar year: 2010-2021 train, 2022 validation, 2023 test.
anios_e24 = df_e24.index.year
indices_train_e24 = df_e24.index[(anios_e24 >= 2010) & (anios_e24 <= 2021)]
indices_val_e24 = df_e24.index[anios_e24 == 2022]
indices_test_e24 = df_e24.index[anios_e24 == 2023]

# Integer positions of each split among all timestamps except the last 49.
# NOTE(review): 49 presumably equals look_back + forecast horizon (rows that
# cannot start a complete sample) — confirm against create_X_y_datasets.
candidatas_e24 = df_e24.index[:-49]
idx_train_e24 = np.flatnonzero(candidatas_e24.isin(indices_train_e24)).tolist()
idx_val_e24 = np.flatnonzero(candidatas_e24.isin(indices_val_e24)).tolist()
idx_test_e24 = np.flatnonzero(candidatas_e24.isin(indices_test_e24)).tolist()

X_train_e24 = X_e24[idx_train_e24]
y_train_e24 = y_e24[idx_train_e24]
X_val_e24 = X_e24[idx_val_e24]
y_val_e24 = y_e24[idx_val_e24]
X_test_e24 = X_e24[idx_test_e24]
y_test_e24 = y_e24[idx_test_e24]

#### 3.1. Modelos Naive <a class="anchor" id="3.1."></a> 

In [None]:
# One row per station, so every per-station object stays aligned by construction:
# (label, frame, train positions, validation positions,
#  X_train, y_train, X_val, y_val, X_test, y_test)
_por_estacion = [
    ('e_8', df_e8, idx_train_e8, idx_val_e8,
     X_train_e8, y_train_e8, X_val_e8, y_val_e8, X_test_e8, y_test_e8),
    ('e_18', df_e18, idx_train_e18, idx_val_e18,
     X_train_e18, y_train_e18, X_val_e18, y_val_e18, X_test_e18, y_test_e18),
    ('e_24', df_e24, idx_train_e24, idx_val_e24,
     X_train_e24, y_train_e24, X_val_e24, y_val_e24, X_test_e24, y_test_e24),
    ('e_36', df_e36, idx_train_e36, idx_val_e36,
     X_train_e36, y_train_e36, X_val_e36, y_val_e36, X_test_e36, y_test_e36),
    ('e_38', df_e38, idx_train_e38, idx_val_e38,
     X_train_e38, y_train_e38, X_val_e38, y_val_e38, X_test_e38, y_test_e38),
    ('e_40', df_e40, idx_train_e40, idx_val_e40,
     X_train_e40, y_train_e40, X_val_e40, y_val_e40, X_test_e40, y_test_e40),
    ('e_47', df_e47, idx_train_e47, idx_val_e47,
     X_train_e47, y_train_e47, X_val_e47, y_val_e47, X_test_e47, y_test_e47),
    ('e_48', df_e48, idx_train_e48, idx_val_e48,
     X_train_e48, y_train_e48, X_val_e48, y_val_e48, X_test_e48, y_test_e48),
    ('e_50', df_e50, idx_train_e50, idx_val_e50,
     X_train_e50, y_train_e50, X_val_e50, y_val_e50, X_test_e50, y_test_e50),
    ('e_55', df_e55, idx_train_e55, idx_val_e55,
     X_train_e55, y_train_e55, X_val_e55, y_val_e55, X_test_e55, y_test_e55),
    ('e_57', df_e57, idx_train_e57, idx_val_e57,
     X_train_e57, y_train_e57, X_val_e57, y_val_e57, X_test_e57, y_test_e57),
    ('e_60', df_e60, idx_train_e60, idx_val_e60,
     X_train_e60, y_train_e60, X_val_e60, y_val_e60, X_test_e60, y_test_e60),
]

# Transpose the table into the parallel lists (same names, same station order).
(stations, dfs, idx_trains, idx_vals,
 xs_trains, ys_trains, xs_vals, ys_vals,
 xs_tests, ys_tests) = (list(columna) for columna in zip(*_por_estacion))

from sklearn.metrics import r2_score

# Baseline forecast per station: each test sample is "predicted" with a 48-value
# window of the observed NivelesPM10 series, then scored against the test targets.
# Outputs: preds_lr (station label -> forecast matrix), rmsess and r2ss (one
# score per station, in `stations` order).
# NOTE(review): the "lr" suffix is kept only for compatibility; nothing in this
# cell fits a regression.
r2ss = []
rmsess = []
preds_lr = {}
for s, df_st, idx_tr, idx_va, y_te in zip(stations, dfs, idx_trains, idx_vals, ys_tests):
    # Series from 8 rows before the end of the train+validation positions onwards.
    # NOTE(review): the meaning of the -8 offset is not visible here — confirm
    # that the windows below do not overlap the target windows they are scored on.
    inicio = len(idx_tr) + len(idx_va) - 8
    auxs_st = df_st['NivelesPM10'].iloc[inicio:].values

    # Row j holds the 48 consecutive observations starting at offset j.
    pred_lr = np.empty((y_te.shape[0], 48))
    for j, fila in enumerate(pred_lr):
        fila[:] = auxs_st[j:j + 48]
    preds_lr[s] = pred_lr

    # Score the baseline on the test targets.
    error = pred_lr - y_te
    rmsess.append(np.sqrt(np.mean(error ** 2)))
    r2ss.append(r2_score(y_te, pred_lr))

#### 3.2. Estación 8 <a class="anchor" id="3.2."></a> 

In [None]:
# Station 8: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e8`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e8, variables_e8 = X_train_e8.shape[1:]

filas_e8 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e8` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e8, variables_e8)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e8, y_train_e8, validation_data=(X_val_e8, y_val_e8),
                   epochs=1, batch_size=32, verbose=False)

        filas_e8['UNITS'].append(u)
        filas_e8['PREDICCIONES'].append(modelo.predict(X_test_e8))
        filas_e8['REALES'].append(y_test_e8)
        filas_e8['RED'].append(red)
        filas_e8['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e8 = pd.DataFrame(filas_e8)
evaluacion_e8.head(6)

#### 3.3. Estación 18 <a class="anchor" id="3.3."></a> 

In [None]:
# Station 18: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e18`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e18, variables_e18 = X_train_e18.shape[1:]

filas_e18 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e18` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e18, variables_e18)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e18, y_train_e18, validation_data=(X_val_e18, y_val_e18),
                   epochs=1, batch_size=32, verbose=False)

        filas_e18['UNITS'].append(u)
        filas_e18['PREDICCIONES'].append(modelo.predict(X_test_e18))
        filas_e18['REALES'].append(y_test_e18)
        filas_e18['RED'].append(red)
        filas_e18['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e18 = pd.DataFrame(filas_e18)
evaluacion_e18.head(6)

#### 3.4. Estación 24 <a class="anchor" id="3.4."></a> 

In [None]:
# Station 24: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e24`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e24, variables_e24 = X_train_e24.shape[1:]

filas_e24 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e24` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e24, variables_e24)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e24, y_train_e24, validation_data=(X_val_e24, y_val_e24),
                   epochs=1, batch_size=32, verbose=False)

        filas_e24['UNITS'].append(u)
        filas_e24['PREDICCIONES'].append(modelo.predict(X_test_e24))
        filas_e24['REALES'].append(y_test_e24)
        filas_e24['RED'].append(red)
        filas_e24['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e24 = pd.DataFrame(filas_e24)
evaluacion_e24.head(6)

#### 3.5. Estación 36 <a class="anchor" id="3.5."></a> 

In [None]:
# Station 36: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e36`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e36, variables_e36 = X_train_e36.shape[1:]

filas_e36 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e36` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e36, variables_e36)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e36, y_train_e36, validation_data=(X_val_e36, y_val_e36),
                   epochs=1, batch_size=32, verbose=False)

        filas_e36['UNITS'].append(u)
        filas_e36['PREDICCIONES'].append(modelo.predict(X_test_e36))
        filas_e36['REALES'].append(y_test_e36)
        filas_e36['RED'].append(red)
        filas_e36['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e36 = pd.DataFrame(filas_e36)
evaluacion_e36.head(6)

#### 3.6. Estación 38 <a class="anchor" id="3.6."></a> 

In [None]:
# Station 38: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e38`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e38, variables_e38 = X_train_e38.shape[1:]

filas_e38 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e38` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e38, variables_e38)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e38, y_train_e38, validation_data=(X_val_e38, y_val_e38),
                   epochs=1, batch_size=32, verbose=False)

        filas_e38['UNITS'].append(u)
        filas_e38['PREDICCIONES'].append(modelo.predict(X_test_e38))
        filas_e38['REALES'].append(y_test_e38)
        filas_e38['RED'].append(red)
        filas_e38['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e38 = pd.DataFrame(filas_e38)
evaluacion_e38.head(6)

#### 3.7. Estación 40 <a class="anchor" id="3.7."></a> 

In [None]:
# Station 40: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e40`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e40, variables_e40 = X_train_e40.shape[1:]

filas_e40 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e40` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e40, variables_e40)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e40, y_train_e40, validation_data=(X_val_e40, y_val_e40),
                   epochs=1, batch_size=32, verbose=False)

        filas_e40['UNITS'].append(u)
        filas_e40['PREDICCIONES'].append(modelo.predict(X_test_e40))
        filas_e40['REALES'].append(y_test_e40)
        filas_e40['RED'].append(red)
        filas_e40['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e40 = pd.DataFrame(filas_e40)
evaluacion_e40.head(6)

#### 3.8. Estación 47 <a class="anchor" id="3.8."></a> 

In [None]:
# Station 47: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e47`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e47, variables_e47 = X_train_e47.shape[1:]

filas_e47 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e47` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e47, variables_e47)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e47, y_train_e47, validation_data=(X_val_e47, y_val_e47),
                   epochs=1, batch_size=32, verbose=False)

        filas_e47['UNITS'].append(u)
        filas_e47['PREDICCIONES'].append(modelo.predict(X_test_e47))
        filas_e47['REALES'].append(y_test_e47)
        filas_e47['RED'].append(red)
        filas_e47['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e47 = pd.DataFrame(filas_e47)
evaluacion_e47.head(6)

#### 3.9. Estación 48 <a class="anchor" id="3.9."></a> 

In [None]:
# Station 48: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e48`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e48, variables_e48 = X_train_e48.shape[1:]

filas_e48 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e48` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e48, variables_e48)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e48, y_train_e48, validation_data=(X_val_e48, y_val_e48),
                   epochs=1, batch_size=32, verbose=False)

        filas_e48['UNITS'].append(u)
        filas_e48['PREDICCIONES'].append(modelo.predict(X_test_e48))
        filas_e48['REALES'].append(y_test_e48)
        filas_e48['RED'].append(red)
        filas_e48['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e48 = pd.DataFrame(filas_e48)
evaluacion_e48.head(6)

#### 3.10. Estación 50 <a class="anchor" id="3.10."></a> 

In [None]:
# Station 50: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e50`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e50, variables_e50 = X_train_e50.shape[1:]

filas_e50 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e50` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e50, variables_e50)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e50, y_train_e50, validation_data=(X_val_e50, y_val_e50),
                   epochs=1, batch_size=32, verbose=False)

        filas_e50['UNITS'].append(u)
        filas_e50['PREDICCIONES'].append(modelo.predict(X_test_e50))
        filas_e50['REALES'].append(y_test_e50)
        filas_e50['RED'].append(red)
        filas_e50['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e50 = pd.DataFrame(filas_e50)
evaluacion_e50.head(6)

#### 3.11. Estación 55 <a class="anchor" id="3.11."></a> 

In [None]:
# Station 55: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e55`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e55, variables_e55 = X_train_e55.shape[1:]

filas_e55 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e55` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e55, variables_e55)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e55, y_train_e55, validation_data=(X_val_e55, y_val_e55),
                   epochs=1, batch_size=32, verbose=False)

        filas_e55['UNITS'].append(u)
        filas_e55['PREDICCIONES'].append(modelo.predict(X_test_e55))
        filas_e55['REALES'].append(y_test_e55)
        filas_e55['RED'].append(red)
        filas_e55['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e55 = pd.DataFrame(filas_e55)
evaluacion_e55.head(6)

#### 3.12. Estación 57 <a class="anchor" id="3.12."></a> 

In [None]:
# Station 57: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e57`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e57, variables_e57 = X_train_e57.shape[1:]

filas_e57 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e57` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e57, variables_e57)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e57, y_train_e57, validation_data=(X_val_e57, y_val_e57),
                   epochs=1, batch_size=32, verbose=False)

        filas_e57['UNITS'].append(u)
        filas_e57['PREDICCIONES'].append(modelo.predict(X_test_e57))
        filas_e57['REALES'].append(y_test_e57)
        filas_e57['RED'].append(red)
        filas_e57['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e57 = pd.DataFrame(filas_e57)
evaluacion_e57.head(6)

#### 3.13. Estación 60 <a class="anchor" id="3.13."></a> 

In [None]:
# Station 60: train one single-layer LSTM and one single-layer GRU per hidden
# size and keep each fitted model together with its test-set predictions.
# Product of this cell: `evaluacion_e60`, one row per (network type, units) with
# columns UNITS, PREDICCIONES, REALES, RED and MODELO.
units = [32, 64, 100]

# Per-sample input shape taken from the training array (axes 1 and 2).
pasos_e60, variables_e60 = X_train_e60.shape[1:]

filas_e60 = {'UNITS': [], 'PREDICCIONES': [], 'REALES': [], 'RED': [], 'MODELO': []}

# Same training order as before (all LSTM sizes, then all GRU sizes), so the
# rows of `evaluacion_e60` keep their positions.
for red, capa_recurrente in (('LSTM', LSTM), ('GRU', GRU)):
    print(red)
    for u in units:
        print('\tConfiguracion:', u)
        modelo = Sequential([
            Input(shape=(pasos_e60, variables_e60)),
            capa_recurrente(units=u),
            Dense(units=48)])  # 48 outputs per sample
        modelo.compile(loss='mse', optimizer='adam')
        # NOTE(review): a single epoch looks like a smoke-test setting — confirm
        # before reporting results.
        modelo.fit(X_train_e60, y_train_e60, validation_data=(X_val_e60, y_val_e60),
                   epochs=1, batch_size=32, verbose=False)

        filas_e60['UNITS'].append(u)
        filas_e60['PREDICCIONES'].append(modelo.predict(X_test_e60))
        filas_e60['REALES'].append(y_test_e60)
        filas_e60['RED'].append(red)
        filas_e60['MODELO'].append(modelo)

# Build the table once instead of concatenating partial frames onto an empty one.
evaluacion_e60 = pd.DataFrame(filas_e60)
evaluacion_e60.head(6)

In [None]:
# Stack the per-station result tables in station order. The index is not reset,
# so each station's rows keep their own 0..n-1 labels.
tablas_por_estacion = [
    evaluacion_e8, evaluacion_e18, evaluacion_e24,
    evaluacion_e36, evaluacion_e38, evaluacion_e40,
    evaluacion_e47, evaluacion_e48, evaluacion_e50,
    evaluacion_e55, evaluacion_e57, evaluacion_e60,
]
evaluaciones = pd.concat(tablas_por_estacion)

In [None]:
# Preview the first five rows of the combined results table.
evaluaciones.head(n=5)

In [None]:
# CSV export kept as before. NOTE: PREDICCIONES/REALES hold arrays and MODELO
# holds Keras model objects, so the CSV only contains their text representation
# (large arrays are abbreviated with "..."), which cannot be parsed back.
evaluaciones.to_csv('evaluaciones_modelos_TFM.csv')
# Lossless copy of the numeric results (model objects excluded) that can be
# reloaded with pd.read_pickle.
evaluaciones.drop(columns='MODELO').to_pickle('evaluaciones_modelos_TFM.pkl')

### 4. Análisis de resultados <a class="anchor" id="4."></a> 

In [None]:
import pandas as pd

# Working copy of the network results, extended below with one row per
# (baseline, station) so that networks and baselines can be compared together.
p = evaluaciones.copy()

# Test targets per station, listed in the same station order used to build
# `evaluaciones`.
y_test_por_estacion = {
    8: y_test_e8,
    18: y_test_e18,
    24: y_test_e24,
    36: y_test_e36,
    38: y_test_e38,
    40: y_test_e40,
    47: y_test_e47,
    48: y_test_e48,
    50: y_test_e50,
    55: y_test_e55,
    57: y_test_e57,
    60: y_test_e60,
}

# Assumes every station contributes the same number of rows to `evaluaciones`
# (one per network configuration). The block size is derived from the data
# instead of being hard-coded, so changing `units` does not break this cell.
filas_por_estacion, resto = divmod(len(p), len(y_test_por_estacion))
if resto:
    raise ValueError(
        'El número de filas de `evaluaciones` no es múltiplo del número de estaciones'
    )
col_est = [str(estacion) for estacion in y_test_por_estacion for _ in range(filas_por_estacion)]
p['ESTACION'] = col_est
# Replaces the fitted model objects stored in MODELO with a readable label.
p['MODELO'] = p['RED'] + ' ' + p['UNITS'].astype(str)

# Baselines: (label, predictions keyed by 'e_<station>'). Rows are generated
# baseline by baseline, each one covering every station in order.
referencias = [
    ('Naive', preds_naive),
    ('Media', preds_media),
    ('Steps', preds_48hrs),
]
dfs = [
    pd.DataFrame({
        'UNITS': [''],
        'PREDICCIONES': [preds[f'e_{estacion}']],
        'REALES': [y_test],
        'RED': [nombre],
        'MODELO': [nombre],
        'ESTACION': [str(estacion)],
    })
    for nombre, preds in referencias
    for estacion, y_test in y_test_por_estacion.items()
]

p = pd.concat([p] + dfs, ignore_index=True)


In [None]:
import numpy as np
from sklearn.metrics import r2_score

p['ESTACION_n'] = 'Estacion ' + p['ESTACION']

# Pair each row's predictions with its targets once and derive all three
# metrics from those pairs.
pares_pred_real = list(zip(p['PREDICCIONES'], p['REALES']))

# Element-wise error magnitude: square root of each squared error (not averaged).
anyadir = [np.sqrt((pred - real) ** 2) for pred, real in pares_pred_real]
# Single RMSE value per row: square root of the mean squared error.
anyadir_2 = [np.sqrt(np.mean((pred - real) ** 2)) for pred, real in pares_pred_real]
# Coefficient of determination per row.
r2_values = [r2_score(real, pred) for pred, real in pares_pred_real]

p['RMSE_ind'] = anyadir
p['RMSE_med'] = anyadir_2
p['R2'] = r2_values


In [None]:
# Readable labels used for grouping and plotting. Baseline rows carry an empty
# UNITS value, so their label ends with a trailing space (e.g. 'Naive ').
unidades_txt = p['UNITS'].astype(str)
p['MODELO'] = p['RED'].str.cat(unidades_txt, sep=' ')
p['ESTACION_n'] = p['ESTACION'].radd('Estacion ')

#### 4.1. Medias RMSE y R2 por modelo <a class="anchor" id="4.1."></a> 

In [None]:
# Mean of the per-row RMSE for each model label (averaged over stations).
media_rmse_por_modelo = p['RMSE_med'].groupby(p['MODELO']).mean()
media_rmse_por_modelo

In [None]:
# Mean of the per-row R2 for each model label (averaged over stations).
media_r2_por_modelo = p['R2'].groupby(p['MODELO']).mean()
media_r2_por_modelo

In [None]:
# Horizontal bar chart of the mean RMSE of every model.
#
# Bars are generated from the data instead of from hard-coded index positions:
# the previous slicing ([:3], [3:6], [-2], [-1]) only covered eight of the
# labels produced by the groupby and silently left one baseline out.
modelos = media_rmse_por_modelo.index
rmse_med = media_rmse_por_modelo.values

# One colour per network family; every other label (the baselines) shares one.
colores_por_red = {'GRU': 'darkslategray', 'LSTM': 'coral'}
color_referencia = 'lightgoldenrodyellow'

fig, ax = plt.subplots(figsize=(16, 4.5))

for posicion, (modelo, valor) in enumerate(zip(modelos, rmse_med)):
    # The family is the first word of the label ('GRU 32' -> 'GRU').
    partes = modelo.split()
    red = partes[0] if partes else ''
    ax.barh(modelo, valor, color=colores_por_red.get(red, color_referencia))
    # Categorical bars are placed at 0, 1, 2, ... in insertion order, so the
    # running position is also the y coordinate of the value label.
    ax.text(valor + 0.1, posicion, '{:.2f}'.format(valor), ha='left', va='center')

ax.set_xlabel('RMSE')
ax.set_ylabel('Modelo')
ax.set_title('RMSE por Modelo')

plt.show()

In [None]:
# Horizontal bar chart of the mean R2 of every model.
#
# Bars are generated from the data instead of from hard-coded index positions:
# the previous slicing ([:3], [3:6], [-1]) only drew the last baseline and
# silently left the other baselines out.
modelos = media_r2_por_modelo.index
r2_med = media_r2_por_modelo.values


def posicion_etiqueta(valor, factor=0.005, minimo=0.01):
    """Return the x coordinate for a bar's value label.

    The label is shifted right of the bar end by `valor * factor`, but never
    by less than `minimo`.
    """
    return valor + max(valor * factor, minimo)


# One colour per network family; every other label (the baselines) shares one.
colores_por_red = {'GRU': 'darkslategray', 'LSTM': 'darkcyan'}
color_referencia = 'powderblue'

fig, ax = plt.subplots(figsize=(16, 4.5))

for posicion, (modelo, valor) in enumerate(zip(modelos, r2_med)):
    # The family is the first word of the label ('GRU 32' -> 'GRU').
    partes = modelo.split()
    red = partes[0] if partes else ''
    ax.barh(modelo, valor, color=colores_por_red.get(red, color_referencia))
    # Categorical bars are placed at 0, 1, 2, ... in insertion order, so the
    # running position is also the y coordinate of the value label.
    ax.text(posicion_etiqueta(valor), posicion, '{:.2f}'.format(valor), ha='left', va='center')

# Axis labels and title
ax.set_xlabel('R2')
ax.set_ylabel('Modelo')
ax.set_title('R2 por Modelo')

# Show the figure
plt.show()

#### 4.2. Contrastes DM unilaterales <a class="anchor" id="4.2."></a> 

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import t
from dieboldmariano import dm_test, ZeroVarianceException

# One-sided Diebold-Mariano test for every ordered pair of different models
# within each station. One output row per pair: test statistic, p-value and a
# 0/1 flag telling whether the p-value is below `alpha`.
# NOTE(review): 0.005 is stricter than the conventional 0.05 — confirm it is intended.
alpha = 0.005

d = {'ESTACION': [], 'MODELO 1': [], 'MODELO 2': [], 'DM': [], 'PVALUE': [], 'DIFERENCIA': []}
for e in p['ESTACION'].unique():
    p_e = p[p['ESTACION'] == e]
    # Scalar station label, stored directly in the results (previously the
    # whole column was appended per pair and unwrapped afterwards).
    nombre_estacion = p_e['ESTACION_n'].unique()[0]
    print(nombre_estacion)
    print()
    for i in range(len(p_e)):
        m1 = p_e.iloc[i]
        for j in range(len(p_e)):
            if i == j:
                continue
            m2 = p_e.iloc[j]
            print('Modelos:')
            print('\tModelo 1: ', m1['MODELO'])
            print('\tModelo 2: ', m2['MODELO'])
            try:
                DM_stat, p_value = dm_test(m1['REALES'].flatten(), m1['PREDICCIONES'].flatten(),
                                           m2['PREDICCIONES'].flatten(), h=48, one_sided=True)
            except ZeroVarianceException:
                # The statistic is undefined when the loss differential has zero
                # variance. Record NaN (flagged as "no difference" below) instead
                # of aborting the remaining comparisons.
                print('No se puede calcular el estadístico DM (varianza nula)')
                DM_stat, p_value = np.nan, np.nan
            print()
            print()
            print(f'Estadístico DM: {DM_stat}, p-valor: {p_value}')
            d['ESTACION'].append(nombre_estacion)
            d['MODELO 1'].append(m1['MODELO'])
            d['MODELO 2'].append(m2['MODELO'])
            d['DM'].append(DM_stat)
            d['PVALUE'].append(p_value)
            if p_value < alpha:
                print()
                print("Hay una diferencia significativa entre los modelos (rechazamos H0)")
                d['DIFERENCIA'].append(1)
            else:
                print("No hay una diferencia significativa entre los modelos (no rechazamos H0)")
                d['DIFERENCIA'].append(0)
            print()

df_DMS_uni = pd.DataFrame(data=d)

In [None]:
# Save the one-sided DM results without the row index.
df_DMS_uni.to_csv(path_or_buf='df_dms_1.csv', index=False)

#### 4.3. Contrastes DM bilaterales <a class="anchor" id="4.3."></a> 

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import t
from dieboldmariano import dm_test, ZeroVarianceException

# Two-sided Diebold-Mariano test for every ordered pair of different models
# within each station. One output row per pair: test statistic, p-value and a
# 0/1 flag telling whether the p-value is below `alpha`.
# NOTE(review): 0.005 is stricter than the conventional 0.05 — confirm it is intended.
alpha = 0.005

d = {'ESTACION': [], 'MODELO 1': [], 'MODELO 2': [], 'DM': [], 'PVALUE': [], 'DIFERENCIA': []}
for e in p['ESTACION'].unique():
    p_e = p[p['ESTACION'] == e]
    # Scalar station label, stored directly in the results (previously the
    # whole column was appended per pair and unwrapped afterwards).
    nombre_estacion = p_e['ESTACION_n'].unique()[0]
    print(nombre_estacion)
    print()
    for i in range(len(p_e)):
        m1 = p_e.iloc[i]
        for j in range(len(p_e)):
            if i == j:
                continue
            m2 = p_e.iloc[j]
            print('Modelos:')
            print('\tModelo 1: ', m1['MODELO'])
            print('\tModelo 2: ', m2['MODELO'])
            try:
                DM_stat, p_value = dm_test(m1['REALES'].flatten(), m1['PREDICCIONES'].flatten(),
                                           m2['PREDICCIONES'].flatten(), h=48, one_sided=False)
            except ZeroVarianceException:
                # The statistic is undefined when the loss differential has zero
                # variance. Record NaN (flagged as "no difference" below) instead
                # of aborting the remaining comparisons.
                print('No se puede calcular el estadístico DM (varianza nula)')
                DM_stat, p_value = np.nan, np.nan
            print()
            print()
            print(f'Estadístico DM: {DM_stat}, p-valor: {p_value}')
            d['ESTACION'].append(nombre_estacion)
            d['MODELO 1'].append(m1['MODELO'])
            d['MODELO 2'].append(m2['MODELO'])
            d['DM'].append(DM_stat)
            d['PVALUE'].append(p_value)
            if p_value < alpha:
                print()
                print("Hay una diferencia significativa entre los modelos (rechazamos H0)")
                d['DIFERENCIA'].append(1)
            else:
                print("No hay una diferencia significativa entre los modelos (no rechazamos H0)")
                d['DIFERENCIA'].append(0)
            print()

df_DMS_bil = pd.DataFrame(data=d)

In [None]:
# Save the two-sided DM results without the row index.
df_DMS_bil.to_csv(path_or_buf='df_dms_2.csv', index=False)

#### 4.4. Veces que los modelos tienen diferencias significativas <a class="anchor" id="4.4."></a> 

In [None]:
# Model-by-model table: for each ordered pair, the number of two-sided DM tests
# flagged as significant (sum of DIFERENCIA); '-' on the diagonal.
# NOTE(review): `[:-1]` leaves the last model label out of this table — confirm
# that excluding it is intended.
modelos = p['MODELO'].unique()[:-1]
tabla = pd.DataFrame(index=modelos, columns=modelos)

primer_modelo = df_DMS_bil['MODELO 1']
segundo_modelo = df_DMS_bil['MODELO 2']

for fila in modelos:
    coincide_fila = primer_modelo == fila
    for columna in modelos:
        if fila == columna:
            tabla.loc[fila, columna] = '-'
            continue
        seleccion = coincide_fila & (segundo_modelo == columna)
        tabla.loc[fila, columna] = df_DMS_bil.loc[seleccion, 'DIFERENCIA'].sum()

# Blank line, the table, blank line.
print('', tabla, '', sep='\n')

#### 4.5. RMSE y R2 por modelo y estación <a class="anchor" id="4.5."></a> 

In [None]:
# Two model-by-station tables: tabla2 holds the mean RMSE and tabla3 the mean R2
# of the rows matching each (model, station) combination.
modelos = p['MODELO'].unique()
estaciones = p['ESTACION_n'].unique()
tabla2 = pd.DataFrame(index=modelos, columns=estaciones)
tabla3 = pd.DataFrame(index=modelos, columns=estaciones)

for mod in modelos:
    filas_modelo = p['MODELO'] == mod
    for est in estaciones:
        # Select the matching rows once and read both metrics from them.
        coincidencias = p[filas_modelo & (p['ESTACION_n'] == est)]
        tabla2.loc[mod, est] = coincidencias['RMSE_med'].mean()
        tabla3.loc[mod, est] = coincidencias['R2'].mean()

In [None]:
# Transposed RMSE table with every column cast to float.
# Casting the whole frame (instead of a hard-coded list of column names) also
# converts the columns that were previously left as object dtype, and keeps
# working if the set of model labels changes.
hm = tabla2.T.astype(float)

In [None]:
# Transposed R2 table with every column cast to float.
# Casting the whole frame (instead of a hard-coded list of column names) also
# converts the columns that were previously left as object dtype, and keeps
# working if the set of model labels changes.
hmr2 = tabla3.T.astype(float)

In [None]:
# RMSE heatmap restricted to the neural-network models, as the title states.
# Columns are selected by network family so that every baseline column is
# excluded (previously only 'Naive ' was dropped); the float cast keeps the
# cell independent of how `hm` was typed upstream.
columnas_redes = [col for col in hm.columns if col.startswith(('LSTM', 'GRU'))]

plt.figure(figsize=(10, 4))
sns.heatmap(hm[columnas_redes].astype(float), annot=True, cmap=sns.color_palette("Reds", 200), fmt=".2f", linewidths=.5)
plt.title("Heatmap de RMSE para cada modelo de red neuronal en cada estación de control")
plt.show()

In [None]:
# R2 heatmap restricted to the neural-network models, as the title states.
# Columns are selected by network family so that every baseline column is
# excluded (previously only 'Naive ' was dropped); the float cast keeps the
# cell independent of how `hmr2` was typed upstream.
columnas_redes = [col for col in hmr2.columns if col.startswith(('LSTM', 'GRU'))]

plt.figure(figsize=(10, 4))
sns.heatmap(hmr2[columnas_redes].astype(float), annot=True, cmap=sns.color_palette("Blues_r", 200), fmt=".2f", linewidths=.5)
plt.title("Heatmap de R2 para cada modelo de red neuronal en cada estación de control")
plt.show()

#### 4.6. Predicciones <a class="anchor" id="4.6."></a> 

In [None]:
import matplotlib.pyplot as plt

# Overlay the first 168 LSTM 100 forecasts for station 57 on the observed
# PM10 series; forecast i is drawn starting at the i-th observed timestamp.
plt.figure(figsize=(20, 8))

es_lstm100_e57 = (p['MODELO'] == 'LSTM 100') & (p['ESTACION'] == '57')
predicciones = p[es_lstm100_e57]['PREDICCIONES'].values[0][:168*1]

# Observed values start right after the train and validation rows.
inicio_test_e57 = len(idx_train_e57) + len(idx_val_e57)
valores_reales = df_e57['NivelesPM10'].iloc[inicio_test_e57:][:168*1]
plt.plot(valores_reales, color='black', label='Valores reales')

vals = df_e57['NivelesPM10'].iloc[inicio_test_e57:][:168*1]
for i, prediccion_i in enumerate(predicciones):
    # Clip the forecast to the timestamps still available in the window.
    ventana = vals.index[i:i + len(prediccion_i)]
    min_len = min(len(ventana), len(prediccion_i))

    # Only the first six forecasts get a legend entry.
    plt.plot(
        vals.index[i:i + min_len],
        prediccion_i[:min_len],
        label=f'Predicción desde {vals.index[i]} a {vals.index[i + min_len - 1]}' if i < 6 else ""
    )

plt.title('Predicciones para la estación 57 con LSTM 100')
plt.xlabel('Horas')
plt.ylabel('Valor de la Predicción')
plt.grid(True)

plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Overlay the first 168 LSTM 100 forecasts for station 55 on the observed
# PM10 series; forecast i is drawn starting at the i-th observed timestamp.
plt.figure(figsize=(20, 8))

es_lstm100_e55 = (p['MODELO'] == 'LSTM 100') & (p['ESTACION'] == '55')
predicciones = p[es_lstm100_e55]['PREDICCIONES'].values[0][:168*1]

# Observed values start right after the train and validation rows.
inicio_test_e55 = len(idx_train_e55) + len(idx_val_e55)
valores_reales = df_e55['NivelesPM10'].iloc[inicio_test_e55:][:168*1]
plt.plot(valores_reales, color='black', label='Valores reales')

vals = df_e55['NivelesPM10'].iloc[inicio_test_e55:][:168*1]
for i, prediccion_i in enumerate(predicciones):
    # Clip the forecast to the timestamps still available in the window.
    ventana = vals.index[i:i + len(prediccion_i)]
    min_len = min(len(ventana), len(prediccion_i))

    # Only the first six forecasts get a legend entry.
    plt.plot(
        vals.index[i:i + min_len],
        prediccion_i[:min_len],
        label=f'Predicción desde {vals.index[i]} a {vals.index[i + min_len - 1]}' if i < 6 else ""
    )

plt.title('Predicciones para la estación 55 con LSTM 100')
plt.xlabel('Horas')
plt.ylabel('Valor de la Predicción')
plt.grid(True)

plt.legend()
plt.show()


#### 4.7. Veces que los modelos se superan unos a otros <a class="anchor" id="4.7."></a> 

In [None]:
# Model-by-model table: for each ordered pair, the number of one-sided DM tests
# flagged as significant (sum of DIFERENCIA); '-' on the diagonal.
modelos = p['MODELO'].unique()
tabla = pd.DataFrame(index=modelos, columns=modelos)

primer_modelo = df_DMS_uni['MODELO 1']
segundo_modelo = df_DMS_uni['MODELO 2']

for fila in modelos:
    coincide_fila = primer_modelo == fila
    for columna in modelos:
        if fila == columna:
            tabla.loc[fila, columna] = '-'
            continue
        seleccion = coincide_fila & (segundo_modelo == columna)
        tabla.loc[fila, columna] = df_DMS_uni.loc[seleccion, 'DIFERENCIA'].sum()

# Blank line, the table, blank line.
print('', tabla, '', sep='\n')