In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError
import joblib


In [2]:
# Cargar los datos de generación y meteorológicos para la planta 2
plant_2_generation = pd.read_csv('Plant_2_Generation_Data.csv')
plant_2_weather = pd.read_csv('Plant_2_Weather_Sensor_Data.csv')

# Copiar los datos de generación para mantener df_GD2 limpio
df_GD2 = plant_2_generation.copy()

# Aplicar el mapeo a los nombres de los paneles solares
unique_source_keys_list_2 = df_GD2['SOURCE_KEY'].unique()
source_key_mapping_2 = {key: f"Solar_Panel_{i+1}" for i, key in enumerate(unique_source_keys_list_2)}
df_GD2['SOURCE_KEY'] = df_GD2['SOURCE_KEY'].map(source_key_mapping_2)

# Convertir la columna 'DATE_TIME' a formato de fecha y hora
# Ajustar el formato de fecha según sea necesario para que coincida con el archivo de la planta 2
df_GD2['DATE_TIME'] = pd.to_datetime(df_GD2['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')
plant_2_weather['DATE_TIME'] = pd.to_datetime(plant_2_weather['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')

# Unir los datos meteorológicos al dataframe de generación en función de la fecha y hora
df_GD2_with_weather = pd.merge(df_GD2, plant_2_weather, on='DATE_TIME', how='left')

# Eliminar las columnas PLANT_ID_y y SOURCE_KEY_y y renombrar las columnas PLANT_ID_x y SOURCE_KEY_x
df_GD2_with_weather_clean = df_GD2_with_weather.drop(columns=['PLANT_ID_y', 'SOURCE_KEY_y', 'PLANT_ID_x'])

# Renombrar las columnas para eliminar el sufijo '_x'
df_GD2_with_weather_clean = df_GD2_with_weather_clean.rename(columns={'SOURCE_KEY_x': 'SOURCE_KEY'})

# Mostrar cuántos valores nulos hay por columna
nulos2 = df_GD2_with_weather_clean.isnull().sum()
print("Valores nulos en cada columna (planta 2):\n", nulos2)

# Rellenar los valores nulos en solo las columnas numéricas con la media
numerical_cols_2 = df_GD2_with_weather_clean.select_dtypes(include=['float64', 'int64']).columns
df_GD2_with_weather_clean[numerical_cols_2] = df_GD2_with_weather_clean[numerical_cols_2].fillna(df_GD2_with_weather_clean[numerical_cols_2].mean())

# Guardar el dataframe limpio de la planta 2
df_GD2_limpio = df_GD2_with_weather_clean

Valores nulos en cada columna (planta 2):
 DATE_TIME              0
SOURCE_KEY             0
DC_POWER               0
AC_POWER               0
DAILY_YIELD            0
TOTAL_YIELD            0
AMBIENT_TEMPERATURE    0
MODULE_TEMPERATURE     0
IRRADIATION            0
dtype: int64


In [3]:
features = ['AC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD','AMBIENT_TEMPERATURE', 
            'MODULE_TEMPERATURE', 'IRRADIATION']
# Definir X (variables predictoras) y Y (variable objetivo)
X_ker = df_GD2_limpio[features].values
Y_ker = df_GD2_limpio['DC_POWER'].values

kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scores = []
mae_scores = []
r2_scores = []

for train_index, test_index in kf.split(X_ker):
    X_train_ker, X_test_ker = X_ker[train_index], X_ker[test_index]
    Y_train_ker, Y_test_ker = Y_ker[train_index], Y_ker[test_index]
    
    scaler_ker = MinMaxScaler()
    X_train_ker = scaler_ker.fit_transform(X_train_ker)
    X_test_ker = scaler_ker.transform(X_test_ker)
    joblib.dump(scaler_ker, 'scaler_ker2.pkl')
    
    
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(X_train_ker.shape[1],), kernel_regularizer=l2(0.05)))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.05)))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.05)))
    model.add(Dense(1))
    
    model.compile(optimizer='adam', loss=MeanSquaredError(), metrics=[MeanAbsoluteError()])
    
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(X_train_ker, Y_train_ker, epochs=100, batch_size=32, verbose=0,validation_data=(X_test_ker, Y_test_ker), callbacks=[early_stop])
    
    mse, mae = model.evaluate(X_test_ker, Y_test_ker, verbose=0)
    mse_scores.append(mse)
    mae_scores.append(mae)
    
    Y_test_pred = model.predict(X_test_ker, verbose=0)
    r2 = r2_score(Y_test_ker, Y_test_pred)
    r2_scores.append(r2)
  

print("MSE en cada fold:", mse_scores)
print("MSE promedio en validación cruzada:", np.mean(mse_scores))
print("MAE en cada fold:", mae_scores)
print("MAE promedio en validación cruzada:", np.mean(mae_scores))
print("R² en cada fold:", r2_scores)
print("R² promedio en validación cruzada:", np.mean(r2_scores))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


MSE en cada fold: [53.71696853637695, 49.55852508544922, 53.03296661376953, 61.07216262817383, 56.88442611694336]
MSE promedio en validación cruzada: 54.85300979614258
MAE en cada fold: [2.221435070037842, 2.3739919662475586, 2.096947193145752, 2.8867104053497314, 2.463996171951294]
MAE promedio en validación cruzada: 2.4086161613464356
R² en cada fold: [0.9998718744173271, 0.9998696875905188, 0.9998739189517933, 0.9998284905135796, 0.9998678678876641]
R² promedio en validación cruzada: 0.9998623678721765


MSE en cada fold: [62.32850646972656, 73.07862091064453, 103.7606201171875, 90.55905151367188, 56.519134521484375]

MSE promedio en validación cruzada: 77.24918670654297

MAE en cada fold: [2.724015951156616, 3.6545634269714355, 4.477487564086914, 4.092912197113037, 2.367548942565918]

MAE promedio en validación cruzada: 3.463305616378784

R² en cada fold: [0.9998338355572783, 0.9997193536561324, 0.9994696329800683, 0.9995921764835287, 0.999880124775671]

R² promedio en validación cruzada: 0.9996990246905357

In [4]:
model.save('Modelo_SP2.h5')

