# ☀️ Solar Energy Production – France (RTE Dataset)

This notebook analyzes real hourly data on solar energy production in France.  
It uses temporal variables derived from the timestamp (hour, day, month) to explore production patterns and build a predictive model.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Plot defaults: seaborn "whitegrid" style and a 12 x 6 default figure size
sns.set(style="whitegrid")
plt.rc("figure", figsize=(12, 6))


In [None]:
# Load the preprocessed dataset, parsing the "datetime" column as timestamps
csv_path = "france_solar_hourly_clean.csv"
df = pd.read_csv(csv_path, parse_dates=["datetime"])

# Preview the first rows
df.head()


In [None]:
# Count missing values per column (printed as plain text)
print(df.isnull().sum())

# Basic descriptive statistics. This must be the LAST expression in the cell:
# Jupyter only renders the value of the final expression, so placing
# `df.describe()` before the print would silently discard the table.
df.describe()


In [None]:
# Mean solar production for each hour of the day, drawn as a bar chart
hourly_mean = df.groupby("hour")["prod_solaire_MWh"].mean()
ax = hourly_mean.plot(kind="bar", title="Average Solar Production by Hour")
ax.set_ylabel("MWh")
ax.set_xlabel("Hour of Day")
plt.show()


In [None]:
# Predictors are the calendar columns; the target is the `prod_solaire_MWh` column
feature_columns = ["hour", "day", "month"]
X = df[feature_columns]
y = df["prod_solaire_MWh"]

# Hold out 20% of the rows for testing; the seed makes the split repeatable.
# NOTE(review): rows are split at random, not chronologically — confirm this is
# the intended evaluation scheme for time-ordered data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Build a 100-tree random forest (fixed seed) and fit it on the training split;
# `fit` returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out test features
y_pred = model.predict(X_test)


In [None]:
# Evaluation metrics on the held-out test set
mae = mean_absolute_error(y_test, y_pred)

# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises a
# TypeError. The square-root form gives the same value on every version.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"MAE: {mae:.2f} MWh")
print(f"RMSE: {rmse:.2f} MWh")
