In [1]:
import sys
import importlib
import os

import files
importlib.reload(files)

import fonctions
importlib.reload(fonctions)

from files import *
from fonctions import *

In [2]:
# Per-city consumption tables: column_index=4 is extracted with the
# "consommation_heat_" prefix and column_index=5 with the "consommation_cool_" prefix.
# NOTE(review): `extract_and_concat_consommation` and the `toulouse` / `zurich` /
# `seville` inputs are not defined in this notebook; presumably they come from the
# star imports of `files` / `fonctions` — confirm their exact contents there.
consommation_chauffage_toulouse = extract_and_concat_consommation(toulouse, column_index=4, prefix="consommation_heat_")
consommation_chauffage_zurich = extract_and_concat_consommation(zurich, column_index=4, prefix="consommation_heat_")
consommation_chauffage_seville = extract_and_concat_consommation(seville, column_index=4, prefix="consommation_heat_")
consommation_climatisation_toulouse = extract_and_concat_consommation(toulouse, column_index=5, prefix="consommation_cool_")
consommation_climatisation_zurich = extract_and_concat_consommation(zurich, column_index=5, prefix="consommation_cool_")
consommation_climatisation_seville = extract_and_concat_consommation(seville, column_index=5, prefix="consommation_cool_")


# Weather sources keyed by city name.
city_groups = {
    "toulouse": toulouse_meteo,
    "zurich": zurich_meteo,
    "seville": seville_meteo
}

# Output-name prefix -> integer passed to `extract_and_combine_all`.
# NOTE(review): the integers look like source column indices of the weather
# files (by analogy with `column_index` above) — TODO confirm in `fonctions`.
prefix_column_map = {
    "Text_": 1,
    "Hum_": 3,
    "Wind_": 4,
    "Solar_": 5,
    "Ground_": 10
}

combined_data = extract_and_combine_all(city_groups, prefix_column_map)

# `.get` yields None (instead of raising) when a key is absent, so a typo in the
# key would only surface later, when the variable is first used.
Text_combined_toulouse = combined_data.get('Text_combined_toulouse')
Hum_combined_toulouse = combined_data.get('Hum_combined_toulouse')


In [3]:


# --- Regression metrics helper ---
def compute_metrics(predictions, targets):
    """Compute standard regression metrics between two tensors.

    Args:
        predictions: tensor of model outputs.
        targets: tensor of reference values (combined element-wise with
            `predictions`, so the two must be broadcast-compatible).

    Returns:
        Tuple `(mae, rmse, mse, r2, cvrmse)` of 0-dim tensors, where
        `cvrmse` is the RMSE divided by the mean of `targets`, in percent.

    Note:
        `r2` and `cvrmse` divide by the target variance sum and the target
        mean respectively; no guard is applied when those are zero.
    """
    residual = predictions - targets
    squared_error = residual ** 2
    target_mean = targets.mean()

    mse = squared_error.mean()
    rmse = mse.sqrt()
    mae = residual.abs().mean()

    # Coefficient of determination: 1 - SS_res / SS_tot
    ss_residual = squared_error.sum()
    ss_total = ((targets - target_mean) ** 2).sum()
    r2 = 1 - (ss_residual / ss_total)

    # Coefficient of variation of the RMSE, expressed in percent
    cvrmse = (rmse / target_mean) * 100
    return mae, rmse, mse, r2, cvrmse

In [6]:
"""
48 heures (24 à t-1 + 24 à t) avec 4 features
"""

import torch
from torch.utils.data import Dataset
import numpy as np

class TimeSeriesDataset(Dataset):
    """Day-ahead windows built from a DataFrame laid out in four 24-column blocks.

    Expected column layout of `df` (one row per day, one column per hour):
        - temperature : columns 0 to 23
        - heating     : columns 24 to 47
        - clusters    : columns 48 to 71
        - consumption : columns 72 to 95

    For every row i >= 1 one sample is produced:
        - x, shape (48, 4): the 24 hours of row i-1 with all four features,
          followed by the 24 hours of row i where the consumption feature is
          replaced by zeros (it is the quantity to predict);
        - y, shape (24,): the consumption block of row i.
    """

    HOURS_PER_ROW = 24
    N_FEATURES = 4

    def __init__(self, df):
        """Build the `x` / `y` tensors from `df`.

        Args:
            df: DataFrame (or 2-D array-like) of numeric values following the
                layout described in the class docstring.

        Raises:
            ValueError: if `df` has at least two rows but not exactly
                4 * 24 columns.
        """
        hours = self.HOURS_PER_ROW
        n_feat = self.N_FEATURES

        # One bulk conversion instead of per-row `.iloc` slicing; also avoids
        # building a tensor from a Python list of ndarrays, which is slow.
        values = np.asarray(df, dtype=np.float32)
        n_rows = values.shape[0] if values.ndim >= 1 else 0

        if n_rows < 2:
            # No (t-1, t) pair available: keep well-formed empty tensors so
            # downstream code still sees the documented trailing dimensions.
            self.x = torch.empty((0, 2 * hours, n_feat), dtype=torch.float32)
            self.y = torch.empty((0, hours), dtype=torch.float32)
            return

        if values.ndim != 2 or values.shape[1] != hours * n_feat:
            raise ValueError(
                f"expected a 2-D table with {hours * n_feat} columns "
                f"({n_feat} blocks of {hours}), got shape {values.shape}"
            )

        # (rows, 96) -> (rows, feature, hour) -> (rows, hour, feature)
        per_hour = values.reshape(n_rows, n_feat, hours).transpose(0, 2, 1)

        prev_block = per_hour[:-1]        # data at t-1, all four features
        curr_block = per_hour[1:].copy()  # data at t
        curr_block[:, :, n_feat - 1] = 0.0  # hide the consumption to predict

        x = np.concatenate([prev_block, curr_block], axis=1)  # (N, 48, 4)
        y = values[1:, (n_feat - 1) * hours:]                  # (N, 24)

        # torch.tensor copies, so the dataset never aliases the caller's data.
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Number of (t-1, t) samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the `(x, y)` pair at position `idx`."""
        return self.x[idx], self.y[idx]


def prepare_data(df_scaled, test_size=0.2, val_size=0.1, batch_size=32, test_batch_size=1):
    """Build the train / validation / test DataLoaders from a scaled DataFrame.

    The rows are split chronologically (no shuffling): the last `test_size`
    fraction becomes the test set, then the last `val_size` fraction of the
    remainder becomes the validation set. Each split is wrapped in its own
    `TimeSeriesDataset`, so every split loses its first row (it only serves
    as the t-1 context of the second one).

    Args:
        df_scaled: DataFrame in the layout expected by `TimeSeriesDataset`.
        test_size: fraction of all rows held out for testing.
        val_size: fraction of the remaining (train + validation) rows used
            for validation.
        batch_size: batch size of the train and validation loaders.
        test_batch_size: batch size of the test loader.

    Returns:
        Tuple `(train_loader, val_loader, test_loader)`. Only the training
        loader shuffles its samples.

    Note:
        `train_test_split` and `DataLoader` are resolved from the notebook's
        global namespace, as in the original cell.
    """
    # Chronological split: test first, then validation out of what is left
    df_trainval, df_test = train_test_split(df_scaled, test_size=test_size, shuffle=False)
    df_train, df_val = train_test_split(df_trainval, test_size=val_size, shuffle=False)

    # Positional indexing is used downstream, so restart each index at 0
    train_dataset = TimeSeriesDataset(df_train.reset_index(drop=True))
    val_dataset = TimeSeriesDataset(df_val.reset_index(drop=True))
    test_dataset = TimeSeriesDataset(df_test.reset_index(drop=True))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    return train_loader, val_loader, test_loader



In [7]:
import torch
import torch.nn as nn
import math


# Positional encoding that gives the model a notion of order in the sequence
class PositionalEncoding(nn.Module):
    """Additive sinusoidal positional encoding.

    A table of shape (1, max_len, d_model) is precomputed and stored as the
    non-trainable buffer `pe`: even feature indices hold sines, odd feature
    indices hold cosines of position-dependent angles.

    Args:
        d_model: size of the feature dimension (even or odd).
        max_len: longest sequence length the table can serve.
    """

    def __init__(self, d_model, max_len=500):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # columns; trim the angles so the assignment shapes always match.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding of the first `x.size(1)` positions to `x`.

        Args:
            x: tensor of shape (batch, seq_len, d_model) with
                seq_len <= max_len.
        """
        return x + self.pe[:, :x.size(1)]

# Main Transformer model
class TimeSeriesTransformer(nn.Module):
    """Transformer encoder over an hourly feature sequence.

    Pipeline: linear projection of the input features to `d_model`,
    positional encoding, a stack of `num_layers` encoder layers, then a
    linear head of width `output_size` applied at every position.

    Only channel 0 of the head is returned, for the last 24 positions of the
    sequence.
    """

    def __init__(self, num_features=3, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24):
        super().__init__()
        # Sub-modules are created in the same order as before so that a seeded
        # run initialises the same parameters.
        self.input_projection = nn.Linear(num_features, d_model)
        self.pos_encoder = PositionalEncoding(d_model)

        layer_config = dict(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_config),
            num_layers=num_layers,
        )

        self.output_layer = nn.Linear(d_model, output_size)

    def forward(self, x):
        """Run the model.

        Args:
            x: tensor of shape (batch_size, seq_len, num_features).

        Returns:
            Tensor of shape (batch_size, min(seq_len, 24)): channel 0 of the
            output head at the last 24 sequence positions.
        """
        embedded = self.pos_encoder(self.input_projection(x))  # (batch, seq_len, d_model)
        encoded = self.transformer_encoder(embedded)            # (batch, seq_len, d_model)
        projected = self.output_layer(encoded)                  # (batch, seq_len, output_size)
        return projected[:, -24:, 0]


In [8]:

# --- Training loop ---
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10):
    """Train `model` and print one summary line per epoch.

    Args:
        model: module called as `model(x_batch)`.
        train_loader: iterable of `(x_batch, y_batch)` pairs used for the
            optimisation steps.
        val_loader: iterable of `(x_val, y_val)` pairs evaluated after every
            epoch, without gradient tracking.
        optimizer: optimiser bound to the parameters of `model`.
        criterion: loss callable `criterion(output, target)`.
        epochs: number of passes over `train_loader`.

    Returns:
        None. Per-epoch averages (over batches) of the training loss and of
        the validation loss / MAE / RMSE are printed; an average over an
        empty loader is reported as nan instead of raising
        ZeroDivisionError.
    """
    n_train_batches = len(train_loader)
    n_val_batches = len(val_loader)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            # Training MAE/RMSE were never reported, so they are no longer
            # computed on the graph-attached output at every step.
            total_loss += loss.item()

        # Validation
        model.eval()
        val_loss = val_mae = val_rmse = 0.0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                output = model(x_val)
                loss = criterion(output, y_val)
                mae, rmse, _, _, _ = compute_metrics(output, y_val)
                val_loss += loss.item()
                val_mae += mae.item()
                val_rmse += rmse.item()

        # Average over batches; nan marks "no batch seen"
        nan = float("nan")
        train_loss_avg = total_loss / n_train_batches if n_train_batches else nan
        val_loss_avg = val_loss / n_val_batches if n_val_batches else nan
        val_mae_avg = val_mae / n_val_batches if n_val_batches else nan
        val_rmse_avg = val_rmse / n_val_batches if n_val_batches else nan

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}, Val MAE: {val_mae_avg:.4f}, Val RMSE: {val_rmse_avg:.4f}")


In [9]:
def evaluate_test(model, test_loader, scaler_y):
    """Evaluate `model` on the test set in the original (unscaled) units.

    Predictions and targets are collected over `test_loader`, mapped back
    with `scaler_y.inverse_transform`, and the resulting metrics are printed.

    Args:
        model: module called as `model(x_test)`, returning 24 values per
            sample.
        test_loader: iterable of `(x_test, y_test)` pairs; any batch size is
            accepted, including a smaller last batch.
        scaler_y: fitted scaler exposing `inverse_transform` for arrays of
            shape (n_samples, 24).

    Raises:
        ValueError: if `test_loader` yields no batch.
    """
    model.eval()
    pred_batches = []
    target_batches = []

    with torch.no_grad():
        for x_test, y_test in test_loader:
            y_pred = model(x_test)
            # Keep an explicit (batch, 24) shape per batch: squeezing and then
            # stacking breaks when batches do not all have the same size.
            pred_batches.append(y_pred.cpu().numpy().reshape(-1, 24))
            target_batches.append(y_test.cpu().numpy().reshape(-1, 24))

    if not pred_batches:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    all_preds = np.concatenate(pred_batches, axis=0)
    all_targets = np.concatenate(target_batches, axis=0)

    # Inverse scaling
    all_preds_orig = scaler_y.inverse_transform(all_preds)
    all_targets_orig = scaler_y.inverse_transform(all_targets)

    # Back to tensors for compute_metrics
    y_pred_tensor = torch.tensor(all_preds_orig)
    y_test_tensor = torch.tensor(all_targets_orig)

    # Compute and report the metrics
    mae, rmse, mse, r2, cvrmse = compute_metrics(y_pred_tensor, y_test_tensor)
    print(f"Test Metrics -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, CVRMSE: {cvrmse:.4f}")


In [11]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Step 1: clustering
# NOTE(review): `clustering` is not defined in this notebook (presumably from
# `fonctions`). The slicing below assumes its result ends with 24 consumption
# columns followed by one status column and one cluster column — TODO confirm.
clustering_heat_toulouse = clustering(
    df=consommation_chauffage_toulouse,
    n_parts=1,
    status_column="heat_on",
    n_clusters_list=[3]
)

# Step 2: concatenate outdoor temperature and clustering output side by side,
# then give the columns neutral positional names
df2 = Text_combined_toulouse.copy()
df2 = pd.concat([df2, clustering_heat_toulouse], axis=1).reset_index(drop=True)
df2.columns = [f"col_{i}" for i in range(df2.shape[1])]


# Step 3: separate input / target columns
target_columns = df2.columns[-26:-2]
input_columns = df2.columns[:24]

# One scaler per role: re-fitting a single instance for both inputs and targets
# only inverts targets correctly because the target fit happens to come last.
# NOTE(review): both scalers are fit on the full table, i.e. before the
# train/validation/test split performed later in `prepare_data`.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = pd.DataFrame(scaler_X.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler_y.fit_transform(df2[target_columns]), columns=target_columns)

# Backward-compatible name: later cells pass `scaler` to invert the targets
scaler = scaler_y

# Broadcast the per-row status and cluster values to 24 hourly columns
status = pd.DataFrame(np.tile(df2.iloc[:, -2].values, (24, 1)).T, columns=[f"status_{i}" for i in range(24)])
cluster = pd.DataFrame(np.tile(df2.iloc[:, -1].values, (24, 1)).T, columns=[f"cluster_{i}" for i in range(24)])
# Final layout: temperature | status | cluster | consumption (24 columns each)
df_scaled = pd.concat([X_scaled, status, cluster, y_scaled], axis=1).reset_index(drop=True)




In [15]:
"""
Prédiction en fonction de (Text, conso, cluster et status) à t-1 + (Text, cluster et status) à t,
sur 48 heures, 4 features par heure
"""


# Seed PyTorch's global RNG so that weight initialisation, dropout and the
# shuffling of the training loader are repeatable from one run to the next.
SEED = 42
torch.manual_seed(SEED)

train_loader, val_loader, test_loader = prepare_data(df_scaled)
model = TimeSeriesTransformer(num_features=4, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10)
# `scaler` must be the scaler fitted on the target (consumption) columns, since
# it is used to map predictions and targets back to their original units.
evaluate_test(model, test_loader, scaler)

Epoch 1/10, Train Loss: 0.4885, Val Loss: 0.6399, Val MAE: 0.6476, Val RMSE: 0.7887
Epoch 2/10, Train Loss: 0.2142, Val Loss: 0.4614, Val MAE: 0.5276, Val RMSE: 0.6703
Epoch 3/10, Train Loss: 0.1549, Val Loss: 0.4230, Val MAE: 0.4691, Val RMSE: 0.6426
Epoch 4/10, Train Loss: 0.1413, Val Loss: 0.3805, Val MAE: 0.4526, Val RMSE: 0.6087
Epoch 5/10, Train Loss: 0.1302, Val Loss: 0.3798, Val MAE: 0.4550, Val RMSE: 0.6072
Epoch 6/10, Train Loss: 0.1217, Val Loss: 0.3302, Val MAE: 0.4096, Val RMSE: 0.5652
Epoch 7/10, Train Loss: 0.1122, Val Loss: 0.2829, Val MAE: 0.3790, Val RMSE: 0.5219
Epoch 8/10, Train Loss: 0.0890, Val Loss: 0.3013, Val MAE: 0.3755, Val RMSE: 0.5356
Epoch 9/10, Train Loss: 0.0914, Val Loss: 0.2412, Val MAE: 0.3207, Val RMSE: 0.4742
Epoch 10/10, Train Loss: 0.0823, Val Loss: 0.2403, Val MAE: 0.3249, Val RMSE: 0.4740
Test Metrics -> MAE: 110.1486, RMSE: 182.6036, MSE: 33344.0859, R2: 0.9361, CVRMSE: 70.6821
