In [1]:
import sys
import importlib
import os

import files
importlib.reload(files)

import fonctions
importlib.reload(fonctions)

from files import *
from fonctions import *

In [2]:
consommation_chauffage_toulouse = extract_and_concat_consommation(toulouse, column_index=4, prefix="consommation_heat_")
consommation_chauffage_zurich = extract_and_concat_consommation(zurich, column_index=4, prefix="consommation_heat_")
consommation_chauffage_seville = extract_and_concat_consommation(seville, column_index=4, prefix="consommation_heat_")
consommation_climatisation_toulouse = extract_and_concat_consommation(toulouse, column_index=5, prefix="consommation_cool_")
consommation_climatisation_zurich = extract_and_concat_consommation(zurich, column_index=5, prefix="consommation_cool_")
consommation_climatisation_seville = extract_and_concat_consommation(seville, column_index=5, prefix="consommation_cool_")


city_groups = {
    "toulouse": toulouse_meteo,
    "zurich": zurich_meteo,
    "seville": seville_meteo
}

prefix_column_map = {
    "Text_": 1,
    "Hum_": 3,
    "Wind_": 4,
    "Solar_": 5,
    "Ground_": 10
}

combined_data = extract_and_combine_all(city_groups, prefix_column_map)

Text_combined_toulouse = combined_data.get('Text_combined_toulouse')
Hum_combined_toulouse = combined_data.get('Hum_combined_toulouse')


In [3]:
import torch
import torch.nn as nn
import math

# Encodage positionnel pour donner une notion du temps au modèle
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=500):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

# Le modèle Transformer principal
class TimeSeriesTransformer(nn.Module):
    def __init__(self, num_features=3, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24):
        super().__init__()
        self.input_projection = nn.Linear(num_features, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.output_layer = nn.Linear(d_model, output_size)
        #self.output_layer = nn.Linear(d_model, 1)


    def forward(self, x):
        # x shape: (batch_size, seq_len=24, num_features=3)
        x = self.input_projection(x)         # → (batch_size, 24, d_model)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)      # → (batch_size, 24, d_model)
        out = self.output_layer(x)           # → (batch_size, 24, 24)
        return out[:, :, 0]                  # → (batch_size, 24), on garde une seule sortie par heure


In [4]:
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


class TimeSeriesDataset(Dataset):
    def __init__(self, df, input_blocks, target_block):
        """
        df : DataFrame avec toutes les variables concaténées par blocs de 24h
        input_blocks : liste de tuples (col_start, col_end, day_offset)
                       où day_offset = 0 pour aujourd’hui, -1 pour hier, etc.
        target_block : tuple (col_start, col_end) pour la target (conso aujourd’hui)
        """
        self.x = []
        self.y = []

        for i in range(1, len(df)):  # Commencer à 1 pour avoir accès à hier
            input_seq = []

            for start, end, offset in input_blocks:
                row_index = i + offset
                values = df.iloc[row_index, start:end].values  # shape: (24,)
                input_seq.append(values)

            # shape finale: (24, num_features)
            input_seq = np.stack(input_seq, axis=1)
            self.x.append(input_seq)

            # Target: consommation aujourd’hui
            target = df.iloc[i, target_block[0]:target_block[1]].values
            self.y.append(target)

        self.x = torch.tensor(self.x, dtype=torch.float32)
        self.y = torch.tensor(self.y, dtype=torch.float32)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


In [5]:


# --- Fonction pour calculer les métriques ---
def compute_metrics(predictions, targets):
    mse = torch.mean((predictions - targets) ** 2)
    rmse = torch.sqrt(mse)
    mae = torch.mean(torch.abs(predictions - targets))
    ss_total = torch.sum((targets - torch.mean(targets)) ** 2)
    ss_residual = torch.sum((targets - predictions) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    cvrmse = (rmse / torch.mean(targets)) * 100
    return mae, rmse, mse, r2, cvrmse

In [6]:
# --- Fonction pour séparer les données ---
def prepare_data(df_scaled,n_features):

    input_blocks = []

    for i in range(n_features):
        start = i * 24
        end = (i + 1) * 24

        # Aujourd’hui
        input_blocks.append((start, end, 0))
        # Hier
        input_blocks.append((start, end, -1))

    # Ajouter la consommation d’hier (toujours la dernière feature)
    conso_start = n_features * 24
    conso_end = conso_start + 24
    input_blocks.append((conso_start, conso_end, -1))
    target_block = (df_scaled.shape[1] - 24, df_scaled.shape[1])    
        
    df_trainval, df_test = train_test_split(df_scaled, test_size=0.2, shuffle=False)
    df_train, df_val = train_test_split(df_trainval, test_size=0.1, shuffle=False)
    
    train_dataset = TimeSeriesDataset(df_train.reset_index(drop=True),input_blocks, target_block)
    val_dataset = TimeSeriesDataset(df_val.reset_index(drop=True),input_blocks, target_block)
    test_dataset = TimeSeriesDataset(df_test.reset_index(drop=True),input_blocks, target_block)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    
    return train_loader, val_loader, test_loader

In [7]:

# --- Fonction pour entraîner le modèle ---
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10):
    for epoch in range(epochs):
        model.train()
        total_loss = total_mae = total_rmse = 0
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            mae, rmse, _, _, _ = compute_metrics(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_mae += mae.item()
            total_rmse += rmse.item()

        # Validation
        model.eval()
        val_loss = val_mae = val_rmse = 0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                output = model(x_val)
                loss = criterion(output, y_val)
                mae, rmse, _, _, _ = compute_metrics(output, y_val)
                val_loss += loss.item()
                val_mae += mae.item()
                val_rmse += rmse.item()

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss / len(val_loader):.4f}, Val MAE: {val_mae / len(val_loader):.4f}, Val RMSE: {val_rmse / len(val_loader):.4f}")


In [8]:
# --- Fonction pour évaluer sur le test set ---
def evaluate_test(model, test_loader, scaler_y):
    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for x_test, y_test in test_loader:
            y_pred = model(x_test)
            all_preds.append(y_pred.squeeze().cpu().numpy())
            all_targets.append(y_test.squeeze().cpu().numpy())

    all_preds = np.array(all_preds).reshape(-1, 24)
    all_targets = np.array(all_targets).reshape(-1, 24)

    # Inverse scaling
    all_preds_orig = scaler_y.inverse_transform(all_preds)
    all_targets_orig = scaler_y.inverse_transform(all_targets)

    # Convertir en tensors
    y_pred_tensor = torch.tensor(all_preds_orig)
    y_test_tensor = torch.tensor(all_targets_orig)

    # Calculer les métriques
    mae, rmse, mse, r2, cvrmse = compute_metrics(y_pred_tensor, y_test_tensor)
    print(f"Test Metrics -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, CVRMSE: {cvrmse:.4f}")


In [9]:
"""
prediction en fonction de Text et conso t-1 + text  t 
"""

df2 = Text_combined_toulouse.copy()
df2=pd.concat([df2,consommation_chauffage_toulouse],axis=1).reset_index(drop=True)
df2.columns = [f"col_{i}" for i in range(df2.shape[1])]

target_columns = df2.columns[-24:]  
input_columns = df2.columns[:-24]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = pd.DataFrame(scaler_X.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler_y.fit_transform(df2[target_columns]), columns=target_columns)
df_scaled = pd.concat([X_scaled, y_scaled], axis=1)


train_loader, val_loader, test_loader = prepare_data(df_scaled,1)


model = TimeSeriesTransformer(num_features=3, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10)
evaluate_test(model, test_loader, scaler_y) 

  self.x = torch.tensor(self.x, dtype=torch.float32)


Epoch 1/10, Train Loss: 0.0712, Val Loss: 0.0325, Val MAE: 0.1499, Val RMSE: 0.1793
Epoch 2/10, Train Loss: 0.0201, Val Loss: 0.0274, Val MAE: 0.1355, Val RMSE: 0.1640
Epoch 3/10, Train Loss: 0.0167, Val Loss: 0.0256, Val MAE: 0.1275, Val RMSE: 0.1577
Epoch 4/10, Train Loss: 0.0146, Val Loss: 0.0233, Val MAE: 0.1227, Val RMSE: 0.1509
Epoch 5/10, Train Loss: 0.0132, Val Loss: 0.0224, Val MAE: 0.1206, Val RMSE: 0.1486
Epoch 6/10, Train Loss: 0.0119, Val Loss: 0.0203, Val MAE: 0.1135, Val RMSE: 0.1409
Epoch 7/10, Train Loss: 0.0105, Val Loss: 0.0176, Val MAE: 0.1045, Val RMSE: 0.1313
Epoch 8/10, Train Loss: 0.0097, Val Loss: 0.0179, Val MAE: 0.1044, Val RMSE: 0.1323
Epoch 9/10, Train Loss: 0.0088, Val Loss: 0.0161, Val MAE: 0.1008, Val RMSE: 0.1256
Epoch 10/10, Train Loss: 0.0082, Val Loss: 0.0153, Val MAE: 0.0948, Val RMSE: 0.1219
Test Metrics -> MAE: 149.8173, RMSE: 255.2692, MSE: 65162.3828, R2: 0.8750, CVRMSE: 98.8095


In [10]:
"""
prediction en fonction de Text et hum  et conso t-1 + text et hum  t 
"""

Text_Hum_combined_toulouse=concat_and_create_final_df("toulouse",  ['Text_', 'Hum_'],combined_data)
df2 = Text_Hum_combined_toulouse.copy()
df2=pd.concat([df2,consommation_chauffage_toulouse],axis=1).reset_index(drop=True)
df2.columns = [f"col_{i}" for i in range(df2.shape[1])]

target_columns = df2.columns[-24:]  
input_columns = df2.columns[:-24]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = pd.DataFrame(scaler_X.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler_y.fit_transform(df2[target_columns]), columns=target_columns)
df_scaled = pd.concat([X_scaled, y_scaled], axis=1)


train_loader, val_loader, test_loader = prepare_data(df_scaled,2)


model = TimeSeriesTransformer(num_features=5, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10)
evaluate_test(model, test_loader, scaler_y) 

Epoch 1/10, Train Loss: 0.0900, Val Loss: 0.0368, Val MAE: 0.1636, Val RMSE: 0.1912
Epoch 2/10, Train Loss: 0.0257, Val Loss: 0.0287, Val MAE: 0.1421, Val RMSE: 0.1678
Epoch 3/10, Train Loss: 0.0180, Val Loss: 0.0252, Val MAE: 0.1303, Val RMSE: 0.1575
Epoch 4/10, Train Loss: 0.0142, Val Loss: 0.0225, Val MAE: 0.1196, Val RMSE: 0.1486
Epoch 5/10, Train Loss: 0.0129, Val Loss: 0.0195, Val MAE: 0.1098, Val RMSE: 0.1381
Epoch 6/10, Train Loss: 0.0108, Val Loss: 0.0165, Val MAE: 0.0994, Val RMSE: 0.1269
Epoch 7/10, Train Loss: 0.0088, Val Loss: 0.0147, Val MAE: 0.0927, Val RMSE: 0.1200
Epoch 8/10, Train Loss: 0.0078, Val Loss: 0.0135, Val MAE: 0.0872, Val RMSE: 0.1148
Epoch 9/10, Train Loss: 0.0074, Val Loss: 0.0132, Val MAE: 0.0870, Val RMSE: 0.1138
Epoch 10/10, Train Loss: 0.0068, Val Loss: 0.0133, Val MAE: 0.0835, Val RMSE: 0.1137
Test Metrics -> MAE: 138.6771, RMSE: 234.8675, MSE: 55162.7617, R2: 0.8942, CVRMSE: 90.9124


In [11]:
"""
prediction en fonction de( Text et hum et wind  et conso )t-1 + (text  et hum et wind ) t 
"""

Text_Hum_combined_toulouse=concat_and_create_final_df("toulouse",  ['Text_', 'Hum_','Wind_'],combined_data)


df2 = Text_Hum_combined_toulouse.copy()
df2=pd.concat([df2,consommation_chauffage_toulouse],axis=1).reset_index(drop=True)


df2.columns = [f"col_{i}" for i in range(df2.shape[1])]

target_columns = df2.columns[-24:]  
input_columns = df2.columns[:-24]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = pd.DataFrame(scaler_X.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler_y.fit_transform(df2[target_columns]), columns=target_columns)
df_scaled = pd.concat([X_scaled, y_scaled], axis=1)


train_loader, val_loader, test_loader = prepare_data(df_scaled,3)


model = TimeSeriesTransformer(num_features=7, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10)
evaluate_test(model, test_loader, scaler_y) 

Epoch 1/10, Train Loss: 0.0877, Val Loss: 0.0446, Val MAE: 0.1713, Val RMSE: 0.2067
Epoch 2/10, Train Loss: 0.0291, Val Loss: 0.0322, Val MAE: 0.1431, Val RMSE: 0.1762
Epoch 3/10, Train Loss: 0.0193, Val Loss: 0.0267, Val MAE: 0.1330, Val RMSE: 0.1614
Epoch 4/10, Train Loss: 0.0163, Val Loss: 0.0250, Val MAE: 0.1292, Val RMSE: 0.1563
Epoch 5/10, Train Loss: 0.0141, Val Loss: 0.0226, Val MAE: 0.1190, Val RMSE: 0.1483
Epoch 6/10, Train Loss: 0.0125, Val Loss: 0.0209, Val MAE: 0.1162, Val RMSE: 0.1430
Epoch 7/10, Train Loss: 0.0107, Val Loss: 0.0179, Val MAE: 0.1054, Val RMSE: 0.1320
Epoch 8/10, Train Loss: 0.0101, Val Loss: 0.0166, Val MAE: 0.1010, Val RMSE: 0.1271
Epoch 9/10, Train Loss: 0.0097, Val Loss: 0.0161, Val MAE: 0.0987, Val RMSE: 0.1252
Epoch 10/10, Train Loss: 0.0092, Val Loss: 0.0144, Val MAE: 0.0916, Val RMSE: 0.1180
Test Metrics -> MAE: 132.0270, RMSE: 256.6242, MSE: 65855.9688, R2: 0.8737, CVRMSE: 99.3339


In [12]:
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# Étape 1 : clustering
clustering_heat_toulouse = clustering(
    df=consommation_chauffage_toulouse,  
    n_parts=1,                          
    status_column="heat_on",           
    n_clusters_list=[3]                
)

# Étape 2 : concaténer les données
df2 = Text_combined_toulouse.copy()
df2 = pd.concat([df2, clustering_heat_toulouse], axis=1).reset_index(drop=True)
df2.columns = [f"col_{i}" for i in range(df2.shape[1])]


# Étape 3 : Séparer colonnes input/target
target_columns = df2.columns[-26:-2]  
input_columns = df2.columns[:24]

scaler = StandardScaler()

X_scaled = pd.DataFrame(scaler.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler.fit_transform(df2[target_columns]), columns=target_columns)

status = pd.DataFrame(np.tile(df2.iloc[:, -2].values, (24, 1)).T, columns=[f"status_{i}" for i in range(24)])
cluster = pd.DataFrame(np.tile(df2.iloc[:, -1].values, (24, 1)).T, columns=[f"cluster_{i}" for i in range(24)])
df_scaled = pd.concat([X_scaled, status, cluster, y_scaled], axis=1).reset_index(drop=True)




In [13]:
"""
prediction en fonction de (Text et conso et cluster et status) t-1 + (Text  et cluster et status)  t 
"""


train_loader, val_loader, test_loader = prepare_data(df_scaled,3)

model = TimeSeriesTransformer(num_features=7, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24) 
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10)
evaluate_test(model, test_loader, scaler) 

Epoch 1/10, Train Loss: 0.3512, Val Loss: 0.6471, Val MAE: 0.6013, Val RMSE: 0.7925
Epoch 2/10, Train Loss: 0.2259, Val Loss: 0.4612, Val MAE: 0.5200, Val RMSE: 0.6693
Epoch 3/10, Train Loss: 0.1628, Val Loss: 0.4311, Val MAE: 0.5116, Val RMSE: 0.6510
Epoch 4/10, Train Loss: 0.1505, Val Loss: 0.4250, Val MAE: 0.4650, Val RMSE: 0.6416
Epoch 5/10, Train Loss: 0.1294, Val Loss: 0.3395, Val MAE: 0.4100, Val RMSE: 0.5734
Epoch 6/10, Train Loss: 0.1133, Val Loss: 0.3179, Val MAE: 0.3779, Val RMSE: 0.5548
Epoch 7/10, Train Loss: 0.1009, Val Loss: 0.3301, Val MAE: 0.3840, Val RMSE: 0.5608
Epoch 8/10, Train Loss: 0.0932, Val Loss: 0.2459, Val MAE: 0.3259, Val RMSE: 0.4854
Epoch 9/10, Train Loss: 0.0780, Val Loss: 0.2652, Val MAE: 0.3667, Val RMSE: 0.5045
Epoch 10/10, Train Loss: 0.0716, Val Loss: 0.2890, Val MAE: 0.3619, Val RMSE: 0.5204
Test Metrics -> MAE: 95.6262, RMSE: 190.6623, MSE: 36352.1289, R2: 0.9303, CVRMSE: 73.8015
