In [22]:
import sys
import os
import importlib

# Ajout du chemin une fois
sys.path.append(os.path.abspath('../python_files'))

# Liste des noms des modules à importer et recharger
module_names = [
    'preprocessing',
    'imports',
    'files',
    'clustering',
    'profile_prediction',
    'prediction_24h'
]

# Import et reload automatique
modules = {}
for name in module_names:
    modules[name] = importlib.import_module(name)
    importlib.reload(modules[name])

# Importer tout en global namespace (équivalent à from ... import *)
for name in module_names:
    exec(f'from {name} import *')


In [23]:
consommation_chauffage = extract_and_concat_consommation(Simulation_Dataset, column_index=4, prefix="consommation_heat_")


In [24]:

# Exemple d'utilisation
city_groups = {
    "Weather_Dataset": Weather_Dataset
   
    
}

prefix_column_map = {
    "Text_": 1,
    "Hum_": 3,
    "Wind_": 4,
    "Solar_": 5,
    "Ground_": 10
}

combined_data = extract_and_combine_all(city_groups, prefix_column_map)

city_groups = {
    "Simulation_Dataset":Simulation_Dataset  
}

prefix_column_map = {
    "Tint_": 2,   
}

combined_Tint = extract_and_combine_all(city_groups, prefix_column_map)


In [25]:
Text_combined = combined_data.get('Text_combined_Weather_Dataset')
Hum_combined = combined_data.get('Hum_combined_Weather_Dataset')
Wind_combined = combined_data.get('Wind_combined_Weather_Dataset')
Tint_combined = combined_Tint.get('Tint_combined_Simulation_Dataset')


In [6]:
import torch
import torch.nn as nn
import math

# Encodage positionnel pour donner une notion du temps au modèle
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=500):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

# Le modèle Transformer principal
class TimeSeriesTransformer(nn.Module):
    def __init__(self, num_features=3, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1, output_size=24):
        super().__init__()
        self.input_projection = nn.Linear(num_features, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        
        self.output_layer = nn.Linear(d_model, output_size)
        #self.output_layer = nn.Linear(d_model, 1)


    def forward(self, x):
        # x shape: (batch_size, seq_len=24, num_features=3)
        x = self.input_projection(x)         # → (batch_size, 24, d_model)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)      # → (batch_size, 24, d_model)
        out = self.output_layer(x)           # → (batch_size, 24, 24)
        return out[:, :, 0]                  # → (batch_size, 24), on garde une seule sortie par heure


In [7]:
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


class TimeSeriesDataset(Dataset):
    def __init__(self, df, input_blocks, target_block):
        """
        df : DataFrame avec toutes les variables concaténées par blocs de 24h
        input_blocks : liste de tuples (col_start, col_end, day_offset)
                       où day_offset = 0 pour aujourd’hui, -1 pour hier, etc.
        target_block : tuple (col_start, col_end) pour la target (conso aujourd’hui)
        """
        self.x = []
        self.y = []

        for i in range(1, len(df)):  # Commencer à 1 pour avoir accès à hier
            input_seq = []

            for start, end, offset in input_blocks:
                row_index = i + offset
                values = df.iloc[row_index, start:end].values  # shape: (24,)
                input_seq.append(values)

            # shape finale: (24, num_features)
            input_seq = np.stack(input_seq, axis=1)
            self.x.append(input_seq)

            # Target: consommation aujourd’hui
            target = df.iloc[i, target_block[0]:target_block[1]].values
            self.y.append(target)

        self.x = torch.tensor(self.x, dtype=torch.float32)
        self.y = torch.tensor(self.y, dtype=torch.float32)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


In [8]:


# --- Fonction pour calculer les métriques ---
def compute_metrics(predictions, targets):
    mse = torch.mean((predictions - targets) ** 2)
    rmse = torch.sqrt(mse)
    mae = torch.mean(torch.abs(predictions - targets))
    ss_total = torch.sum((targets - torch.mean(targets)) ** 2)
    ss_residual = torch.sum((targets - predictions) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    cvrmse = (rmse / torch.mean(targets)) * 100
    return mae, rmse, mse, r2, cvrmse

In [9]:
# --- Fonction pour séparer les données ---
def prepare_data(df_scaled,n_features):

    input_blocks = []

    for i in range(n_features):
        start = i * 24
        end = (i + 1) * 24

        # Aujourd’hui
        input_blocks.append((start, end, 0))
        # Hier
        input_blocks.append((start, end, -1))

    # Ajouter la consommation d’hier (toujours la dernière feature)
    conso_start = n_features * 24
    conso_end = conso_start + 24
    input_blocks.append((conso_start, conso_end, -1))
    target_block = (df_scaled.shape[1] - 24, df_scaled.shape[1])    
        
    df_trainval, df_test = train_test_split(df_scaled, test_size=0.2, shuffle=False)
    df_train, df_val = train_test_split(df_trainval, test_size=0.1, shuffle=False)
    
    train_dataset = TimeSeriesDataset(df_train.reset_index(drop=True),input_blocks, target_block)
    val_dataset = TimeSeriesDataset(df_val.reset_index(drop=True),input_blocks, target_block)
    test_dataset = TimeSeriesDataset(df_test.reset_index(drop=True),input_blocks, target_block)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    
    return train_loader, val_loader, test_loader

In [10]:

# --- Fonction pour entraîner le modèle ---
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=10):
    for epoch in range(epochs):
        model.train()
        total_loss = total_mae = total_rmse = 0
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = criterion(output, y_batch)
            mae, rmse, _, _, _ = compute_metrics(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total_mae += mae.item()
            total_rmse += rmse.item()

        # Validation
        model.eval()
        val_loss = val_mae = val_rmse = 0
        with torch.no_grad():
            for x_val, y_val in val_loader:
                output = model(x_val)
                loss = criterion(output, y_val)
                mae, rmse, _, _, _ = compute_metrics(output, y_val)
                val_loss += loss.item()
                val_mae += mae.item()
                val_rmse += rmse.item()

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss / len(val_loader):.4f}, Val MAE: {val_mae / len(val_loader):.4f}, Val RMSE: {val_rmse / len(val_loader):.4f}")


In [73]:
def evaluate_test(model, test_loader, scaler_y):
    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for x_test, y_test in test_loader:
            y_pred = model(x_test)
            # Pas de squeeze ici
            all_preds.append(y_pred.cpu().numpy())
            all_targets.append(y_test.cpu().numpy())

    all_preds = np.vstack(all_preds)  # concat verticalement tous les batches
    all_targets = np.vstack(all_targets)

    # Inverse scaling
    all_preds_orig = scaler_y.inverse_transform(all_preds)
    all_targets_orig = scaler_y.inverse_transform(all_targets)

    # Convertir en tensors pour calcul métriques
    y_pred_tensor = torch.tensor(all_preds_orig)
    y_test_tensor = torch.tensor(all_targets_orig)

    mae, rmse, mse, r2, cvrmse = compute_metrics(y_pred_tensor, y_test_tensor)
    print(f"Test Metrics -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, MSE: {mse:.4f}, R2: {r2:.4f}, CVRMSE: {cvrmse:.4f}")


In [61]:
start_date = "2018-01-01"
dates = pd.date_range(start=start_date, periods=len(consommation_chauffage), freq="D") 
Text_combined.insert(0, "Date", dates)
Hum_combined.insert(0, "Date", dates)

ValueError: cannot insert Date, already exists

In [62]:
df2=pd.concat([Text_combined,consommation_chauffage],axis=1).reset_index(drop=True)


In [63]:
import pandas as pd

df2 = df2.sort_values('Date').reset_index(drop=True)

combined_rows = []

for idx, row in df2.iterrows():
    jour = row['Date']
    jour_precedent = jour - pd.Timedelta(days=1)

    prev_row = df2[df2['Date'] == jour_precedent]

    if not prev_row.empty:
        prev_row = prev_row.iloc[0]
        prev_row_renamed = prev_row.add_suffix('_j-1')
        prev_row_renamed = prev_row_renamed.drop(['Date', 'Date_j-1'], errors='ignore')

        # Inverser l'ordre ici
        combined_row = pd.concat([prev_row_renamed.to_frame().T.reset_index(drop=True), 
                                  row.to_frame().T.reset_index(drop=True)], axis=1)

        combined_rows.append(combined_row)
    else:
        continue

df_final = pd.concat(combined_rows, ignore_index=True)



In [74]:


df2 = df_final.copy()
df2 = df2.drop(columns=['Date'], errors='ignore')

df2.columns = [f"col_{i}" for i in range(df2.shape[1])]

target_columns = df2.columns[-24:]  
input_columns = df2.columns[:-24]

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = pd.DataFrame(scaler_X.fit_transform(df2[input_columns]), columns=input_columns)
y_scaled = pd.DataFrame(scaler_y.fit_transform(df2[target_columns]), columns=target_columns)
df_scaled = pd.concat([X_scaled, y_scaled], axis=1)

input_blocks = [
    (0, 24, 0),    # bloc 1 : colonnes 0–23 → température J-1
    (24, 48, 0),   # bloc 2 : colonnes 24–47 → température J
    (48, 72, 0),   # bloc 3 : colonnes 48–71 → consommation J-1
]
target_block = (72, 96)  # consommation J (cible)


train_df, temp_df = train_test_split(df_scaled, test_size=0.3, shuffle=False)
val_df, test_df = train_test_split(temp_df, test_size=0.5, shuffle=False)

# 3. Créer les datasets
train_dataset = TimeSeriesDataset(train_df.reset_index(drop=True), input_blocks, target_block)
val_dataset = TimeSeriesDataset(val_df.reset_index(drop=True), input_blocks, target_block)
test_dataset = TimeSeriesDataset(test_df.reset_index(drop=True), input_blocks, target_block)

# 4. Créer les DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

# 5. Initialiser le modèle
model = TimeSeriesTransformer(num_features=3)

# 6. Définir loss et optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# 7. Entraîner le modèle
train_model(model, train_loader, val_loader, optimizer, criterion, epochs=20)

# 8. Évaluer sur test set
evaluate_test(model, test_loader, scaler_y)

Epoch 1/20, Train Loss: 0.0602, Val Loss: 0.0121, Val MAE: 0.0735, Val RMSE: 0.0949
Epoch 2/20, Train Loss: 0.0151, Val Loss: 0.0096, Val MAE: 0.0528, Val RMSE: 0.0747
Epoch 3/20, Train Loss: 0.0127, Val Loss: 0.0106, Val MAE: 0.0646, Val RMSE: 0.0883
Epoch 4/20, Train Loss: 0.0112, Val Loss: 0.0085, Val MAE: 0.0495, Val RMSE: 0.0715
Epoch 5/20, Train Loss: 0.0102, Val Loss: 0.0088, Val MAE: 0.0506, Val RMSE: 0.0717
Epoch 6/20, Train Loss: 0.0096, Val Loss: 0.0079, Val MAE: 0.0430, Val RMSE: 0.0668
Epoch 7/20, Train Loss: 0.0088, Val Loss: 0.0097, Val MAE: 0.0598, Val RMSE: 0.0814
Epoch 8/20, Train Loss: 0.0096, Val Loss: 0.0076, Val MAE: 0.0487, Val RMSE: 0.0709
Epoch 9/20, Train Loss: 0.0079, Val Loss: 0.0069, Val MAE: 0.0435, Val RMSE: 0.0638
Epoch 10/20, Train Loss: 0.0073, Val Loss: 0.0067, Val MAE: 0.0459, Val RMSE: 0.0650
Epoch 11/20, Train Loss: 0.0075, Val Loss: 0.0064, Val MAE: 0.0409, Val RMSE: 0.0607
Epoch 12/20, Train Loss: 0.0068, Val Loss: 0.0062, Val MAE: 0.0393, Val RM