In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import plotly.express as px
import plotly.graph_objects as go

# ===========================
# 1. Chargement des données
# ===========================
def load_data(csv_path):
    #df = pd.read_csv(csv_path, sep=";", decimal=",")
    df = pd.read_csv(csv_path, sep=";",decimal=",", encoding="utf-8")
    print(df.head())
    features = [col for col in df.columns if "EUR_Nom_Spot" in col]
    
    # Extraction des scénarios uniques
    scenarios = df["Scenario"].unique()
    print("Scénarios uniques :", scenarios)
    
    # Initialisation d'un tableau pour stocker les données
    data = []

    for scenario in scenarios:
        # On récupère toutes les lignes pour ce scénario
        scenario_data = df[df["Scenario"] == scenario]
        scenario_data[features] = scenario_data[features].apply(pd.to_numeric, errors="coerce")  # Remplace les erreurs par NaN
        scenario_data.fillna(0, inplace=True)
        #print(f"Scénario {scenario} - Données après conversion :")
        #print(scenario_data[features].head())
        # Pour chaque time step (de 0 à 40), on extrait les caractéristiques
        scenario_values =scenario_data[features].astype(float).values
        #print(scenario_values)
        # Ajouter les données du scénario à la liste
        data.append(scenario_values)
    #print(np.array(data))
    # Convertir la liste de données en un tensor
    data_tensor = torch.tensor(np.array(data), dtype=torch.float32)
    
    return data_tensor, features


# ===========================
# 2. Définition du TBVAE
# ===========================
class TBVAE(nn.Module):
    def __init__(self, input_dim, latent_dim=10):
        super(TBVAE, self).__init__()
        self.latent_dim = latent_dim
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, latent_dim * 2)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 1640)  # Changer 1640 en 1600
        )
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def forward(self, x):
        encoded = self.encoder(x)
        mu, logvar = encoded[:, :self.latent_dim], encoded[:, self.latent_dim:]
        z = self.reparameterize(mu, logvar)
        decoded = self.decoder(z)
        return decoded, mu, logvar

# ===========================
# 3. Entraînement du TBVAE
# ===========================
def train_tbvae(model, data, epochs=1000, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    history =[]
    for epoch in range(epochs):
        optimizer.zero_grad()
        reconstructed, mu, logvar = model(data)
        recon_loss = loss_fn(reconstructed, data)
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        loss = recon_loss + 0.001 * kl_loss
        loss.backward()
        optimizer.step()
        history.append(loss.item())
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item()}, ")
    return history

# ===========================
# 4. Génération de nouveaux scénarios
# ===========================
def generate_scenarios(model, num_scenarios=2):
    with torch.no_grad():
        z = torch.randn(num_scenarios, model.latent_dim)
        generated = model.decoder(z).numpy()

    # Reshape la sortie en (num_scenarios, 41, nombre_de_caracteristiques)
    generated = generated.reshape(num_scenarios, 41, len(features))

    return generated
def generate_scenarios2(model, num_scenarios=2):
    with torch.no_grad():
        z = torch.randn(num_scenarios, model.latent_dim)
        z = model.decoder_fc(z)  # Fully Connected avant LSTM
        z, _ = model.decoder_lstm(z)  # LSTM
        generated = model.decoder_out(z).numpy()  # Couche finale

    # Reshape la sortie en (num_scenarios, 41, nombre_de_caracteristiques)
        generated = generated.reshape(num_scenarios, 41, len(features))

    return generated



# ===========================
# 5. Sauvegarde et évaluation
# ===========================
def save_generated_data(data, features, output_path):
    scenario_ids = np.arange(1, data.shape[0] + 1)
    df_list = []
    
    for i, scenario in enumerate(data):
        df_scenario = pd.DataFrame(scenario, columns=features)
        df_scenario.insert(0, "Time Step", np.arange(41))  # Time steps de 0 à 40
        df_scenario.insert(0, "Scenario", scenario_ids[i])  # Ajouter le scénario
        df_list.append(df_scenario)
    
    df_final = pd.concat(df_list, axis=0)
    df_final.to_csv(output_path, index=False, sep=";", decimal=",")

In [2]:
csv_path ="/Users/saimanenawal/Documents/EUR_4Q24_EUR.csv" # Remplace par le bon chemin
output_path = "generated_scenarios.csv"

# Charger les données
data, features = load_data(csv_path)

num_scenarios, time_steps, num_features = data.shape
input_dim = time_steps * num_features  # 41 * 40 = 1640
data2 = data.view(num_scenarios, -1)  # Transformer en (num_scenarios, 1640)





   Scenario  Time Step  EUR_Nom_Spot_1  EUR_Nom_Spot_2  EUR_Nom_Spot_3  \
0         1          0        0.023360        0.021924        0.021930   
1         1          1        0.004213        0.004662        0.005203   
2         1          2        0.003470        0.004038        0.004465   
3         1          3        0.005811        0.006172        0.006726   
4         1          4        0.006319        0.007029        0.007629   

   EUR_Nom_Spot_4  EUR_Nom_Spot_5  EUR_Nom_Spot_6  EUR_Nom_Spot_7  \
0        0.022201        0.022420        0.022705        0.022980   
1        0.005659        0.006226        0.006803        0.007358   
2        0.005032        0.005597        0.006119        0.006575   
3        0.007230        0.007651        0.007972        0.008321   
4        0.008133        0.008536        0.008992        0.009302   

   EUR_Nom_Spot_8  ...  EUR_Nom_Spot_33  EUR_Nom_Spot_34  EUR_Nom_Spot_35  \
0        0.023225  ...         0.020410         0.020237       

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scenario_data[features] = scenario_data[features].apply(pd.to_numeric, errors="coerce")  # Remplace les erreurs par NaN
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scenario_data.fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  scenario_data[features] = scenario_data[features].apply(pd.to_numeric, errors="coerce")  # Remplace les 

In [3]:

class LSTM_TBVAEr(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_layers, latent_dim=10, bidirectional=False, dropout_p=0.1):
        super(LSTM_TBVAEr, self).__init__()
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional

        self.encoder_fc = nn.Linear(input_dim, hidden_dim)  # Couche Fully Connected avant LSTM
        self.encoder_relu = nn.ReLU()
        self.encoder_lstm = nn.LSTM(hidden_dim, hidden_dim, n_layers, bidirectional=bidirectional, dropout=dropout_p, batch_first=True)
        self.encoder_out = nn.Linear(hidden_dim, latent_dim * 2)  # Couche finale d'encodage

        self.decoder_fc = nn.Linear(latent_dim, hidden_dim)  # Couche Fully Connected avant LSTM
        self.encoder_relu = nn.ReLU()
        self.decoder_lstm = nn.LSTM(hidden_dim, hidden_dim, n_layers, batch_first=True, bidirectional=bidirectional, dropout=dropout_p)
        self.decoder_out = nn.Linear(hidden_dim, input_dim)  # Couche finale de reconstruction

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Encoder
        x = self.encoder_fc(x)  # Couche Fully Connected
        x, _ = self.encoder_lstm(x)  # On récupère uniquement 'output'
        x = self.encoder_out(x)  # Dernière couche pour mu et logvar
        
        mu, logvar = x[:, :self.latent_dim], x[:, self.latent_dim:]
        z = self.reparameterize(mu, logvar)

        # Decoder
        z = self.decoder_fc(z)  # Couche Fully Connected
        z, _ = self.decoder_lstm(z)  # LSTM avec uniquement 'output'
        decoded = self.decoder_out(z)  # Reconstruction

        return decoded, mu, logvar


In [4]:
class LSTM_TBVAEr2(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_layers, latent_dim=10, bidirectional=False, 
                 dropout_p=0.1, nbCouchesInterm=0):
        super(LSTM_TBVAEr2, self).__init__()
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.nbCouchesInterm = nbCouchesInterm

        # Encoder
        self.encoder_fc_in = nn.Linear(input_dim, hidden_dim)
        self.encoder_relu = nn.ReLU()
        
        # Intermediate layers
        self.encoder_interm = nn.ModuleList()
        for i in range(nbCouchesInterm):
            self.encoder_interm.append(nn.Linear(hidden_dim, hidden_dim))
            self.encoder_interm.append(nn.ReLU())
            #self.encoder_interm.append(nn.Dropout(dropout_p))
        
        self.encoder_lstm = nn.LSTM(hidden_dim, hidden_dim, n_layers, 
                                  bidirectional=bidirectional, dropout=dropout_p, 
                                  batch_first=True)
        
        # Encoder output
        lstm_output_dim = hidden_dim * 2 if bidirectional else hidden_dim
        self.encoder_out = nn.Linear(lstm_output_dim, latent_dim * 2)

        # Decoder
        self.decoder_fc_in = nn.Linear(latent_dim, hidden_dim)
        
        # Intermediate layers
        self.decoder_interm = nn.ModuleList()
        for i in range(nbCouchesInterm):
            self.decoder_interm.append(nn.Linear(hidden_dim, hidden_dim))
            self.decoder_interm.append(nn.ReLU())
            self.decoder_interm.append(nn.Dropout(dropout_p))
        
        self.decoder_lstm = nn.LSTM(hidden_dim, hidden_dim, n_layers, 
                                  batch_first=True, bidirectional=bidirectional, 
                                  dropout=dropout_p)
        self.decoder_out = nn.Linear(hidden_dim, input_dim)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        # Encoder
        batch_size = x.size(0)
        
        # First pass through FC layers
        x = self.encoder_relu(self.encoder_fc_in(x))
        
        # Intermediate layers
        for layer in self.encoder_interm:
            x = layer(x)
        
        # Reshape for LSTM (add sequence length dimension if needed)
        if x.dim() == 2:
            x = x.unsqueeze(1)  # [batch, 1, features]
            
        # LSTM layer
        x, _ = self.encoder_lstm(x)
        
        # Take the last timestep
        x = x[:, -1, :]  # [batch, features]
        
        # Get mu and logvar
        x = self.encoder_out(x)
        mu, logvar = x[:, :self.latent_dim], x[:, self.latent_dim:]
        z = self.reparameterize(mu, logvar)

        # Decoder
        z = self.encoder_relu(self.decoder_fc_in(z))
        
        # Intermediate layers
        for layer in self.decoder_interm:
            z = layer(z)
        
        # Reshape for LSTM (add sequence length dimension)
        z = z.unsqueeze(1)  # [batch, 1, features]
        
        # LSTM layer
        z, _ = self.decoder_lstm(z)
        
        # Output
        decoded = self.decoder_out(z)
        
        return decoded, mu, logvar

def train_tbvaer(model, data, epochs=1000, lr=0.001, kl_weight=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    history = []
    
    for epoch in range(epochs):
        optimizer.zero_grad()
        reconstructed, mu, logvar = model(data)
        
        # Reconstruction Loss
        recon_loss = loss_fn(reconstructed, data)
        
        # KL Divergence
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        
        # Total loss
        loss = recon_loss + kl_weight * kl_loss
        
        loss.backward()
        optimizer.step()
        history.append(loss.item())
        
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item()}, Recon: {recon_loss.item()}, KL: {kl_loss.item()}")
    
    return history

In [11]:
model6r = LSTM_TBVAEr2(input_dim=1640, hidden_dim=32,n_layers=2)
history6 =train_tbvaer(model6r, data2 )

model7= LSTM_TBVAEr2(input_dim=1640, hidden_dim=64,n_layers=2, nbCouchesInterm=1)
history7 =train_tbvaer(model7, data2 )

model8= LSTM_TBVAEr2(input_dim=1640, hidden_dim=128,n_layers=5, nbCouchesInterm=5)
history8 =train_tbvaer(model8, data2 )

model9= LSTM_TBVAEr(input_dim=1640, hidden_dim=64,n_layers=2)
history9 =train_tbvaer(model9, data2 )

Epoch 0, Loss: 0.013832992874085903, Recon: 0.013198534026741982, KL: 0.6344587802886963
Epoch 100, Loss: 0.0009008095948956907, Recon: 0.0008991623180918396, KL: 0.001647263765335083
Epoch 200, Loss: 0.0008791372529231012, Recon: 0.0008786477847024798, KL: 0.0004894733428955078
Epoch 300, Loss: 0.0008651392417959869, Recon: 0.0008647570502944291, KL: 0.0003822147846221924
Epoch 400, Loss: 0.0008639363222755492, Recon: 0.0008635979611426592, KL: 0.0003383755683898926
Epoch 500, Loss: 0.0008623775793239474, Recon: 0.0008620352600701153, KL: 0.00034230947494506836
Epoch 600, Loss: 0.0008638144354335964, Recon: 0.0008635816047899425, KL: 0.00023284554481506348
Epoch 700, Loss: 0.0008612140081822872, Recon: 0.0008610555087216198, KL: 0.00015848875045776367
Epoch 800, Loss: 0.0008605317561887205, Recon: 0.000860402942635119, KL: 0.00012883543968200684
Epoch 900, Loss: 0.0008602254092693329, Recon: 0.0008601220324635506, KL: 0.00010335445404052734
Epoch 0, Loss: 0.007794986013323069, Recon: 

In [12]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=list(range(len(history7))),
    y=history7,
    name="History7",
    mode="lines"
))

fig.add_trace(go.Scatter(
    x=list(range(len(history8))),
    y=history8,
    name="History8",
    mode="lines"
))

fig.add_trace(go.Scatter(
    x=list(range(len(history9))),
    y=history9,
    name="History9",
    mode="lines"
))

fig.update_layout(
    title="Comparaison des Courbes d'Apprentissage",
    xaxis_title="Epoch",
    yaxis_title="Loss",
    hovermode="x unified"
)

fig.show()
