<a href="https://colab.research.google.com/github/ludoveltz/test_github_fev25/blob/main/Daily_challenge_day4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install torch torchvision torchaudio


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import os

# Afficher le contenu du Drive
print("Contenu du Drive :")
print(os.listdir('/content/drive/My Drive'))


Contenu du Drive :
['household_power_consumption.txt', 'metadata.csv', 'Data', 'Colab Notebooks']


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Chemin correct vers le fichier (directement à la racine du Drive)
chemin = "/content/drive/My Drive/household_power_consumption.txt"

try:
    # Première lecture sans parse_dates
    df = pd.read_csv(chemin,
                     sep=';',
                     na_values=['?'])

    # Création de la colonne datetime après la lecture
    df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'],
                                  format='%d/%m/%Y %H:%M:%S')

    # Suppression des colonnes Date et Time originales
    df = df.drop(['Date', 'Time'], axis=1)

    # Utilisation de ffill() pour les valeurs manquantes
    df = df.ffill()

    # Conversion des colonnes en type float
    columns_to_convert = ['Global_active_power', 'Global_reactive_power',
                         'Voltage', 'Global_intensity',
                         'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

    for col in columns_to_convert:
        df[col] = df[col].astype(float)

    # Création de la cible : consommation du jour suivant
    df['target'] = df['Global_active_power'].shift(-1)
    df = df.dropna()

    # Normalisation des données
    scaler = MinMaxScaler()
    features = ['Global_active_power', 'Global_reactive_power',
                'Voltage', 'Global_intensity',
                'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']

    df_scaled = pd.DataFrame(scaler.fit_transform(df[features]), columns=features)
    df_scaled['target'] = scaler.fit_transform(df[['target']])

    # Affichage des résultats
    print("Aperçu des données après correction :")
    print(df.head())
    print("\nInformations sur le dataset :")
    print(df.info())

    print("\nAperçu des données normalisées :")
    print(df_scaled.head())

except FileNotFoundError:
    print(f"Erreur : Le fichier n'a pas été trouvé à l'emplacement : {chemin}")
except Exception as e:
    print(f"Une erreur s'est produite : {str(e)}")



Aperçu des données après correction :
   Global_active_power  Global_reactive_power  Voltage  Global_intensity  \
0                4.216                  0.418   234.84              18.4   
1                5.360                  0.436   233.63              23.0   
2                5.374                  0.498   233.29              23.0   
3                5.388                  0.502   233.74              23.0   
4                3.666                  0.528   235.68              15.8   

   Sub_metering_1  Sub_metering_2  Sub_metering_3            datetime  target  
0             0.0             1.0            17.0 2006-12-16 17:24:00   5.360  
1             0.0             1.0            16.0 2006-12-16 17:25:00   5.374  
2             0.0             2.0            17.0 2006-12-16 17:26:00   5.388  
3             0.0             1.0            17.0 2006-12-16 17:27:00   3.666  
4             0.0             1.0            17.0 2006-12-16 17:28:00   3.520  

Informations sur le data

In [13]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib
from tqdm.notebook import tqdm

# 3. Préparation des données
class PowerConsumptionDataset(Dataset):
    def __init__(self, data, sequence_length=24):
        self.sequence_length = sequence_length
        self.data = torch.FloatTensor(data)

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        X = self.data[idx:idx + self.sequence_length]
        y = self.data[idx + self.sequence_length, 0]  # Global_active_power
        return X, y

# Préparation des données
def prepare_data(df, sequence_length=24):
    # Normalisation
    scaler = MinMaxScaler()
    df_scaled = pd.DataFrame(
        scaler.fit_transform(df),
        columns=df.columns,
        index=df.index
    )

    # Création des séquences
    data = df_scaled.values

    # Division des données
    train_size = int(0.7 * len(data))
    val_size = int(0.15 * len(data))

    train_data = data[:train_size]
    val_data = data[train_size:train_size+val_size]
    test_data = data[train_size+val_size:]

    # Création des datasets
    train_dataset = PowerConsumptionDataset(train_data, sequence_length)
    val_dataset = PowerConsumptionDataset(val_data, sequence_length)
    test_dataset = PowerConsumptionDataset(test_data, sequence_length)

    # Création des dataloaders
    batch_size = 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    return train_loader, val_loader, test_loader, scaler


In [14]:
# 4. Définition du modèle LSTM
class HybridModel(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super(HybridModel, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.2
        )
        self.gru = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=0.2
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        gru_out, _ = self.gru(lstm_out)
        out = self.fc(gru_out[:, -1, :])
        return out

In [15]:
# 5. Entraînement du modèle
def train_model(model, train_loader, val_loader, num_epochs=20):
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    train_losses = []
    val_losses = []

    progress_bar = tqdm(range(num_epochs), desc='Entraînement')

    for epoch in progress_bar:
        # Phase d'entraînement
        model.train()
        train_loss = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs.squeeze(), y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Phase de validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                outputs = model(X)
                val_loss += criterion(outputs.squeeze(), y).item()

        # Calcul des pertes moyennes
        avg_train_loss = train_loss/len(train_loader)
        avg_val_loss = val_loss/len(val_loader)

        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)

        # Mise à jour de la barre de progression
        progress_bar.set_postfix({
            'train_loss': f'{avg_train_loss:.4f}',
            'val_loss': f'{avg_val_loss:.4f}'
        })

    return model, train_losses, val_losses



In [16]:
# 6. Évaluation du modèle
def evaluate_model(model, test_loader, scaler):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for X, y in test_loader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            predictions.extend(outputs.cpu().numpy())
            actuals.extend(y.cpu().numpy())

    # Calcul du R²
    r2 = r2_score(actuals, predictions)
    print(f'Score R² : {r2:.4f}')

    # Sauvegarde du scaler
    joblib.dump(scaler, 'power_consumption_scaler.joblib')

    return predictions, actuals


In [20]:
# 7. Prédictions futures
def predict_next_day(model, last_sequence, scaler):
    model.eval()
    with torch.no_grad():
        # Préparation des données
        sequence = torch.FloatTensor(last_sequence).unsqueeze(0)
        # Prédiction
        prediction = model(sequence)
        # Inverse transform
        prediction = prediction.numpy()
        prediction_original = scaler.inverse_transform(
            np.concatenate([prediction, np.zeros((len(prediction), 6))], axis=1)
        )[:, 0]
    return prediction_original[0]

# Utilisation du code
if __name__ == "__main__":
    # Configuration
    num_epochs = 10  # Défini ici pour Google Colab

    # Chargement des données
    chemin = "/content/drive/My Drive/household_power_consumption.txt"
    df = pd.read_csv(chemin, sep=';', na_values=['?'])

    # Préparation des données avec correction des warnings
    df['datetime'] = pd.to_datetime(
        df['Date'] + ' ' + df['Time'],
        format='%d/%m/%Y %H:%M:%S',
        dayfirst=True
    )
    df = df.drop(['Date', 'Time'], axis=1)
    df = df.set_index('datetime')
    df = df.ffill()  # Utilisation de ffill() au lieu de fillna(method='ffill')

    # Création des datasets et du modèle
    train_loader, val_loader, test_loader, scaler = prepare_data(df)
    model = HybridModel(input_size=7)  # 7 features
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    # Entraînement avec barre de progression style Keras
    print("Début de l'entraînement...")
    for epoch in range(num_epochs):
        model.train()
        print(f'\nEpoch {epoch+1}/{num_epochs}')

        for batch_idx, (X, y) in enumerate(train_loader):
            # Affichage de la progression style Keras
            progress = (batch_idx + 1) / len(train_loader)
            bar_length = 30
            filled_length = int(bar_length * progress)
            bar = '=' * filled_length + '-' * (bar_length - filled_length)

            # Training step
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs.squeeze(), y)
            loss.backward()
            optimizer.step()

            # Affichage de la progression avec la perte
            print(f'\r[{bar}] - {progress*100:.1f}% - loss: {loss.item():.4f}', end='')

    # Évaluation
    predictions, actuals = evaluate_model(model, test_loader, scaler)

    # Exemple de prédiction future
    last_sequence = next(iter(test_loader))[0][0].numpy()
    next_day_prediction = predict_next_day(model, last_sequence, scaler)
    print(f'\nPrédiction pour le prochain jour : {next_day_prediction:.2f} kW')




Début de l'entraînement...

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Prédiction pour le prochain jour : 0.56 kW
