# Hackathon La Rondelle

In [18]:
import pandas as pd
import xarray as xr

# Open the training-fields NetCDF file as an xarray Dataset.
# The bare name on the last line makes the notebook display it.
train_fields_path = "data/OSSE_U_V_SLA_SST_train.nc"
ds1 = xr.open_dataset(train_fields_path)
ds1

In [17]:
# Open the eddy NetCDF file as an xarray Dataset.
# The bare name on the last line makes the notebook display it.
eddies_path = "data/eddies_train.nc"
ds2 = xr.open_dataset(eddies_path)
ds2

In [19]:
# Print, for ds1 and then for ds2, the dimensions, the available coordinates
# and the data variables (in that order).
for _dataset in (ds1, ds2):
    for _section in ("dims", "coords", "data_vars"):
        print(getattr(_dataset, _section))


Coordinates:
    deptht        float32 4B ...
  * latitude      (latitude) float32 1kB -1.084e-19 2.967 0.0 ... 3.099 0.0
  * longitude     (longitude) float32 3kB 3.689e+19 -3.352 0.0 ... -3.221 0.0
  * time_counter  (time_counter) datetime64[ns] 2kB 2015-01-02T12:00:00 ... 2...
Data variables:
    vomecrtyT  (time_counter, latitude, longitude) float32 291MB ...
    vozocrtxT  (time_counter, latitude, longitude) float32 291MB ...
    sossheig   (time_counter, latitude, longitude) float32 291MB ...
    votemper   (time_counter, latitude, longitude) float32 291MB ...
Coordinates:
  * time     (time) datetime64[ns] 2kB 2015-01-02T12:00:00 ... 2015-12-25T12:...
Data variables:
    eddies   (time, latitude, longitude) float64 582MB ...


In [23]:
# ds2 calls its time dimension "time"; rename it to "time_counter" so that it
# carries the same dimension name as ds1 before merging.
ds2_renamed = ds2.rename(time="time_counter")

# Combine the variables of both datasets into a single Dataset.
ds = xr.merge((ds1, ds2_renamed))

# Show the merged dataset: first as plain text, then through the notebook's
# own display of the last expression.
print(ds)
ds

<xarray.Dataset> Size: 2GB
Dimensions:       (latitude: 357, longitude: 717, time_counter: 284)
Coordinates:
    deptht        float32 4B ...
  * latitude      (latitude) float32 1kB -1.084e-19 2.967 0.0 ... 3.099 0.0
  * longitude     (longitude) float32 3kB 3.689e+19 -3.352 0.0 ... -3.221 0.0
  * time_counter  (time_counter) datetime64[ns] 2kB 2015-01-02T12:00:00 ... 2...
Data variables:
    vomecrtyT     (time_counter, latitude, longitude) float32 291MB ...
    vozocrtxT     (time_counter, latitude, longitude) float32 291MB ...
    sossheig      (time_counter, latitude, longitude) float32 291MB ...
    votemper      (time_counter, latitude, longitude) float32 291MB ...
    eddies        (time_counter, latitude, longitude) float64 582MB ...
Attributes:
    history:  Mon Feb 20 14:04:07 2023: ncks -A SST_2015_merged.nc SSH_U_V_20...
    NCO:      4.4.7


In [None]:
# ds.close()  # the merged dataset is named `ds`; close it only once its values are no longer needed

In [26]:
# Eddy field at the first time step (index 0 along time_counter).
first_step_eddies = ds["eddies"].isel(time_counter=0)
print(first_step_eddies)


<xarray.DataArray 'eddies' (latitude: 357, longitude: 717)> Size: 2MB
[255969 values with dtype=float64]
Coordinates:
    deptht        float32 4B ...
  * latitude      (latitude) float32 1kB -1.084e-19 2.967 0.0 ... 3.099 0.0
  * longitude     (longitude) float32 3kB 3.689e+19 -3.352 0.0 ... -3.221 0.0
    time_counter  datetime64[ns] 8B 2015-01-02T12:00:00
Attributes:
    units:          type: cyc (1) acyc(2) 
    standard_name:  eddy filled contours


## II - Réseau de neurones

In [24]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import xarray as xr
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, random_split

# Build PyTorch datasets/loaders from the merged xarray Dataset `ds`.

# Seed for the train/validation split, so that re-running the notebook
# yields the same partition.
SEED = 42

# Input features: stack the four fields along a new trailing axis.
# Resulting layout: (time_counter, latitude, longitude, 4)
INPUT_VARIABLES = ("vomecrtyT", "vozocrtxT", "sossheig", "votemper")
X = np.stack([ds[name].values for name in INPUT_VARIABLES], axis=-1)

# Target variable, layout: (time_counter, latitude, longitude)
Y = ds["eddies"].values

# Inputs and target must describe the same grid and the same time steps.
assert X.shape[:-1] == Y.shape, (
    f"input grid {X.shape[:-1]} does not match target grid {Y.shape}"
)

# NOTE(review): if any of these fields contains NaN (e.g. masked cells), the
# loss and the weights will become NaN during training -- confirm and, if
# needed, mask or fill the values before building the tensors.

# Convert to PyTorch tensors. `as_tensor` reuses the freshly stacked array
# instead of copying it a second time when the dtype already matches.
X_tensor = torch.as_tensor(X, dtype=torch.float32)
# Trailing axis of size 1 so the target has the same rank as the model output.
Y_tensor = torch.tensor(Y, dtype=torch.float32).unsqueeze(-1)

# Split along the first axis (one sample per time step): 80% train, 20% validation.
dataset = TensorDataset(X_tensor, Y_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Batched loaders; only the training loader shuffles its samples.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Définition du modèle
class EddyPredictor(nn.Module):
    """Multi-layer perceptron applied along the last axis of its input.

    The network is a stack of ``Linear`` + ``ReLU`` layers followed by a final
    ``Linear`` layer. With the default arguments it maps 4 input features to
    1 output through hidden layers of width 128, 64 and 32.

    Args:
        in_features: Size of the last axis of the input (default 4).
        hidden_sizes: Widths of the successive hidden layers
            (default ``(128, 64, 32)``); may be empty.
        out_features: Size of the last axis of the output (default 1).
    """

    def __init__(self, in_features=4, hidden_sizes=(128, 64, 32), out_features=1):
        super().__init__()
        layers = []
        width = in_features
        for hidden in hidden_sizes:
            layers.append(nn.Linear(width, hidden))
            layers.append(nn.ReLU())
            width = hidden
        layers.append(nn.Linear(width, out_features))
        # Kept as one nn.Sequential named `model` so that state_dict keys
        # ("model.<index>.weight" / ".bias") stay the same for the defaults.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for ``x``; only the last axis is transformed."""
        return self.model(x)

# Instantiate the model.
model = EddyPredictor()

# Loss function and optimiser.
# NOTE(review): the `eddies` attributes describe type codes (cyc = 1, acyc = 2),
# which looks categorical; MSE treats them as continuous values -- confirm
# whether a classification loss would be more appropriate.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Sample counts of each split, used to turn accumulated losses into per-sample
# means; max(..., 1) avoids a division by zero on an empty split.
n_train = max(len(train_loader.dataset), 1)
n_val = max(len(val_loader.dataset), 1)

# Training
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight it by the batch size so
        # that a smaller last batch does not bias the epoch average.
        total_loss += loss.item() * inputs.size(0)
    train_loss = total_loss / n_train

    # Evaluation on the validation split (no gradient tracking).
    model.eval()
    val_total = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_total += loss.item() * inputs.size(0)
    val_loss = val_total / n_val

    # Both figures are per-sample means, hence directly comparable.
    print(f"Époque {epoch+1}/{num_epochs} - Perte entraînement: {train_loss:.4f}, Perte validation: {val_loss:.4f}")

# Save the trained weights.
torch.save(model.state_dict(), "eddy_predictor.pth")
