In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.impute import SimpleImputer
import pandas as pd
import numpy as np

In [None]:
# Pick the compute device: CUDA when PyTorch reports a usable GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Utilisation du périphérique :", device)

In [None]:
# Load the dataset: read the CSV (path is relative to the notebook's working
# directory) into a DataFrame that the following cells transform.
me_file_path = './dataset/melb_data.csv'
me_data = pd.read_csv(filepath_or_buffer=me_file_path)

In [None]:
# Impute missing values: every numeric column has its NaNs replaced by that
# column's median.
# NOTE(review): the selection covers *all* numeric columns, so a numeric target
# column is imputed too, and the medians are computed over every row (before
# any train/validation split) — confirm this is intended.
numeric_columns = me_data.select_dtypes(include=[np.number]).columns
imputer = SimpleImputer(strategy='median')
me_data[numeric_columns] = imputer.fit_transform(me_data[numeric_columns])

In [None]:
# Target-encode the categorical 'Suburb' column: each suburb is replaced by the
# mean 'Price' of its rows, then the original text column is removed.
# The membership guard makes the cell safe to re-run once 'Suburb' is gone.
# NOTE(review): the means are computed over all rows, including those that later
# end up in the validation split, so validation targets leak into this feature.
if 'Suburb' in me_data:
    suburb_mean_price = me_data['Price'].groupby(me_data['Suburb']).mean()
    me_data['Suburb_encoded'] = me_data['Suburb'].map(suburb_mean_price)
    del me_data['Suburb']

In [None]:
# Engineer additional features from existing columns:
#   - squared land size,
#   - log(1 + building area),
#   - rooms x bathrooms interaction term.
me_data['Landsize_squared'] = me_data['Landsize'].pow(2)
me_data['BuildingArea_log'] = me_data['BuildingArea'].pipe(np.log1p)
me_data['Rooms_Bathroom'] = me_data['Rooms'].mul(me_data['Bathroom'])

In [None]:
# Select the model inputs (X) and the regression target (y).
# The order of this list fixes the column order of X and of every array
# derived from it.
fme_features = [
    'Suburb_encoded',
    'Rooms',
    'Longtitude',
    'Lattitude',
    'Landsize_squared',
    'Landsize',
    'Rooms_Bathroom',
    'YearBuilt',
    'BuildingArea_log',
    'BuildingArea',
    'Bathroom',
]
X = me_data.loc[:, fme_features]
y = me_data.loc[:, 'Price']

In [None]:
# Standardise the features: learn per-column statistics, then apply them.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split, so validation rows influence the scaling statistics.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Split into training and validation sets: 20% of the rows are held out, and
# the fixed random_state keeps the partition identical across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the splits to float32 PyTorch tensors located on `device`.
# Features keep their 2-D layout; targets are reshaped into a single column so
# they line up with the model's one-value-per-row output.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32, device=device).reshape(-1, 1)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32, device=device).reshape(-1, 1)

In [None]:
# Define the model
class NeuralNetwork(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 32 -> 1, ReLU between layers.

    The layers live in ``self.model`` (an ``nn.Sequential``); the final linear
    layer has no activation, so the output is an unbounded value per row.
    """

    def __init__(self, input_dim):
        """Build the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        hidden_widths = (64, 32)
        layers = []
        in_features = input_dim
        # Layers are created in the same order as they are applied.
        for width in hidden_widths:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, 1))  # regression output
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.model(x)

In [None]:
# Initialise the model.
# Seed PyTorch's random number generator first: layer weights are drawn from it
# at construction time, so without a seed every Restart & Run All starts from
# different weights and reports different metrics even though the data split
# is seeded.
torch.manual_seed(42)
input_dim = X_train.shape[1]  # number of feature columns
model = NeuralNetwork(input_dim).to(device)

In [None]:
# Loss function and optimiser: mean squared error, minimised with Adam.
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the model. Each epoch is a single full-batch optimiser step over the
# whole training tensor, followed by a gradient-free pass over the validation
# tensor. Losses are reported on the first epoch and then every tenth epoch.
# NOTE(review): the target is fed to the MSE loss unscaled — confirm the loss
# magnitude and learning rate are appropriate for the target's range.
epochs = 100
for epoch in range(epochs):
    # Training step.
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()

    # Validation pass (no gradients tracked).
    model.eval()
    with torch.no_grad():
        val_predictions = model(X_val_tensor)
        val_loss = criterion(val_predictions, y_val_tensor)

    is_first_epoch = epoch == 0
    is_tenth_epoch = (epoch + 1) % 10 == 0
    if is_tenth_epoch or is_first_epoch:
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}")


In [None]:
# Final evaluation: mean absolute error of the model on the validation set.
model.eval()
with torch.no_grad():
    # Only the forward pass needs gradient tracking disabled; the result is
    # moved to the CPU and converted to NumPy for scikit-learn.
    final_predictions = model(X_val_tensor).cpu().numpy()
mae_final = mean_absolute_error(y_val, final_predictions)
print("Final Mean Absolute Error:", mae_final)