In [11]:
# 1: Configuração inicial
!nvidia-smi
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install --upgrade xgboost


# 2: Importações e verificação de GPU
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from xgboost import DMatrix
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Dispositivo utilizado: {device}')
if device.type == 'cpu':
    # No GPU visible: print the steps for switching the notebook runtime.
    for aviso in (
        "\n⚠️ ATENÇÃO: GPU não detectada!",
        "1. Vá em Runtime > Change runtime type",
        "2. Selecione 'GPU' em Hardware accelerator",
        "3. Reinicie o runtime (Runtime > Restart runtime)",
    ):
        print(aviso)


# 3: Settings and preprocessing
# Divisor applied to the raw `price` column by preprocessar_dados.
# NOTE(review): presumably a source-currency -> EUR exchange rate, since the
# final metrics are labelled in € — confirm the currency and the rate.
TAXA_CONVERSAO = 90

def preprocessar_dados(df):
    """Turn the raw flights table into numeric features and a log-price target.

    Steps, in order:
      1. divide ``price`` by ``TAXA_CONVERSAO``;
      2. drop the ``Unnamed: 0`` and ``flight`` columns when present;
      3. map ``departure_time`` / ``arrival_time`` and ``stops`` to ordinal
         integers (values missing from the maps fall back to 4 and 2);
      4. label-encode ``airline``, ``source_city``, ``destination_city`` and
         ``class`` (each column is cast to ``str`` first);
      5. turn +/-inf into NaN, drop rows containing NaN, and apply
         ``log1p`` to ``price``.

    Args:
        df: DataFrame holding the columns listed above. It is NOT modified;
            all work happens on a copy.

    Returns:
        A ``(X, y)`` tuple: ``X`` is the processed frame without ``price``,
        ``y`` is the ``log1p``-transformed ``price`` Series.

    Raises:
        Exception: any error raised during processing is printed and then
            re-raised unchanged.
    """
    try:
        # Work on a private copy. The price division below used to run on the
        # caller's frame, so calling this function twice on the same object
        # (e.g. re-running the notebook cell) divided the price twice.
        df = df.copy()

        # Currency conversion
        df['price'] = df['price'] / TAXA_CONVERSAO

        # Column removal (errors='ignore' tolerates already-missing columns)
        df = df.drop(columns=['Unnamed: 0', 'flight'], errors='ignore')

        # Ordinal mapping of the categorical time-of-day and stop counts
        time_map = {'Early_Morning': 0, 'Morning': 1, 'Afternoon': 2, 'Evening': 3, 'Night': 4}
        stops_map = {'zero': 0, 'one': 1, 'two_or_more': 2}

        # Values absent from the maps become NaN after .map() and are then
        # filled with the last bucket of each scale (4 for times, 2 for stops).
        df['departure_time'] = df['departure_time'].map(time_map).fillna(4)
        df['arrival_time'] = df['arrival_time'].map(time_map).fillna(4)
        df['stops'] = df['stops'].map(stops_map).fillna(2)

        # Label encoding; a fresh encoder is fitted per column and discarded.
        cat_cols = ['airline', 'source_city', 'destination_city', 'class']
        for col in cat_cols:
            df[col] = LabelEncoder().fit_transform(df[col].astype(str))

        # Final cleanup: infinities count as missing, incomplete rows are dropped
        df = df.replace([np.inf, -np.inf], np.nan).dropna()
        df['price'] = np.log1p(df['price'])

        return df.drop(columns='price'), df['price']

    except Exception as e:
        print(f"Erro no pré-processamento: {str(e)}")
        raise


# 4: Dataset e modelo neural
class FlightDataset(Dataset):
    """Map-style dataset holding features and targets as float32 tensors.

    ``X`` and ``y`` must expose ``.values`` (as pandas objects do). Features
    are stored as given; targets are reshaped into a single column.
    """

    def __init__(self, X, y):
        features = X.values
        targets = y.values
        self.X = torch.tensor(features, dtype=torch.float32)
        # One target column per sample so it lines up with a 1-unit model output.
        self.y = torch.tensor(targets, dtype=torch.float32).reshape(-1, 1)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

class FlightPriceNet(nn.Module):
    """Fully connected regressor: input -> 256 -> 128 -> 64 -> 1.

    Every hidden stage is Linear -> BatchNorm1d -> LeakyReLU(0.01); the first
    stage is additionally followed by Dropout(0.3). All Linear layers get
    Kaiming-normal weights (leaky_relu gain) and biases set to 0.01.
    """

    def __init__(self, input_size):
        super().__init__()

        hidden_widths = (256, 128, 64)
        stack = []
        fan_in = input_size
        for position, width in enumerate(hidden_widths):
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.BatchNorm1d(width))
            stack.append(nn.LeakyReLU(0.01))
            if position == 0:
                # Dropout is applied after the first hidden stage only.
                stack.append(nn.Dropout(0.3))
            fan_in = width
        stack.append(nn.Linear(fan_in, 1))

        self.net = nn.Sequential(*stack)

        # Re-initialise the Linear layers in their order inside the stack.
        linear_layers = [module for module in self.net if isinstance(module, nn.Linear)]
        for linear in linear_layers:
            nn.init.kaiming_normal_(linear.weight, nonlinearity='leaky_relu')
            nn.init.constant_(linear.bias, 0.01)

    def forward(self, x):
        prediction = self.net(x)
        return prediction


# 5: Funções de treino
def treinar_xgboost(X_train, y_train, X_val, y_val):
    """Fit an XGBoost regressor on the training split, monitored on validation.

    Both splits are wrapped in ``DMatrix`` objects. Training uses a
    squared-error objective with the ``hist`` tree method on the ``cuda``
    device, for up to 2000 boosting rounds, with early stopping after 50
    rounds evaluated on the validation set (reported as ``val``) and a
    progress line every 100 rounds.

    Returns:
        The object returned by ``xgb.train``.
    """
    train_matrix = DMatrix(X_train, y_train)
    val_matrix = DMatrix(X_val, y_val)

    booster_params = {
        # Task and hardware
        'objective': 'reg:squarederror',
        'tree_method': 'hist',
        'device': 'cuda',
        'grow_policy': 'depthwise',
        # Tree shape and step size
        'learning_rate': 0.03,
        'max_depth': 10,
        'min_child_weight': 2,
        # Row / column sampling
        'subsample': 0.9,
        'colsample_bytree': 0.8,
        # Regularisation
        'gamma': 0.1,
        'lambda': 1.5,
        'alpha': 0.2,
    }

    watchlist = [(val_matrix, 'val')]
    return xgb.train(
        booster_params,
        train_matrix,
        num_boost_round=2000,
        evals=watchlist,
        early_stopping_rounds=50,
        verbose_eval=100,
    )

def treinar_rede_neural(X_train, y_train, X_val, y_val, input_size):
    """Train a FlightPriceNet with early stopping and return the best weights.

    The model is optimised with AdamW (lr=0.001, weight_decay=0.1) on a Huber
    loss, with gradient-norm clipping at 1.0 and a ReduceLROnPlateau
    scheduler driven by the mean validation loss. Training runs for at most
    100 epochs and stops after 10 consecutive epochs without a new best
    validation loss. Each new best state is written to ``best_model.pth`` in
    the current working directory.

    Args:
        X_train, y_train: training features / targets (must expose ``.values``).
        X_val, y_val: validation features / targets (must expose ``.values``).
        input_size: number of input features of the network.

    Returns:
        The model on ``device``, loaded with the best checkpoint written
        during this call. If no epoch ever produced a checkpoint (e.g. the
        validation loss was NaN throughout), the last-epoch weights are
        returned instead and a warning is printed.
    """
    torch.cuda.empty_cache()

    # Pinned host memory only speeds up host-to-GPU copies; asking for it on
    # the CPU fallback path gains nothing and makes the DataLoader warn.
    use_pinned = device.type == 'cuda'

    train_loader = DataLoader(
        FlightDataset(X_train, y_train),
        batch_size=2048,
        shuffle=True,
        pin_memory=use_pinned,
        num_workers=0
    )

    val_loader = DataLoader(
        FlightDataset(X_val, y_val),
        batch_size=2048,
        pin_memory=use_pinned,
        num_workers=0
    )

    model = FlightPriceNet(input_size).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.1)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
    criterion = nn.HuberLoss()

    best_loss = float('inf')
    patience, no_improve = 10, 0
    # Tracks whether THIS call wrote best_model.pth, so a stale file left by
    # an earlier run is never loaded by mistake.
    checkpoint_saved = False

    for epoch in range(100):
        model.train()
        train_loss = 0.0
        batches_used = 0

        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            # Skip batches whose loss is NaN instead of poisoning the weights.
            if torch.isnan(loss):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()
            batches_used += 1

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                val_loss += criterion(model(X_batch), y_batch).item()

        val_loss /= len(val_loader)
        scheduler.step(val_loss)

        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            checkpoint_saved = True
            no_improve = 0
        else:
            no_improve += 1

        # Average only over batches that actually contributed; skipped NaN
        # batches previously deflated the reported training loss.
        avg_train_loss = train_loss / batches_used if batches_used else float('nan')
        print(f'Epoch {epoch+1:03d} | Loss: {avg_train_loss:.4f} | Val Loss: {val_loss:.4f}')

        if no_improve >= patience:
            print(f'Early stopping @ {epoch+1}')
            break

    if checkpoint_saved:
        model.load_state_dict(torch.load('best_model.pth', map_location=device, weights_only=True))
    else:
        print("⚠️ Nenhum checkpoint foi salvo neste treino; retornando os pesos da última época.")
    return model


# 6: Pipeline principal

def main():
    """Run the full pipeline: load, preprocess, split, scale, train, evaluate.

    Reads ``/content/Clean_Dataset.csv``, preprocesses it with
    ``preprocessar_dados``, splits it 60/20/20 into train/validation/test
    (``random_state=42``), standardises the features with a scaler fitted on
    the training split only, trains the XGBoost and neural-network models and
    prints RMSE, MAE and R² for both on the test split. Metrics are computed
    after undoing the ``log1p`` target transform with ``expm1``.

    Returns:
        None. If the CSV cannot be read, the error is printed and the
        function returns early without training anything.
    """
    # Load data
    try:
        df = pd.read_csv('/content/Clean_Dataset.csv')
        print("Dados carregados com sucesso!")
    except Exception as e:
        print(f"Erro ao carregar dados: {str(e)}")
        return

    # Preprocessing
    X, y = preprocessar_dados(df)

    # Data split: 20% test, then 25% of the remaining 80% (= 20% overall) for validation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

    # Normalisation: fit on the training split only, then reuse for val/test
    scaler = StandardScaler()
    X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns)
    X_val = pd.DataFrame(scaler.transform(X_val), columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns)

    # Model training
    print("\n🚀 Iniciando treino do XGBoost...")
    xgb_model = treinar_xgboost(X_train, y_train, X_val, y_val)

    print("\n🧠 Iniciando treino da Rede Neural...")
    nn_model = treinar_rede_neural(X_train, y_train, X_val, y_val, X_train.shape[1])

    # Evaluation
    def avaliar(model, neural=False):
        """Score ``model`` on the test split and return formatted metrics.

        ``neural=True`` treats ``model`` as a torch module; otherwise it is
        used as an XGBoost booster.
        """
        if neural:
            model.eval()
            test_set = FlightDataset(X_test, y_test)
            loader = DataLoader(test_set, batch_size=1024)
            preds = []
            with torch.no_grad():
                for X_batch, _ in loader:
                    preds.append(model(X_batch.to(device)).cpu().numpy())
            # Network output is (n, 1); flatten so it matches the 1-D target.
            y_pred = np.expm1(np.concatenate(preds)).ravel()
        else:
            # Use the model that was passed in; this branch used to ignore the
            # argument and always read the enclosing `xgb_model` variable.
            y_pred = np.expm1(model.predict(DMatrix(X_test)))

        y_true = np.expm1(y_test)
        return {
            'RMSE (€)': f"{np.sqrt(mean_squared_error(y_true, y_pred)):.2f}",
            'MAE (€)': f"{mean_absolute_error(y_true, y_pred):.2f}",
            'R² Score': f"{r2_score(y_true, y_pred):.4f}"
        }

    print("\n📊 Resultados Finais:")
    print("═"*40)
    print("XGBoost:")
    for k, v in avaliar(xgb_model).items():
        print(f"• {k}: {v}")

    print("\nRede Neural:")
    for k, v in avaliar(nn_model, neural=True).items():
        print(f"• {k}: {v}")

# Script entry point: run the pipeline, then print the completion banner.
# NOTE(review): main() returns None both on success and when the CSV fails to
# load, so the banner below is printed in either case.
if __name__ == "__main__":
    main()
    print("\n✅ Processo concluído!")

Sun Jan 26 23:23:05 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   57C    P0              29W /  70W |    213MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    