<a href="https://colab.research.google.com/github/paulo-generozo/paulo-generozo/blob/main/DEEP_LEARNING_TITANIC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Instalação e Importação de Bibliotecas

In [14]:
!pip install -q kaggle torch
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

#Verificação de GPU

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Dispositivo: {device}")

#Explicação: PyTorch permite acelerar cálculos com GPU. Usamos torch.cuda.is_available() para verificar se há suporte a CUDA.

Dispositivo: cuda


#Carregamento e Pré-processamento dos Dados

In [16]:
#Download do Dataset

from google.colab import files
files.upload()  # Faz upload do kaggle.json
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c titanic
!unzip titanic.zip

Saving kaggle.json to kaggle (2).json
titanic.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  titanic.zip
replace gender_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: gender_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [17]:
#Engenharia de Features

def feature_engineering(df):
    df['Title'] = df['Name'].str.extract(' ([A-Za-z]+)\.', expand=False)
    df['Title'] = df['Title'].replace(['Lady', 'Countess', 'Dr', 'Major'], 'Rare')
    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
    df['Age'].fillna(df['Age'].median(), inplace=True)
    return df


#    Objetivo:
#Extrair títulos dos nomes (Mr, Mrs, Miss).

#Criar FamilySize para capturar relações familiares.

#Preencher valores faltantes em Age com a mediana.

#Transformação dos Dados

In [18]:
#Pipeline de Pré-processamento
import pandas as pd

# Load the training data from the 'train.csv' file
train_data = pd.read_csv('train.csv')

numeric_features = ['Age', 'Fare']
categorical_features = ['Pclass', 'Sex', 'Embarked']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), categorical_features)
    ])

X_train = preprocessor.fit_transform(train_data)
y_train = train_data['Survived'].values


#StandardScaler: Normaliza features numéricas.

#OneHotEncoder: Transforma variáveis categóricas em binárias.

In [19]:
#Conversão para Tensores PyTorch

X_train_tensor = torch.FloatTensor(X_train).to(device)  # Removed .toarray()
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1).to(device)

#.to(device): Envia dados para GPU, se disponível.

#Definição do Modelo em PyTorch

In [20]:
#Arquitetura da Rede Neural

class TitanicModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)
#Camadas:

#nn.Linear: Camadas totalmente conectadas.

#nn.ReLU: Ativação não linear.

#nn.Dropout: Regularização para evitar overfitting.


#Treinamento e Validação

In [21]:
#Arquitetura da Rede Neural

class TitanicModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid() # Added sigmoid activation to the last layer
        )

    def forward(self, x):
        return self.network(x)
#Camadas:

#nn.Linear: Camadas totalmente conectadas.

#nn.ReLU: Ativação não linear.

#nn.Dropout: Regularização para evitar overfitting.

#nn.Sigmoid: Assegura que a saída esteja entre 0 e 1,
#             adequada para BCELoss.

#Geração de Previsões


In [22]:
# Carregamento e Pré-processamento dos Dados de Teste
test_data = pd.read_csv('test.csv')  # Load the test data

# Aplicar a mesma engenharia de features aos dados de teste
test_data = feature_engineering(test_data)

# Aplicar o mesmo pré-processamento aos dados de teste
X_test = preprocessor.transform(test_data)

# Converter os dados de teste para tensores PyTorch
# Check for NaN or infinite values in X_test
if np.isnan(X_test).any() or np.isinf(X_test).any():
    # Replace NaN and infinite values with a suitable value (e.g., 0)
    X_test = np.nan_to_num(X_test)

TORCH_USE_CUDA_DSAX_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device) # Explicitly set dtype and device

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)


In [23]:
# Carregamento e Pré-processamento dos Dados de Teste
test_data = pd.read_csv('test.csv')  # Load the test data

# Aplicar a mesma engenharia de features aos dados de teste
test_data = feature_engineering(test_data)

# Aplicar o mesmo pré-processamento aos dados de teste
X_test = preprocessor.transform(test_data)

# Converter os dados de teste para tensores PyTorch
# Check for NaN or infinite values in X_test and replace them with 0
X_test = np.nan_to_num(X_test) # This line replaces NaN and infinite values with 0

X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device) # Explicitly set dtype and device

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)


In [24]:
#Submissão no Kaggle
# Instantiate the model with the correct input size
model = TitanicModel(input_size=X_test_tensor.shape[1]).to(device) # Assuming X_test_tensor has the correct shape

# ... (rest of your code) ...
test_preds = (model(X_test_tensor) > 0.5).int().cpu().numpy()
submission = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': test_preds.flatten()
})
submission.to_csv('submission.csv', index=False)

#.cpu().numpy(): Converte tensores para CPU e depois para NumPy.