In [1]:
# Importando as bibliotecas necessárias
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importando o módulo .py com o Generator e Discriminator
from gan import Discriminator, Generator  # Substitua 'seu_arquivo' pelo nome do arquivo .py

In [26]:
target_features=["yield_strength",
"ultimate_tensile_strength",]

target = target_features[0]

X_train = pd.read_csv('processed_data/knn/processed_train_knn.csv')
y_train = pd.read_csv('data/train.csv')
y_train = y_train[target]

X_valid = pd.read_csv('processed_data/knn/processed_validation_knn.csv')
y_valid = pd.read_csv('data/validation.csv')
y_valid = y_valid[target]

X_test = pd.read_csv('processed_data/knn/processed_test_knn.csv')
y_test = pd.read_csv('data/test.csv')
y_test = y_test[target]

print(X_train.head())



   carbon_concentration_per_weight  silicon_concentration_per_weight  \
0                         0.041382                          0.321143   
1                         0.082378                         -0.821959   
2                        -0.083932                          0.216021   
3                         1.266667                          0.424351   
4                         0.242557                         -0.456118   

   manganese_concentration_per_weight  sulphur_concentration_per_weight  \
0                           -0.922977                          1.830136   
1                            0.353442                          0.263364   
2                            0.117702                         -0.305449   
3                            1.777874                          0.263364   
4                            0.412394                          0.000000   

   phosphorus_concentration_per_weight  nickel_concentration_per_weight  \
0                             0.682994   

In [3]:
y_test.isna()

0       True
1      False
2       True
3       True
4       True
       ...  
243    False
244     True
245     True
246    False
247     True
Name: yield_strength, Length: 248, dtype: bool

In [34]:
# Dividindo o dataset em treino e teste
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from torch.utils.data import DataLoader, TensorDataset

generator = Generator(size = X_train.shape[1])  # Tamanho baseado no número de features
discriminator = Discriminator(size = X_train.shape[1])

# X são as features e y é a coluna alvo 'Yield strength'
# Convertendo os dados em tensores
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Definindo otimizadores
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.00005)

# Função de perda
criterion = nn.BCELoss()

# Função de treinamento da GAN (mesma de antes)
def train_GAN(epochs=10000, print_every=1000):
    for epoch in range(epochs):
        for real_data, _ in train_loader:  # Itera sobre batches
            # Gerar ruído
            z = torch.randn(real_data.size(0), real_data.size(1))  # Ruído no formato das features
            
            # Gerar amostras falsas (saídas do generator)
            fake_data = generator(z)
            
            # Treinando o Discriminator
            real_preds = discriminator(real_data)
            fake_preds = discriminator(fake_data.detach())
            
            # Definindo rótulos
            real_labels = torch.ones(real_data.size(0), 1) * 0.9 
            fake_labels = torch.zeros(real_data.size(0), 1) + 0.1 
            
            # Calculando a perda do Discriminator
            loss_real = criterion(real_preds, real_labels)
            loss_fake = criterion(fake_preds, fake_labels)
            loss_D = (loss_real + loss_fake) / 2
            
            # Otimizando o Discriminator
            optimizer_D.zero_grad()
            loss_D.backward()
            optimizer_D.step()
            
            # Treinando o Generator
            fake_preds = discriminator(fake_data)
            loss_G = criterion(fake_preds, real_labels)  # O generator quer enganar o discriminator
            
            # Otimizando o Generator
            optimizer_G.zero_grad()
            loss_G.backward()
            optimizer_G.step()
        
        # Exibir o progresso
        if epoch % print_every == 0:
            print(f'Epoch {epoch}/{epochs} - Loss D: {loss_D.item()}, Loss G: {loss_G.item()}')

# Treinar a GAN
train_GAN(epochs=150, print_every=10)

Epoch 0/150 - Loss D: 0.682395875453949, Loss G: 0.710929811000824
Epoch 10/150 - Loss D: 0.9691396355628967, Loss G: 0.4184715747833252
Epoch 20/150 - Loss D: 0.6626369953155518, Loss G: 0.8682767152786255
Epoch 30/150 - Loss D: 0.5845738649368286, Loss G: 0.7712709307670593
Epoch 40/150 - Loss D: 0.6291539072990417, Loss G: 0.7660590410232544
Epoch 50/150 - Loss D: 0.6940709352493286, Loss G: 0.7642236351966858
Epoch 60/150 - Loss D: 0.7006665468215942, Loss G: 0.6411969661712646
Epoch 70/150 - Loss D: 0.6931097507476807, Loss G: 0.7931127548217773
Epoch 80/150 - Loss D: 0.6161899566650391, Loss G: 0.8124905228614807
Epoch 90/150 - Loss D: 0.6493948698043823, Loss G: 0.7976003885269165
Epoch 100/150 - Loss D: 0.6933438181877136, Loss G: 0.7570127248764038
Epoch 110/150 - Loss D: 0.6333661079406738, Loss G: 0.8595978021621704
Epoch 120/150 - Loss D: 0.6797140836715698, Loss G: 0.8967921137809753
Epoch 130/150 - Loss D: 0.5573831796646118, Loss G: 0.7976824045181274
Epoch 140/150 - Los

In [41]:
# Avaliação do Generator após o treino
with torch.no_grad():
    z_test = torch.randn(X_test_tensor.size(0), X_test_tensor.size(1))  # Gerar ruído para o conjunto de teste
    generated_data = generator(z_test) #ultuma linha é o y

# Comparar os resultados gerados com o conjunto de teste real
y_pred = generated_data.numpy()
y_true = X_test_tensor.numpy()
print(f'Shape of y_true: {y_true.shape}')
print(f'Shape of y_pred: {y_pred.shape}')
nan_indices = np.isnan(y_true).flatten()  # Obter um array booleano indicando onde estão os NaNs

# Remover as linhas correspondentes em y_true e y_pred
y_true_cleaned = y_true[~nan_indices]  # Mantém apenas as linhas onde não há NaNs
y_pred_cleaned = y_pred[~nan_indices]  # Remove as mesmas linhas em y_pred

# Exibir as novas formas para confirmar a remoção correta
print(f'Shape of y_true_cleaned: {y_true_cleaned.shape}')
print(f'Shape of y_pred_cleaned: {y_pred_cleaned.shape}')

# Agora você pode calcular as métricas usando y_true_cleaned e y_pred_cleaned
mse = mean_squared_error(y_true_cleaned, y_pred_cleaned[:, -1])
mae = mean_absolute_error(y_true_cleaned, y_pred_cleaned[:, -1])
r2 = r2_score(y_true_cleaned, y_pred_cleaned[:, -1])

print(f'MSE: {mse}, MAE: {mae}, R²: {r2}')

Shape of y_true: (248, 42)
Shape of y_pred: (248, 42)
Shape of y_true_cleaned: (248, 42)
Shape of y_pred_cleaned: (248, 42)
MSE: 13.97876262664795, MAE: 0.9396845698356628, R²: -0.3035505060254361
