In [2]:
# Importando as bibliotecas necessárias
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importando o módulo .py com o Generator e Discriminator
from gan import Discriminator, Generator  # Substitua 'seu_arquivo' pelo nome do arquivo .py

In [19]:
target_features=["yield_strength",
"ultimate_tensile_strength",]

target = target_features[0]

X_train = pd.read_csv('processed_data/knn/processed_train_knn.csv')
y_train = pd.read_csv('processed_data/knn/y_processed_train_knn.csv')
y_train = y_train[target]

X_valid = pd.read_csv('processed_data/knn/processed_validation_knn.csv')
y_valid = pd.read_csv('processed_data/knn/y_processed_validation_knn.csv')
y_valid = y_valid[target]

X_test = pd.read_csv('processed_data/knn/processed_test_knn.csv')
y_test = pd.read_csv('processed_data/knn/y_processed_test_knn.csv')
y_test = y_test[target]

print(y_test)



0      506.534301
1      482.000000
2      506.534301
3      506.534301
4      506.534301
          ...    
243    665.000000
244    506.534301
245    506.534301
246    603.000000
247    506.534301
Name: yield_strength, Length: 248, dtype: float64


In [20]:
y_test.isna()

0      False
1      False
2      False
3      False
4      False
       ...  
243    False
244    False
245    False
246    False
247    False
Name: yield_strength, Length: 248, dtype: bool

In [21]:
# Dividindo o dataset em treino e teste
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from torch.utils.data import DataLoader, TensorDataset

generator = Generator(size = X_train.shape[1])  # Tamanho baseado no número de features
discriminator = Discriminator(size = X_train.shape[1])

# X são as features e y é a coluna alvo 'Yield strength'
# Convertendo os dados em tensores
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Definindo otimizadores
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

# Função de perda
criterion = nn.BCELoss()

# Função de treinamento da GAN (mesma de antes)
def train_GAN(epochs=10000, print_every=1000):
    for epoch in range(epochs):
        for real_data, _ in train_loader:  # Itera sobre batches
            # Gerar ruído
            z = torch.randn(real_data.size(0), real_data.size(1))  # Ruído no formato das features
            
            # Gerar amostras falsas (saídas do generator)
            fake_data = generator(z)
            
            # Treinando o Discriminator
            real_preds = discriminator(real_data)
            fake_preds = discriminator(fake_data.detach())
            
            # Definindo rótulos
            real_labels = torch.ones(real_data.size(0), 1)
            fake_labels = torch.zeros(real_data.size(0), 1)
            
            # Calculando a perda do Discriminator
            loss_real = criterion(real_preds, real_labels)
            loss_fake = criterion(fake_preds, fake_labels)
            loss_D = (loss_real + loss_fake) / 2
            
            # Otimizando o Discriminator
            optimizer_D.zero_grad()
            loss_D.backward()
            optimizer_D.step()
            
            # Treinando o Generator
            fake_preds = discriminator(fake_data)
            loss_G = criterion(fake_preds, real_labels)  # O generator quer enganar o discriminator
            
            # Otimizando o Generator
            optimizer_G.zero_grad()
            loss_G.backward()
            optimizer_G.step()
        
        # Exibir o progresso
        if epoch % print_every == 0:
            print(f'Epoch {epoch}/{epochs} - Loss D: {loss_D.item()}, Loss G: {loss_G.item()}')

# Treinar a GAN
train_GAN(epochs=300, print_every=10)

# Avaliação do Generator após o treino
with torch.no_grad():
    z_test = torch.randn(X_test_tensor.size(0), X_test_tensor.size(1))  # Gerar ruído para o conjunto de teste
    generated_data = discriminator(z_test) #ultuma linha é o y

# Comparar os resultados gerados com o conjunto de teste real
print(generated_data)
y_pred = generated_data.numpy()
y_true = y_test_tensor.numpy()
print(f'Shape of y_true: {y_true.shape}')
print(f'Shape of y_pred: {y_pred.shape}')

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MSE: {mse}, MAE: {mae}, R²: {r2}')

Epoch 0/300 - Loss D: 0.5525689125061035, Loss G: 0.7278429865837097
Epoch 10/300 - Loss D: 0.11269483715295792, Loss G: 2.00553822517395
Epoch 20/300 - Loss D: 0.047050997614860535, Loss G: 2.713569164276123
Epoch 30/300 - Loss D: 0.042540401220321655, Loss G: 4.390963077545166
Epoch 40/300 - Loss D: 0.008986469358205795, Loss G: 5.199468612670898
Epoch 50/300 - Loss D: 0.00831757765263319, Loss G: 5.253561019897461
Epoch 60/300 - Loss D: 0.0051886821165680885, Loss G: 5.259300231933594
Epoch 70/300 - Loss D: 0.016027668491005898, Loss G: 4.3622965812683105
Epoch 80/300 - Loss D: 0.002429797314107418, Loss G: 5.869050979614258
Epoch 90/300 - Loss D: 0.0008586421608924866, Loss G: 7.492603302001953
Epoch 100/300 - Loss D: 0.0025031487457454205, Loss G: 5.763716697692871
Epoch 110/300 - Loss D: 0.6771602630615234, Loss G: 4.980943202972412
Epoch 120/300 - Loss D: 0.0021782200783491135, Loss G: 6.450273513793945
Epoch 130/300 - Loss D: 0.2170465886592865, Loss G: 8.961853981018066
Epoch 

In [9]:
import pandas as pd

with torch.no_grad():
    z_test = torch.randn(X_test_tensor.size(0), X_test_tensor.size(1))  # Gerar ruído para o conjunto de teste
    generated_data = discriminator(z_test)  # Aqui, você gera dados com 35 colunas

# Selecionando apenas a coluna 'Yield strength', que é a última coluna
yield_strength_predictions = generated_data[:, -1]  # Seleciona a última coluna

# Convertendo para numpy e criando um DataFrame para visualização
yield_strength_predictions_np = yield_strength_predictions.numpy()
df_predictions = pd.DataFrame(yield_strength_predictions_np, columns=['Yield Strength Predictions'])

# Exibindo os resultados
print(df_predictions)

     Yield Strength Predictions
0                      0.464033
1                      0.460200
2                      0.459471
3                      0.456909
4                      0.451099
..                          ...
326                    0.450998
327                    0.440055
328                    0.458349
329                    0.458706
330                    0.467054

[331 rows x 1 columns]
