In [1]:
# Importando as bibliotecas necessárias
import torch
from torch import nn, optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Importando o módulo .py com o Generator e Discriminator
from gan import Discriminator, Generator  # Substitua 'seu_arquivo' pelo nome do arquivo .py
# Carregando os dados
df = pd.read_csv('data_gan.csv')
target_variable_names=["Yield strength / MPa",
        "Ultimate tensile strength / MPa",
        "Elongation / %",
        "Reduction of Area / %",
        "Charpy temperature / °C",
        "Charpy impact toughness / J"
        ,"Weld ID",
        "AC or DC",
        "Electrode positive or negative",
        "Type of weld"]
# Separando os dados
# Vamos supor que 'Yield strength' é o alvo e as outras duas colunas são as entradas
X = df.drop(columns=target_variable_names)

# y vai ser a coluna alvo 'Yield strength / MPa'
y = df['Yield strength / MPa']

# Tratamento de NaNs (vamos ignorá-los neste caso para simular dados incompletos)
X = X.fillna(0)  # Substitua por algum método apropriado se necessário
y = y.fillna(0)

# Convertendo para tensores
X_train = torch.tensor(X.values, dtype=torch.float32)
y_train = torch.tensor(y.values, dtype=torch.float32).view(-1, 1)



In [2]:
# Inicializando o generator e discriminator
generator = Generator(size = X_train.shape[1])  # Tamanho baseado no número de features
discriminator = Discriminator(size = X_train.shape[1])

# Definindo otimizadores
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

# Função de perda
criterion = nn.BCELoss()

# Função de treinamento da GAN
def train_GAN(epochs=10000, print_every=1000):
    for epoch in range(epochs):
        # Gerar ruído
        z = torch.randn(X_train.size(0), X_train.size(1))  # Ruído no formato das features
        
        # Gerar amostras falsas (saídas do generator)
        fake_data = generator(z)
        
        # Treinando o Discriminator
        real_preds = discriminator(X_train)
        fake_preds = discriminator(fake_data.detach())
        
        # Definindo rótulos
        real_labels = torch.ones(X_train.size(0), 1)
        fake_labels = torch.zeros(X_train.size(0), 1)
        
        # Calculando a perda do Discriminator
        loss_real = criterion(real_preds, real_labels)
        loss_fake = criterion(fake_preds, fake_labels)
        loss_D = (loss_real + loss_fake) / 2
        
        # Otimizando o Discriminator
        optimizer_D.zero_grad()
        loss_D.backward()
        optimizer_D.step()
        
        # Treinando o Generator
        fake_preds = discriminator(fake_data)
        loss_G = criterion(fake_preds, real_labels)  # O generator quer enganar o discriminator
        
        # Otimizando o Generator
        optimizer_G.zero_grad()
        loss_G.backward()
        optimizer_G.step()
        
        # Exibir o progresso
        if epoch % print_every == 0:
            print(f'Epoch {epoch}/{epochs} - Loss D: {loss_D.item()}, Loss G: {loss_G.item()}')

# Treinar a GAN
train_GAN(epochs=1000, print_every=10)

# Gerando novas amostras
with torch.no_grad():
    z = torch.randn(100, X_train.size(1))
    generated_data = generator(z)
    print("generated data: ",generated_data)

Epoch 0/1000 - Loss D: 2.026888370513916, Loss G: 0.7004261016845703
Epoch 10/1000 - Loss D: 0.3482334315776825, Loss G: 0.6927841305732727
Epoch 20/1000 - Loss D: 0.3492352366447449, Loss G: 0.6895255446434021
Epoch 30/1000 - Loss D: 0.3501451015472412, Loss G: 0.6883739233016968
Epoch 40/1000 - Loss D: 0.34801703691482544, Loss G: 0.6939952373504639
Epoch 50/1000 - Loss D: 0.33919817209243774, Loss G: 0.7144559621810913
Epoch 60/1000 - Loss D: 0.3235291838645935, Loss G: 0.7528449892997742
Epoch 70/1000 - Loss D: 0.3064330816268921, Loss G: 0.7922957539558411
Epoch 80/1000 - Loss D: 0.2908584177494049, Loss G: 0.833342969417572
Epoch 90/1000 - Loss D: 0.2728399634361267, Loss G: 0.8828449845314026
Epoch 100/1000 - Loss D: 0.2561086416244507, Loss G: 0.9312447905540466
Epoch 110/1000 - Loss D: 0.2528555691242218, Loss G: 0.9403455853462219
Epoch 120/1000 - Loss D: 0.2618933618068695, Loss G: 0.9137650728225708
Epoch 130/1000 - Loss D: 0.2703411877155304, Loss G: 0.8941965699195862
Epo

In [8]:
# Dividindo o dataset em treino e teste
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from torch.utils.data import DataLoader, TensorDataset

generator = Generator(size = X_train.shape[1])  # Tamanho baseado no número de features
discriminator = Discriminator(size = X_train.shape[1])

# X são as features e y é a coluna alvo 'Yield strength'
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Convertendo os dados em tensores
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)

X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Função de treinamento da GAN (mesma de antes)
def train_GAN(epochs=10000, print_every=1000):
    for epoch in range(epochs):
        for real_data, _ in train_loader:  # Itera sobre batches
            # Gerar ruído
            z = torch.randn(real_data.size(0), real_data.size(1))  # Ruído no formato das features
            
            # Gerar amostras falsas (saídas do generator)
            fake_data = generator(z)
            
            # Treinando o Discriminator
            real_preds = discriminator(real_data)
            fake_preds = discriminator(fake_data.detach())
            
            # Definindo rótulos
            real_labels = torch.ones(real_data.size(0), 1)
            fake_labels = torch.zeros(real_data.size(0), 1)
            
            # Calculando a perda do Discriminator
            loss_real = criterion(real_preds, real_labels)
            loss_fake = criterion(fake_preds, fake_labels)
            loss_D = (loss_real + loss_fake) / 2
            
            # Otimizando o Discriminator
            optimizer_D.zero_grad()
            loss_D.backward()
            optimizer_D.step()
            
            # Treinando o Generator
            fake_preds = discriminator(fake_data)
            loss_G = criterion(fake_preds, real_labels)  # O generator quer enganar o discriminator
            
            # Otimizando o Generator
            optimizer_G.zero_grad()
            loss_G.backward()
            optimizer_G.step()
        
        # Exibir o progresso
        if epoch % print_every == 0:
            print(f'Epoch {epoch}/{epochs} - Loss D: {loss_D.item()}, Loss G: {loss_G.item()}')

# Treinar a GAN
train_GAN(epochs=300, print_every=10)

# Avaliação do Generator após o treino
with torch.no_grad():
    z_test = torch.randn(X_test_tensor.size(0), X_test_tensor.size(1))  # Gerar ruído para o conjunto de teste
    generated_data = discriminator(z_test) #ultuma linha é o y

# Comparar os resultados gerados com o conjunto de teste real
print(generated_data)
y_pred = generated_data.numpy()
y_true = y_test_tensor.numpy()
print(f'Shape of y_true: {y_true.shape}')
print(f'Shape of y_pred: {y_pred.shape}')

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'MSE: {mse}, MAE: {mae}, R²: {r2}')

Epoch 0/300 - Loss D: 3.00421404838562, Loss G: 0.770463228225708
Epoch 10/300 - Loss D: 3.117750644683838, Loss G: 0.7702500820159912
Epoch 20/300 - Loss D: 3.03877329826355, Loss G: 0.770110547542572
Epoch 30/300 - Loss D: 3.5929601192474365, Loss G: 0.7704694867134094
Epoch 40/300 - Loss D: 3.6486451625823975, Loss G: 0.7706210613250732
Epoch 50/300 - Loss D: 3.0697877407073975, Loss G: 0.7704582214355469
Epoch 60/300 - Loss D: 3.9120967388153076, Loss G: 0.7699591517448425
Epoch 70/300 - Loss D: 2.9404056072235107, Loss G: 0.7705283761024475
Epoch 80/300 - Loss D: 3.3941307067871094, Loss G: 0.7703944444656372
Epoch 90/300 - Loss D: 3.3264594078063965, Loss G: 0.7703354954719543
Epoch 100/300 - Loss D: 3.4735705852508545, Loss G: 0.770569920539856
Epoch 110/300 - Loss D: 2.918048858642578, Loss G: 0.7705375552177429
Epoch 120/300 - Loss D: 2.814788341522217, Loss G: 0.7708379030227661
Epoch 130/300 - Loss D: 2.910727024078369, Loss G: 0.7705462574958801
Epoch 140/300 - Loss D: 3.39

In [9]:
import pandas as pd

with torch.no_grad():
    z_test = torch.randn(X_test_tensor.size(0), X_test_tensor.size(1))  # Gerar ruído para o conjunto de teste
    generated_data = discriminator(z_test)  # Aqui, você gera dados com 35 colunas

# Selecionando apenas a coluna 'Yield strength', que é a última coluna
yield_strength_predictions = generated_data[:, -1]  # Seleciona a última coluna

# Convertendo para numpy e criando um DataFrame para visualização
yield_strength_predictions_np = yield_strength_predictions.numpy()
df_predictions = pd.DataFrame(yield_strength_predictions_np, columns=['Yield Strength Predictions'])

# Exibindo os resultados
print(df_predictions)

     Yield Strength Predictions
0                      0.464033
1                      0.460200
2                      0.459471
3                      0.456909
4                      0.451099
..                          ...
326                    0.450998
327                    0.440055
328                    0.458349
329                    0.458706
330                    0.467054

[331 rows x 1 columns]
