# Tabular GAN from Scratch using Pytorch

The below code is a modifications of the [Pytorch DCGAN tutorial](https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html).

In [40]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [41]:
df = pd.read_csv('../datasets/income/adult.csv')

In [42]:
df_dropped = df.drop('income', axis=1)

In [43]:
categorical_columns = [col for col in df_dropped.columns if df_dropped[col].dtype == 'object']
numerical_columns = [col for col in df_dropped.columns if df_dropped[col].dtype in ['int64', 'float64']]

In [46]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(sparse_output=False), categorical_columns),
    ])

In [47]:
X = preprocessor.fit_transform(df_dropped)
y = df['income'].apply(lambda x: 1 if x == '>50K' else 0).values

In [48]:
X_torch = torch.tensor(X, dtype=torch.float32)
y_torch = torch.tensor(y, dtype=torch.float32)

In [49]:
dataset = TensorDataset(X_torch, y_torch)

In [50]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [51]:
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, output_dim),
            nn.Tanh()
        )
    
    def forward(self, x):
        return self.net(x)

In [52]:
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        return self.net(x)

In [53]:
noise_dim = 100
generator = Generator(input_dim=noise_dim, output_dim=X.shape[1])
discriminator = Discriminator(input_dim=X.shape[1])

In [54]:
criterion = nn.BCELoss()
optimizer_generator = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_discriminator = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

In [None]:
epochs = 50

for epoch in range(epochs):
    for batch in dataloader:
        # Update Discriminator with Real Data
        discriminator.zero_grad()
        real_data, _ = batch
        real_labels = torch.ones(real_data.size(0), 1)
        output_real = discriminator(real_data)
        loss_real = criterion(output_real, real_labels)
        
        # Update Discriminator with Fake Data
        noise = torch.randn(real_data.size(0), noise_dim)
        fake_data = generator(noise)
        fake_labels = torch.zeros(real_data.size(0), 1)
        output_fake = discriminator(fake_data.detach())
        loss_fake = criterion(output_fake, fake_labels)
        
        # Combine Losses for discriminator and Update
        loss_disc = loss_real + loss_fake
        loss_disc.backward()
        optimizer_discriminator.step()
        
        # Update Generator
        generator.zero_grad()
        output = discriminator(fake_data)
        loss_gen = criterion(output, real_labels)
        loss_gen.backward()
        optimizer_generator.step()

    print(f'Epoch {epoch+1}/{epochs}, Loss D: {loss_disc.item()}, Loss G: {loss_gen.item()}')
        

Epoch 1/50, Loss D: 0.6269813179969788, Loss G: 3.223327159881592
Epoch 2/50, Loss D: 0.7031475305557251, Loss G: 2.470341205596924
Epoch 3/50, Loss D: 0.23074853420257568, Loss G: 2.584080934524536
Epoch 4/50, Loss D: 0.0680033266544342, Loss G: 3.892197608947754
Epoch 5/50, Loss D: 0.4087280333042145, Loss G: 3.454986095428467
Epoch 6/50, Loss D: 0.08409431576728821, Loss G: 4.47928524017334
Epoch 7/50, Loss D: 0.03339972347021103, Loss G: 5.356021881103516
Epoch 8/50, Loss D: 0.028823552653193474, Loss G: 4.5678606033325195
Epoch 9/50, Loss D: 0.2053365260362625, Loss G: 4.6336283683776855
Epoch 10/50, Loss D: 0.07074583321809769, Loss G: 5.367283821105957
Epoch 11/50, Loss D: 0.04980750009417534, Loss G: 5.808785438537598
Epoch 12/50, Loss D: 0.21247978508472443, Loss G: 4.100690841674805
Epoch 13/50, Loss D: 0.15667684376239777, Loss G: 3.2365245819091797
Epoch 14/50, Loss D: 0.011997371912002563, Loss G: 4.736529350280762
Epoch 15/50, Loss D: 0.06772873550653458, Loss G: 4.726318