# example 1 on Iris dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

import jax.numpy as jnp
from scipy.sparse import csr_matrix

In [None]:
# Load iris dataset
iris = datasets.load_iris()
X = iris.data  # we only take the first four features.
y = iris.target

# Normalize the data
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Convert data to pandas DataFrame
real_data = pd.DataFrame(X, columns=['a', 'b', 'c', 'd'])
real_labels = y

# One hot encode labels
one_hot_encoder = OneHotEncoder()
# dense_array = one_hot_encoder.toarray()
one_hot_labels = one_hot_encoder.fit_transform(np.array(real_labels).reshape(-1, 1))

# Constants
NOISE_DIM = 100
NUM_CLASSES = 3
NUM_FEATURES = 4
BATCH_SIZE = 64
TRAINING_STEPS = 10000

# Generator
def create_generator():
    noise_input = Input(shape=(NOISE_DIM,))
    class_input = Input(shape=(NUM_CLASSES,))
    merged_input = Concatenate()([noise_input, class_input])
    hidden = Dense(128, activation='relu')(merged_input)
    output = Dense(NUM_FEATURES, activation='linear')(hidden)
    model = Model(inputs=[noise_input, class_input], outputs=output)
    return model

# Discriminator
def create_discriminator():
    data_input = Input(shape=(NUM_FEATURES,))
    class_input = Input(shape=(NUM_CLASSES,))
    merged_input = Concatenate()([data_input, class_input])
    hidden = Dense(128, activation='relu')(merged_input)
    output = Dense(1, activation='sigmoid')(hidden)
    model = Model(inputs=[data_input, class_input], outputs=output)
    return model

# cGAN
def create_cgan(generator, discriminator):
    noise_input = Input(shape=(NOISE_DIM,))
    class_input = Input(shape=(NUM_CLASSES,))
    generated_data = generator([noise_input, class_input])
    validity = discriminator([generated_data, class_input])
    model = Model(inputs=[noise_input, class_input], outputs=validity)
    return model

# Create and compile the Discriminator
discriminator = create_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam())

# Create the Generator
generator = create_generator()

# Create the GAN
gan = create_cgan(generator, discriminator)

# Ensure that only the generator is trained
discriminator.trainable = False

gan.compile(loss='binary_crossentropy', optimizer=Adam())

# Train GAN
for step in range(TRAINING_STEPS):
    # Select a random batch of real data with labels
    idx = np.random.randint(0, real_data.shape[0], BATCH_SIZE)
    real_batch = real_data.iloc[idx].values
    labels_batch = one_hot_labels[idx]

    # Generate a batch of new data
    noise = np.random.normal(0, 1, (BATCH_SIZE, NOISE_DIM))
    generated_batch = generator.predict([noise, labels_batch])

    # Train the discriminator
    real_loss = discriminator.train_on_batch([real_batch, labels_batch], np.ones((BATCH_SIZE, 1)))
    fake_loss = discriminator.train_on_batch([generated_batch, labels_batch], np.zeros((BATCH_SIZE, 1)))
    discriminator_loss = 0.5 * np.add(real_loss, fake_loss)

    # Train the generator
    generator_loss = gan.train_on_batch([noise, labels_batch], np.ones((BATCH_SIZE, 1)))

    if step % 100 == 0:
        print(f"Step: {step}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")

# Generate instances for a given class
def generate_data(generator, data_class, num_instances):
    one_hot_class = one_hot_encoder.transform(np.array([[data_class]]))
    noise = np.random.normal(0, 1, (num_instances, NOISE_DIM))
    generated_data = generator.predict([noise, np.repeat(one_hot_class, num_instances, axis=0)])
    return pd.DataFrame(generated_data, columns=['a', 'b', 'c', 'd'])

# Generate 40 instances of class 1
generated_data = generate_data(generator, 1, 40)


# example 2 on Iris dataset

In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load dữ liệu Iris và chuẩn hóa
iris = load_iris()
X = iris.data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [3]:
# Định nghĩa mạng sinh (Generator)
def build_generator(latent_dim, output_dim):
    input_layer = Input(shape=(latent_dim,))
    x = Dense(10, activation='relu')(input_layer)
    x = Dense(output_dim, activation='sigmoid')(x)  # Sử dụng sigmoid để phù hợp với giá trị của Iris dataset
    model = Model(inputs=input_layer, outputs=x)
    return model

In [4]:
# Định nghĩa mạng phân biệt (Discriminator)
def build_discriminator(input_dim):
    input_layer = Input(shape=(input_dim,))
    x = Dense(10, activation='relu')(input_layer)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=input_layer, outputs=x)
    return model

In [5]:
# Định nghĩa và huấn luyện mô hình GAN
def build_gan(generator, discriminator):
    discriminator.trainable = False
    gan_input = Input(shape=(latent_dim,))
    x = generator(gan_input)
    gan_output = discriminator(x)
    gan = Model(inputs=gan_input, outputs=gan_output)
    gan.compile(optimizer='adam', loss='binary_crossentropy')
    return gan

In [6]:
# Tham số
latent_dim = 5  # Kích thước của vector ngẫu nhiên đầu vào cho Generator
output_dim = X.shape[1]  # Kích thước của output của Generator phải bằng số features của Iris dataset

# Xây dựng và compile Generator và Discriminator
generator = build_generator(latent_dim, output_dim)
discriminator = build_discriminator(output_dim)
discriminator.compile(optimizer=Adam(), loss='binary_crossentropy')
gan = build_gan(generator, discriminator)

# Compile mô hình GAN
gan.compile(optimizer='adam', loss='binary_crossentropy')


# Huấn luyện GAN
epochs = 10000
batch_size = 32

for epoch in range(epochs):
    # Sinh dữ liệu ngẫu nhiên
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
    generated_data = generator.predict(noise)

    # Lấy mẫu từ dữ liệu thật
    idx = np.random.randint(0, X_scaled.shape[0], batch_size)
    real_data = X_scaled[idx]

    # Xây dựng batch dữ liệu cho Discriminator
    X_batch = np.concatenate([real_data, generated_data])
    y_discriminator = np.zeros(2 * batch_size)
    y_discriminator[:batch_size] = 0.9  # Gán nhãn 0.9 cho dữ liệu thật để cải thiện đào tạo
    # mục đích của bước trên là có được nhãn của batch data mới sau khi gộp chúng lại, ok hiểu!

    # Huấn luyện Discriminator
    discriminator.trainable = True
    discriminator_loss = discriminator.train_on_batch(X_batch, y_discriminator) # X_batch là real + generated

    # Huấn luyện Generator thông qua GAN
    noise = np.random.normal(0, 1, size=(batch_size, latent_dim))
    y_generator = np.ones(batch_size)
    discriminator.trainable = False
    gan_loss = gan.train_on_batch(noise, y_generator)

    # In ra kết quả sau mỗi epoch
    if epoch % 10000 == 0:
        print(f"Epoch {epoch}, Discriminator Loss: {discriminator_loss}, Generator Loss: {gan_loss}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
Epoch 0, Discriminator Loss: 0.6908032298088074, Generator Loss: [array(0.69080323, dtype=float32), array(0.69080323, dtype=float32)]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 999us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 999us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 997us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1

In [7]:
# Sinh dữ liệu mới từ Generator
num_samples = 10
noise = np.random.normal(0, 1, size=(num_samples, latent_dim))
generated_data = generator.predict(noise)
print("Generated Data:")
print(generated_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Generated Data:
[[0.16136737 0.4684785  0.05390082 0.07081831]
 [0.1358455  0.49322852 0.04082815 0.02812279]
 [0.56254655 0.39389622 0.65721714 0.7142042 ]
 [0.6148605  0.4162937  0.7684482  0.89248645]
 [0.57814986 0.40442052 0.6885134  0.7838413 ]
 [0.48367435 0.4241563  0.5165667  0.63195044]
 [0.4766702  0.45681295 0.5116494  0.62766176]
 [0.5771973  0.40206268 0.70578676 0.808118  ]
 [0.6006613  0.38871628 0.739964   0.84218657]
 [0.21636929 0.48008886 0.09417335 0.14885229]]


In [None]:
# chuyển dữ liệu về dạng ban đầu
fake_data_original = scaler.inverse_transform(generated_data)
print(fake_data_original)

# example 3 on Iris dataset using pytorch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

In [2]:
# Load the Iris dataset
iris = load_iris()
data = iris.data

# Standardize the data
scaler = StandardScaler()
data = scaler.fit_transform(data)

# Create PyTorch dataset
dataset = TensorDataset(torch.tensor(data, dtype=torch.float32))
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

In [4]:
# Define the Generator
class Generator(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, output_dim),
        )

    def forward(self, x):
        return self.model(x)

# Define the Discriminator
class Discriminator(nn.Module):
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.model(x)

In [5]:
# Initialize the models
input_dim = 4
latent_dim = 8

generator = Generator(latent_dim, input_dim)
discriminator = Discriminator(input_dim)

# Loss function and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

# Labels
real_label = 1.
fake_label = 0.

# Training loop
num_epochs = 5000

In [6]:
for epoch in range(num_epochs):
    for i, (real_data,) in enumerate(dataloader):
        batch_size = real_data.size(0)

        # Train Discriminator with real data
        optimizer_D.zero_grad()
        real_output = discriminator(real_data)
        real_loss = criterion(real_output, torch.full((batch_size, 1), real_label))
        real_loss.backward()

        # Train Discriminator with fake data
        noise = torch.randn(batch_size, latent_dim)
        fake_data = generator(noise)
        fake_output = discriminator(fake_data.detach())
        fake_loss = criterion(fake_output, torch.full((batch_size, 1), fake_label))
        fake_loss.backward()
        optimizer_D.step()

        # Train Generator
        optimizer_G.zero_grad()
        fake_output = discriminator(fake_data)
        generator_loss = criterion(fake_output, torch.full((batch_size, 1), real_label))
        generator_loss.backward()
        optimizer_G.step()

    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{num_epochs}]  Loss D: {real_loss.item() + fake_loss.item()}, Loss G: {generator_loss.item()}')

Epoch [0/5000]  Loss D: 1.326513409614563, Loss G: 0.6664145588874817
Epoch [100/5000]  Loss D: 1.3099476397037506, Loss G: 1.020888090133667
Epoch [200/5000]  Loss D: 1.3166534900665283, Loss G: 0.8884288668632507
Epoch [300/5000]  Loss D: 1.3153401017189026, Loss G: 0.8014025688171387
Epoch [400/5000]  Loss D: 1.4242733120918274, Loss G: 0.6960292458534241
Epoch [500/5000]  Loss D: 1.3088045716285706, Loss G: 0.7515724301338196
Epoch [600/5000]  Loss D: 1.3302380442619324, Loss G: 0.722888171672821
Epoch [700/5000]  Loss D: 1.2476725578308105, Loss G: 0.7150170207023621
Epoch [800/5000]  Loss D: 1.357801616191864, Loss G: 0.7120761871337891
Epoch [900/5000]  Loss D: 1.4842474460601807, Loss G: 0.6637597680091858
Epoch [1000/5000]  Loss D: 1.2260841727256775, Loss G: 0.9323500990867615
Epoch [1100/5000]  Loss D: 1.2655441761016846, Loss G: 0.871830403804779
Epoch [1200/5000]  Loss D: 1.2715622782707214, Loss G: 0.8298666477203369
Epoch [1300/5000]  Loss D: 1.1077720522880554, Loss G: 

In [7]:
# Generate some fake data after training
noise = torch.randn(10, latent_dim)
fake_data = generator(noise)
fake_data = scaler.inverse_transform(fake_data.detach().numpy())
print(fake_data)

[[6.169012   2.5455623  4.7291093  1.539046  ]
 [5.34374    3.8268483  1.3825036  0.22230116]
 [4.877947   2.7873714  3.6467104  1.2203213 ]
 [6.221972   2.9976625  4.7758346  1.5218576 ]
 [5.7032175  2.987758   5.099124   1.7631725 ]
 [4.7340083  3.277026   1.3111044  0.314005  ]
 [5.5474825  2.7334793  3.2860796  0.9483703 ]
 [5.1862555  2.0955536  3.0432475  1.0003636 ]
 [4.911922   3.6169164  1.4703826  0.29340795]
 [6.079893   3.3708344  4.6161323  1.5979748 ]]


# example 4 on Iris dataset

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU
from tensorflow.keras.optimizers import Adam
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

In [9]:
# Load the Iris dataset
iris = load_iris()
data = iris.data

# Standardize the data
scaler = StandardScaler()
data = scaler.fit_transform(data)

In [10]:
# Define the Generator model
def build_generator(latent_dim, output_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(output_dim, activation='linear'))
    return model

# Define the Discriminator model
def build_discriminator(input_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

In [13]:
# Parameters
latent_dim = 8
input_dim = data.shape[1]

# Build and compile the Discriminator
discriminator = build_discriminator(input_dim)
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])

# Build the Generator
generator = build_generator(latent_dim, input_dim)

# The GAN model (stacked generator and discriminator)
discriminator.trainable = False
gan_input = tf.keras.Input(shape=(latent_dim,))
generated_data = generator(gan_input)
gan_output = discriminator(generated_data)
gan = tf.keras.Model(gan_input, gan_output)
gan.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# Training parameters
epochs = 5000
batch_size = 16
half_batch = batch_size // 2

# Training loop
for epoch in range(epochs):
    # Train Discriminator
    idx = np.random.randint(0, data.shape[0], half_batch)
    real_data = data[idx]

    noise = np.random.normal(0, 1, (half_batch, latent_dim))
    fake_data = generator.predict(noise)

    d_loss_real = discriminator.train_on_batch(real_data, np.ones((half_batch, 1)))
    d_loss_fake = discriminator.train_on_batch(fake_data, np.zeros((half_batch, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    valid_y = np.array([1] * batch_size)
    g_loss = gan.train_on_batch(noise, valid_y)

    # Print the progress
    if epoch % 100 == 0:
        print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100*d_loss[1]}] [G loss: {g_loss}]")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


0 [D loss: 0.727293848991394, acc.: 12.5] [G loss: [array(0.7142088, dtype=float32), array(0.7142088, dtype=float32), array(0.25, dtype=float32)]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 553us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 988us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/s

In [14]:
# Generate some fake data after training
noise = np.random.normal(0, 1, (10, latent_dim))
fake_data = generator.predict(noise)
fake_data = scaler.inverse_transform(fake_data)
print(fake_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[[ 26.202564   -5.913607   49.421463  -19.048    ]
 [ 21.831362   -3.9743388  39.86659   -14.806953 ]
 [ 25.8061     -5.947167   50.803703  -19.429209 ]
 [ 23.88265    -5.0110307  45.243526  -17.365185 ]
 [ 24.672415   -5.2306695  46.57941   -17.937708 ]
 [ 23.799175   -4.9752765  44.481995  -16.84533  ]
 [ 23.17277    -4.632434   43.444317  -16.351099 ]
 [ 23.42555    -4.818714   43.461044  -16.447546 ]
 [ 27.924765   -6.310674   53.110283  -21.02577  ]
 [ 22.394203   -4.185256   40.64933   -15.29101  ]]
