# In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Read the concrete dataset from disk
file_path = "Concrete_Data - Sheet1.csv"  # Update the path if needed
df = pd.read_csv(file_path)

# Feature matrix: every column other than the target
X = df.drop('Concrete compressive strength', axis=1)

# Min-Max scale each feature into [0, 1] before feeding the GAN
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit(X).transform(X)

# Hold out 20% of the scaled rows (fixed seed for a repeatable split)
X_train, X_test = train_test_split(
    X_scaled,
    test_size=0.2,
    random_state=42,
)

# Size of the noise vector fed to the generator
latent_dim = 16

# Build Generator
def build_generator(noise_dim=None, n_features=None):
    """Build the generator network that maps noise vectors to feature rows.

    Args:
        noise_dim: Length of the input noise vector. When omitted, the
            module-level ``latent_dim`` is used.
        n_features: Number of output units (one per feature column). When
            omitted, ``X_train.shape[1]`` is used.

    Returns:
        An uncompiled ``keras.Sequential`` model:
        Dense(64, relu) -> Dense(128, relu) -> Dense(n_features, sigmoid).
    """
    if noise_dim is None:
        noise_dim = latent_dim
    if n_features is None:
        n_features = X_train.shape[1]

    return keras.Sequential([
        # Explicit Input layer instead of `input_shape=` on the first Dense
        # layer; newer Keras releases warn against the latter in Sequential
        # models.
        keras.Input(shape=(noise_dim,)),
        layers.Dense(64, activation="relu"),
        layers.Dense(128, activation="relu"),
        # Sigmoid output matches the [0, 1] range of the Min-Max scaled data
        layers.Dense(n_features, activation="sigmoid"),
    ])

# Build Discriminator
def build_discriminator(n_features=None):
    """Build the discriminator network that scores rows as real or fake.

    Args:
        n_features: Width of each input row. When omitted,
            ``X_train.shape[1]`` is used.

    Returns:
        An uncompiled ``keras.Sequential`` model:
        Dense(128, relu) -> Dense(64, relu) -> Dense(1, sigmoid).
    """
    if n_features is None:
        n_features = X_train.shape[1]

    return keras.Sequential([
        # Explicit Input layer instead of `input_shape=` on the first Dense
        # layer; newer Keras releases warn against the latter in Sequential
        # models.
        keras.Input(shape=(n_features,)),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        # Single sigmoid unit: binary classification (real or fake)
        layers.Dense(1, activation="sigmoid"),
    ])

# Instantiate both networks
generator = build_generator()
discriminator = build_discriminator()

# The discriminator is compiled on its own first, before it is frozen below
discriminator.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Stack generator -> discriminator into the combined GAN model.
# The discriminator is flagged non-trainable only after its own compile, so
# the stacked model is intended to update the generator's weights only.
# NOTE(review): this compile-then-freeze pattern relies on how the installed
# Keras version handles `trainable` changes after compile -- confirm that the
# discriminator still learns when trained directly.
discriminator.trainable = False
gan_input = keras.Input(shape=(latent_dim,))
fake_data = generator(gan_input)
gan_output = discriminator(fake_data)

gan = keras.Model(inputs=gan_input, outputs=gan_output)
gan.compile(optimizer="adam", loss="binary_crossentropy")

# Train GAN
batch_size = 32
epochs = 5000  # Each "epoch" here is a single batch update, not a full data pass

# The target labels never change between steps, so build them once up front
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))
valid_labels = np.ones((batch_size, 1))  # Trick discriminator

for epoch in range(epochs):
    # Generate fake samples. predict_on_batch runs one forward pass on this
    # single batch without the per-call progress bar that predict() prints.
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    generated_data = generator.predict_on_batch(noise)

    # Select real samples (random draw with replacement from the training rows)
    idx = np.random.randint(0, X_train.shape[0], batch_size)
    real_data = X_train[idx]

    # Train Discriminator on one real batch and one fake batch, then average
    # the two [loss, accuracy] results element-wise
    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train Generator through the stacked model using fresh noise
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    g_loss = gan.train_on_batch(noise, valid_labels)

    # Print training progress
    if epoch % 500 == 0:
        # train_on_batch returns a scalar when no metrics are attached and a
        # list otherwise; taking the first entry handles both shapes.
        g_loss_value = float(np.ravel(g_loss)[0])
        print(f"Epoch {epoch} - D Loss: {d_loss[0]:.4f}, G Loss: {g_loss_value:.4f}")

# Generate new synthetic data
noise = np.random.normal(0, 1, (10, latent_dim))  # Generate 10 synthetic samples
synthetic_scaled = generator.predict(noise)  # Generator output, still in scaled space
synthetic_data = scaler.inverse_transform(synthetic_scaled)  # Convert back to original scale

print("\nGenerated Synthetic Concrete Data (First 5 Rows):")
# Label with the feature columns the scaler was fit on, rather than assuming
# the target happens to be the last column of the CSV.
print(pd.DataFrame(synthetic_data, columns=X.columns).head())

# Plot Real vs Fake Data Distribution
# Both histograms use Min-Max scaled values so they share the same axis;
# X_train is scaled, so the generated samples are plotted before the inverse
# transform. density=True normalizes the very different sample counts.
plt.figure(figsize=(10, 5))
plt.hist(X_train.flatten(), bins=50, alpha=0.6, density=True, label="Real Data")
plt.hist(synthetic_scaled.flatten(), bins=50, alpha=0.6, density=True, label="Generated Data")
plt.legend()
plt.title("Real vs Generated Data Distribution")
plt.show()

