In [26]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LeakyReLU, Concatenate
from tensorflow.keras.optimizers import Adam

# Load California Housing dataset
data = fetch_california_housing()
X = data.data
feature=data.feature_names
X.shape

(20640, 8)

In [20]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
feature=X.feature_names

AttributeError: 'numpy.ndarray' object has no attribute 'feature_names'

In [12]:
def create_generator():
    noise_dim = 100
    noise_input = Input(shape=(noise_dim,))
    hidden = Dense(128, activation='LeakyReLU')(noise_input)
    output = Dense(X_scaled.shape[1], activation='linear')(hidden)
    model = Model(inputs=noise_input, outputs=output)
    return model


In [13]:
def create_discriminator():
    data_input = Input(shape=(X_scaled.shape[1],))
    hidden = Dense(128, activation='LeakyReLU')(data_input)
    output = Dense(1, activation='sigmoid')(hidden)
    model = Model(inputs=data_input, outputs=output)
    return model


In [14]:

# cGAN
def create_cgan(generator, discriminator):
    noise_input = Input(shape=(100,))
    generated_data = generator(noise_input)
    validity = discriminator(generated_data)
    model = Model(inputs=noise_input, outputs=validity)
    return model

In [15]:
# Create and compile the Discriminator
discriminator = create_discriminator()
discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))

# Create the Generator
generator = create_generator()


gan = create_cgan(generator, discriminator)
# Ensure that only the generator is trained
discriminator.trainable = False

gan.compile(loss='binary_crossentropy', optimizer=Adam())


In [16]:
batch_size = 64
training_steps = 100

for step in range(training_steps):
    # Select a random batch of real data
    idx = np.random.randint(0, X_scaled.shape[0], batch_size)
    real_data = X_scaled[idx]

    # Generate a batch of new data
    noise = np.random.normal(0, 1, (batch_size, 100))
    generated_data = generator.predict(noise)

    # Train the discriminator
    real_loss = discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    fake_loss = discriminator.train_on_batch(generated_data, np.zeros((batch_size, 1)))
    discriminator_loss = 0.5 * np.add(real_loss, fake_loss)

    # Train the generator
    generator_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    if step % 1000 == 0:
        print(f"Step: {step}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")


Step: 0, Discriminator Loss: 0.666893720626831, Generator Loss: 0.7720139622688293


In [17]:
num_synthetic_samples = 1000
noise = np.random.normal(0, 1, (num_synthetic_samples, 100))
synthetic_data = generator.predict(noise)
synthetic_data_unscaled = scaler.inverse_transform(synthetic_data)




In [24]:
# Generate instances for a given class
def generate_data(generator, num_instances):
    noise = np.random.normal(0, 1, (num_instances, 100))
    generated_data = generator.predict(noise)
    return pd.DataFrame(generated_data,columns=feature)

# Generate 40 instances of class 1
generated_data = generate_data(generator, 40)
synthetic_data = pd.DataFrame(generated_data)
synthetic_data.to_csv(r'C:\Users\lia68085\test\synthetic_house_data.csv', index=False)

      MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \
0  -0.363010  0.284385 -0.854360  -0.565244   -0.828770  0.206482 -0.474941   
1  -0.225982 -0.054308 -0.462691  -0.961661   -0.016457 -0.161887 -0.588831   
2  -0.023404  0.709455 -0.746130  -0.478682   -0.679322  0.143626 -0.425006   
3   0.241273  0.391960 -0.561241   0.137048    0.256928 -0.643878 -0.158072   
4   0.898096  0.511746 -0.614307   0.167789   -0.723261 -0.587407 -0.423459   
5  -0.075284 -0.231068 -1.124324  -0.380837   -0.104739 -0.579997 -0.775675   
6   0.303988  0.349243 -0.479456   0.108758   -0.417100 -0.663867 -1.164432   
7  -0.060566  0.323776 -0.252997   0.227646   -0.532423 -0.616802 -0.502853   
8  -0.682518  0.042494 -0.819971  -1.072586    0.644965  0.037707  0.485268   
9   0.467033  0.968230  0.349988  -0.385955   -0.767810  0.347050 -0.246655   
10  1.060199  0.328364  0.041503   0.696764   -0.747908  0.457307 -0.261987   
11  0.198800  0.001134 -0.132606  -0.213431    0.295

In [None]:
# Assuming synthetic_data_unscaled is your generated dataset
# And assuming the last column of the dataset represents median house values

# Define a threshold for categorizing high and low value areas
value_threshold = np.median(synthetic_data_unscaled[:, -1])

# Assign labels based on the threshold
labels = ['High' if value > value_threshold else 'Low' for value in synthetic_data_unscaled[:, -1]]

# Add labels to your DataFrame
import pandas as pd
synthetic_df = pd.DataFrame(synthetic_data_unscaled, columns=data.feature_names)
synthetic_df['Label'] = labels
