In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split


In [None]:
# Load the training features and targets. The directory is a single constant so
# the notebook is not tied to the Colab-specific '/content' mount.
from pathlib import Path

DATA_DIR = Path('/content')

X_train = pd.read_csv(DATA_DIR / 'X_Train_Data_Input.csv')
y_train = pd.read_csv(DATA_DIR / 'Y_Train_Data_Target.csv')

# Later cells pair features and targets purely by row position, so the two
# files must describe the same rows in the same order.
assert len(X_train) == len(y_train), "feature and target files have different row counts"
if 'ID' in X_train.columns and 'ID' in y_train.columns:
    assert X_train['ID'].equals(y_train['ID']), "feature and target rows are not in the same ID order"


In [None]:
# Preview the first rows of the raw feature table.
X_train.head()

Unnamed: 0,ID,Column0,Column1,Column2,Column3,Column4,Column5,Column6,Column7,Column8,...,Column12,Column13,Column14,Column15,Column16,Column17,Column18,Column19,Column20,Column21
0,ad1a67e4cbddc767a3456b0d94299b9e,2.0,2495,3726.0,0.678139,0.701403,-0.007468,0.43419,-0.015603,0.606265,...,0,0,0.001351,0.00339,0.0,0,0.0,0,0,0
1,7246d2f76ac0c217ec25e72ea5f014cb,0.0,2495,3454.0,0.45258,0.701403,-0.007468,1.554998,-0.015574,0.329946,...,0,0,0.001351,0.00339,0.0,0,0.0,0,0,0
2,22ba388e7dd14c13342c49e75fc29dda,2.0,2495,4543.0,-1.577453,-1.42954,-0.007469,-0.407939,-0.015607,-0.774979,...,1,1,0.001351,0.00339,0.0,0,0.0,0,0,0
3,59f9b981472d97342587fb3e6392aeb1,0.0,211,59.0,,,,-0.407939,-0.015607,-0.774979,...,0,0,,0.00339,0.0,0,1.0,0,0,0
4,f6317cf7ecf126859804eddff279aead,0.0,718,950.0,-2.028572,-1.855728,,-0.407939,-0.015607,-0.774979,...,0,0,,0.00339,0.0,0,0.0,0,0,0


In [None]:

# Count the missing entries in each column of the raw feature table.
X_train.isna().sum()

Unnamed: 0,0
ID,0
Column0,9
Column1,0
Column2,0
Column3,126303
Column4,127710
Column5,167180
Column6,3850
Column7,0
Column8,3850


In [None]:
# (rows, columns) of the raw feature table.
X_train.shape

(785133, 23)

In [None]:
# Remove the row identifier together with Column9 and Column14. Labels that are
# not present in the frame are skipped silently instead of raising.
X_train_new = X_train.drop(columns=["Column9", "Column14", "ID"], errors='ignore')


In [None]:
from sklearn.impute import SimpleImputer

# Learn each column's mean, then fill the missing entries of that column with it.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train_new)
X_train_new = imputer.transform(X_train_new)

In [None]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# The generator's last layer is tanh, so it can only emit values in [-1, 1].
# Standardised features are unbounded, which would leave the real samples on a
# range the generator cannot reach. Scale every feature into [-1, 1] instead so
# real and generated samples share the same range; `scaler.inverse_transform`
# still maps generated samples back to the original units.
scaler = MinMaxScaler(feature_range=(-1, 1))
X_train_new = scaler.fit_transform(X_train_new)

In [None]:
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.initializers import RandomNormal
import tensorflow.keras.backend as K
from sklearn.utils import shuffle

In [None]:
class cGAN():
    """Conditional GAN for fixed-length feature vectors, built from dense layers.

    ``generator`` maps (noise, class label) to a vector of ``out_shape``
    features. ``discriminator`` scores a (feature vector, class label) pair
    with a sigmoid. ``combined`` chains the two so the generator can be trained
    against the discriminator's verdict. In both networks the label is embedded
    and multiplied element-wise into the other input.

    The generator ends in ``tanh``, so every generated feature lies in
    [-1, 1]; real samples given to ``train`` should be scaled to a comparable
    range.
    """

    def __init__(self, latent_dim=32, out_shape=20, num_classes=2):
        """Build and compile the discriminator, the generator and the stacked model.

        Args:
            latent_dim: Length of the noise vector fed to the generator.
            out_shape: Number of features in one real/generated sample.
            num_classes: Number of distinct class labels to condition on.
        """
        self.latent_dim = latent_dim
        self.out_shape = out_shape
        self.num_classes = num_classes
        self.clip_value = 0.01  # not read anywhere in this class; kept for existing callers

        # Each compiled model gets its own Adam instance: an optimizer keeps
        # per-variable state, so sharing one object between two models that
        # update different variables is unsafe.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=['binary_crossentropy'],
                                   optimizer=Adam(0.0002, 0.5),
                                   metrics=['accuracy'])

        self.generator = self.build_generator()

        # Symbolic inputs for the stacked generator -> discriminator model.
        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        gen_samples = self.generator([noise, label])

        # Inside the stacked model only the generator should be updated.
        self.discriminator.trainable = False

        # Score the generated samples with the (frozen) discriminator.
        valid = self.discriminator([gen_samples, label])

        self.combined = Model([noise, label], valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=Adam(0.0002, 0.5),
                              metrics=['accuracy'])
        self.combined.summary()

    def build_generator(self):
        """Return the generator: Model([noise, label]) -> sample of ``out_shape`` features.

        The label is embedded into ``latent_dim`` values and multiplied with the
        noise before passing through three Dense/LeakyReLU/BatchNorm stages and
        a tanh output layer.
        """
        model = Sequential()

        # An explicit Input layer replaces `input_dim=` on the first Dense,
        # which newer Keras releases warn about.
        model.add(Input(shape=(self.latent_dim,)))
        for units in (128, 256, 512):
            model.add(Dense(units))
            # Slope passed positionally: the keyword is `alpha` in older Keras
            # and `negative_slope` in newer releases.
            model.add(LeakyReLU(0.2))
            model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(self.out_shape, activation='tanh'))
        model.summary()

        noise = Input(shape=(self.latent_dim,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))

        model_input = multiply([noise, label_embedding])
        gen_sample = model(model_input)

        return Model([noise, label], gen_sample, name="Generator")

    def build_discriminator(self):
        """Return the discriminator: Model([sample, label]) -> sigmoid validity score.

        The label is embedded into ``out_shape`` values and multiplied with the
        sample before three Dense/LeakyReLU stages (dropout after the second and
        third) and a single sigmoid unit.
        """
        init = RandomNormal(mean=0.0, stddev=0.02)
        model = Sequential()

        model.add(Input(shape=(self.out_shape,)))
        model.add(Dense(512, kernel_initializer=init))
        model.add(LeakyReLU(0.2))

        model.add(Dense(256, kernel_initializer=init))
        model.add(LeakyReLU(0.2))
        model.add(Dropout(0.4))

        model.add(Dense(128, kernel_initializer=init))
        model.add(LeakyReLU(0.2))
        model.add(Dropout(0.4))

        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        gen_sample = Input(shape=(self.out_shape,))
        label = Input(shape=(1,), dtype='int32')
        label_embedding = Flatten()(Embedding(self.num_classes, self.out_shape)(label))

        model_input = multiply([gen_sample, label_embedding])
        validity = model(model_input)

        return Model(inputs=[gen_sample, label], outputs=validity, name="Discriminator")

    def train(self, X_train, y_train, pos_index, neg_index, epochs, batch_size=32,
              sample_interval=50, pos_per_batch=8):
        """Alternate one discriminator update and one generator update per iteration.

        Args:
            X_train: Array of real samples, indexable by an integer index array.
            y_train: Array of labels aligned row-for-row with ``X_train``.
            pos_index: Row positions of the positive class.
            neg_index: Row positions of the negative class.
            epochs: Number of iterations; each iteration draws one batch.
            batch_size: Number of real samples (and of generated samples) per iteration.
            sample_interval: Print the losses every this many iterations.
            pos_per_batch: Positive-class rows drawn into every real batch; the
                remainder of the batch comes from the negative class. Capped at
                ``batch_size``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Never request more positives than the batch can hold, so small batch
        # sizes do not produce a negative count for the other class.
        n_pos = max(0, min(pos_per_batch, batch_size))
        n_neg = batch_size - n_pos

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):
            # Draw the class-balanced batch (sampling with replacement), then
            # shuffle the row order.
            idx = np.concatenate((np.random.choice(pos_index, n_pos),
                                  np.random.choice(neg_index, n_neg))).astype(np.intp)
            np.random.shuffle(idx)
            samples, labels = X_train[idx], y_train[idx]

            # Generate one batch of fake samples for the same labels.
            # verbose=0 suppresses the per-call progress bar.
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_samples = self.generator.predict([noise, labels], verbose=0)

            # Both models were compiled once in __init__ and are NOT recompiled
            # here: compiling with a new optimizer on every step would throw
            # away Adam's accumulated moment estimates. Only the trainable flag
            # is switched around the two updates.
            self.discriminator.trainable = True
            d_loss_real = self.discriminator.train_on_batch([samples, labels], valid)
            d_loss_fake = self.discriminator.train_on_batch([gen_samples, labels], fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator step: the discriminator is frozen and the generator is
            # rewarded when its samples are scored as valid.
            self.discriminator.trainable = False
            sampled_labels = np.random.randint(0, self.num_classes, batch_size).reshape(-1, 1)
            g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)

            if (epoch + 1) % sample_interval == 0:
                print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100 * d_loss[1]}] [G loss: {g_loss}]")

In [None]:
# Seed Python, NumPy and TensorFlow before any weights are created, so that a
# fresh "Restart & Run All" starts from the same initial model and draws the
# same random batches.
from tensorflow.keras.utils import set_random_seed

SEED = 42
set_random_seed(SEED)

cgan = cGAN()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Keep only the target column; a missing "ID" label is skipped silently, so the
# cell can be re-run safely.
y_train = y_train.drop(columns=["ID"], errors='ignore')

In [None]:
# Number of rows per target value (shows how imbalanced the classes are).
y_train.value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
0,711100
1,74033


In [None]:
# Independent NumPy copy of the target frame, used for positional indexing
# during training.
y_train_np = y_train.to_numpy(copy=True)

In [None]:
# Row positions of each class, taken from the same label array that is handed
# to the trainer so indices and labels cannot drift apart.
labels_flat = y_train_np.ravel()
assert labels_flat.shape[0] == X_train_new.shape[0], "expected exactly one label per feature row"

pos_index = np.flatnonzero(labels_flat == 1)
neg_index = np.flatnonzero(labels_flat == 0)
assert pos_index.size > 0 and neg_index.size > 0, "both classes must be present to build a batch"

cgan.train(X_train_new, y_train_np, pos_index, neg_index, epochs=100)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17

Generate synthetic Class 1 samples to reach a chosen ratio between the two classes. For example, for a 2:1 ratio of Class 0 to Class 1:
# Number of samples to generate = (711,100 / 2) − 74,033 = 281,517

In [None]:
# Size the synthetic set from the actual class counts so that
# Class 0 : Class 1 becomes TARGET_RATIO : 1 once the samples are added.
TARGET_RATIO = 2
class_labels = y_train_np.ravel()
n_class0 = int((class_labels == 0).sum())
n_class1 = int((class_labels == 1).sum())
n = max(n_class0 // TARGET_RATIO - n_class1, 0)  # Number of synthetic samples to generate

# The noise width is read from the model instead of being repeated as a literal.
noise = np.random.normal(0, 1, (n, cgan.latent_dim))
sampled_labels = np.ones((n, 1), dtype='int32')  # Generating samples for Class 1


In [None]:
# A larger inference batch cuts the number of predict steps by orders of
# magnitude compared with the default batch of 32.
gen_samples = cgan.generator.predict([noise, sampled_labels], batch_size=4096)


[1m8798/8798[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step


In [None]:
# Fail fast if the generator did not return one row per requested sample and
# one column per feature, instead of relying on a visual check.
assert gen_samples.shape == (n, cgan.out_shape), "unexpected generator output shape"
print(gen_samples.shape)  # Should be (n, out_shape)


(281517, 20)


In [None]:
# Map the generated values from the scaler's space back to the original feature units.
# NOTE(review): this overwrites gen_samples, so running the cell a second time
# applies the inverse transform twice; re-run the predict cell first.
gen_samples = scaler.inverse_transform(gen_samples)


In [None]:
# Shape of the generated samples after the inverse transform.
gen_samples.shape

(281517, 20)

In [None]:

# Wrap the generated matrix in a DataFrame using placeholder names
# col_0, col_1, ... (one per generated feature).
n_features = gen_samples.shape[1]
column_names = [f"col_{i}" for i in range(n_features)]
gen_samples_df = pd.DataFrame(data=gen_samples, columns=column_names)


In [None]:
# Tag every synthetic row as Class 1, then show a plain-text preview.
gen_samples_df = gen_samples_df.assign(label=1)

print(gen_samples_df.head())

      col_0       col_1       col_2     col_3     col_4     col_5     col_6  \
0  0.698778  421.636261  810.057739  0.829179 -0.240224 -0.479210 -0.451620   
1  1.577255  454.446198  812.271851  0.019753 -0.277022 -0.244672 -0.534951   
2  1.515350  436.110565  808.592651  0.720687  0.242697 -0.832778  0.316081   
3  1.546751  631.978027  825.041870 -0.083859  0.851728 -0.854244 -0.623296   
4  1.567316  450.661224  812.158325 -0.306066  0.387258 -0.398624 -0.977862   

      col_7     col_8     col_9  ...    col_11    col_12    col_13    col_14  \
0 -1.011667 -1.043782  0.622479  ... -0.117433  0.497455  1.199237  0.020551   
1 -0.882108 -1.050787  0.663578  ... -0.117216  0.711109  1.196053  0.008910   
2 -0.750346 -1.052443  0.632089  ... -0.116837  0.734473  1.226544  0.028946   
3 -0.936345 -1.053743  0.667366  ... -0.115615  0.784540  1.177224  0.032842   
4 -0.469402 -1.050499  0.654247  ... -0.099254  0.129228 -0.270037  0.033665   

     col_15    col_16    col_17    col_18   

In [None]:

# Write the synthetic samples to the current working directory, without the row index.
gen_samples_df.to_csv('generated_samples.csv', index=False)
