<a href="https://colab.research.google.com/github/nasibehmohammadi/Thesis/blob/main/gan_lstm_d2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, RepeatVector, TimeDistributed
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Load the dataset. Column 'A' is used as the class label and every other
# column is treated as a feature.
file_path = "/content/kaggleP.csv"
df = pd.read_csv(file_path)

y = df['A']
X = df.drop('A', axis=1)

# Rescale every feature with a MinMaxScaler (default settings) so that all
# downstream models see inputs on a common scale.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Partition the scaled rows by label value (0 and 1).
X_class0, X_class1 = (X_scaled[y == label] for label in (0, 1))

# Length of the random noise vector fed to the generator.
latent_dim = 10

def build_generator():
    """Build the LSTM generator that turns noise into one synthetic row.

    The model consumes a length-1 sequence of ``latent_dim`` noise values
    and emits a tensor of shape ``(batch, 1, n_features)``, where
    ``n_features`` is ``X_class0.shape[1]``. The trailing ``RepeatVector(1)``
    restores the length-1 time axis so the output can be fed straight into
    the discriminator.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    n_features = X_class0.shape[1]
    model = Sequential([
        # An explicit Input object replaces the `input_shape=` layer argument,
        # which newer Keras versions warn about on Sequential models.
        tf.keras.Input(shape=(1, latent_dim)),
        LSTM(64, return_sequences=True),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        # The real rows are scaled with a default MinMaxScaler, i.e. into
        # [0, 1]. A sigmoid keeps generated values in that same range,
        # whereas tanh could emit negative values that no real row contains.
        Dense(n_features, activation='sigmoid'),
        RepeatVector(1),
    ])
    return model

def build_discriminator():
    """Build and compile the LSTM discriminator.

    The model takes a length-1 sequence with ``X_class0.shape[1]`` features
    and outputs a single sigmoid score per sample. The training loop labels
    real rows 1 and generated rows 0.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy, an Adam
        optimizer (learning rate 0.0002, beta_1 0.5) and an accuracy metric.
    """
    model = Sequential([
        # An explicit Input object replaces the `input_shape=` layer argument,
        # which newer Keras versions warn about on Sequential models.
        tf.keras.Input(shape=(1, X_class0.shape[1])),
        LSTM(64, return_sequences=True),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(
        loss='binary_crossentropy',
        # Keyword arguments make the meaning of the two numbers explicit.
        optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
        metrics=['accuracy'],
    )
    return model

# Instantiate the two networks.
generator = build_generator()
discriminator = build_discriminator()

# Freeze the discriminator before stacking it behind the generator, so that
# updates made through `gan` are meant to adjust the generator only.
# NOTE(review): the flag is flipped after the discriminator was compiled in
# build_discriminator(); confirm that discriminator.train_on_batch() still
# updates its weights on the installed Keras version.
discriminator.trainable = False

# Stacked model: noise -> generator -> discriminator score.
gan_input = tf.keras.Input(shape=(1, latent_dim))
generated_rows = generator(gan_input)
gan_output = discriminator(generated_rows)
gan = tf.keras.Model(inputs=gan_input, outputs=gan_output)
gan.compile(
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    loss='binary_crossentropy',
)

# Training hyperparameters.
batch_size = 64
epochs = 200

# Adversarial training. Each "epoch" below is one discriminator update on a
# real batch, one on a generated batch, and one generator update through the
# stacked `gan` model; it is not a full pass over the data.

# The target labels never change, so build them once outside the loop.
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

for epoch in range(epochs):
    # --- Discriminator step -------------------------------------------------
    noise = np.random.normal(0, 1, (batch_size, 1, latent_dim))
    # verbose=0 suppresses the progress bar that predict() would otherwise
    # print on every single iteration.
    generated_data = generator.predict(noise, verbose=0)

    # Sample a random batch (with replacement) of real class-0 rows and add
    # the length-1 time axis expected by the LSTM layers.
    idx = np.random.randint(0, X_class0.shape[0], batch_size)
    real_data = X_class0[idx].reshape(batch_size, 1, -1)

    d_loss_real = discriminator.train_on_batch(real_data, real_labels)
    d_loss_fake = discriminator.train_on_batch(generated_data, fake_labels)
    # Element-wise mean of the [loss, accuracy] pairs from the two updates.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # --- Generator step -----------------------------------------------------
    # Train through the stacked model with "real" targets so the generator is
    # pushed towards samples the discriminator scores as real.
    noise = np.random.normal(0, 1, (batch_size, 1, latent_dim))
    g_loss = gan.train_on_batch(noise, real_labels)

    if epoch % 20 == 0:
        print(f"Epoch {epoch} | D Loss: {d_loss[0]:.4f} | G Loss: {g_loss:.4f}")

# Number of synthetic class-0 rows required to match the class-1 row count.
num_samples_needed = len(X_class1) - len(X_class0)
if num_samples_needed < 0:
    # Fail with a clear message instead of numpy's "negative dimensions"
    # error from np.random.normal.
    raise ValueError(
        "Class 0 already has more rows than class 1; "
        "no class-0 samples need to be generated."
    )

if num_samples_needed > 0:
    noise = np.random.normal(0, 1, (num_samples_needed, 1, latent_dim))
    # Drop the length-1 time axis: (n, 1, n_features) -> (n, n_features).
    synthetic_data = generator.predict(noise, verbose=0).reshape(num_samples_needed, -1)
else:
    # Classes are already balanced; nothing to generate.
    synthetic_data = np.empty((0, X_scaled.shape[1]))

# The generator was trained against rows taken from X_scaled, so its output
# is already in the scaler's feature space. It must stay there: stacking
# inverse-transformed (original-unit) rows under X_scaled would mix two
# different feature scales in one matrix.
X_balanced = np.vstack((X_scaled, synthetic_data))
y_balanced = np.hstack((y, np.zeros(num_samples_needed)))

# Hold out 20% of the balanced data for evaluation (fixed seed for
# reproducible splits).
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

# Add the length-1 time axis expected by the LSTM layers:
# (n, n_features) -> (n, 1, n_features).
X_train = X_train.reshape(-1, 1, X_train.shape[1])
X_test = X_test.reshape(-1, 1, X_test.shape[1])

classifier = Sequential([
    # An explicit Input object replaces the `input_shape=` layer argument,
    # which newer Keras versions warn about on Sequential models.
    tf.keras.Input(shape=(1, X_train.shape[2])),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dense(1, activation='sigmoid'),
])
classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

classifier.fit(X_train, y_train, epochs=10, batch_size=64, verbose=1)

# Threshold the sigmoid scores at 0.5 and flatten the (n, 1) predictions to
# shape (n,) so they line up with the 1-D label vector.
y_pred = (classifier.predict(X_test) > 0.5).astype(int).ravel()
accuracy = accuracy_score(y_test, y_pred)
f_score = f1_score(y_test, y_pred)

print(f"Balanced Dataset Accuracy: {accuracy:.4f}")
print(f"Balanced Dataset F1-Score: {f_score:.4f}")

# Write the balanced dataset to disk with the label in the first column.
# Feature names come from X (the frame the feature matrix was built from),
# not from df.columns[1:], which is only correct when 'A' happens to be the
# first column of the input file.
feature_columns = list(X.columns)
balanced_df = pd.DataFrame(
    np.column_stack((y_balanced, X_balanced)),
    columns=['A'] + feature_columns,
)
balanced_df.to_csv("/content/KaggleP_balanced.csv", index=False)

print("Balanced dataset saved as KaggleP_balanced.csv")


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step 




Epoch 0 | D Loss: 0.6907 | G Loss: 0.6932
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m2/2[0m [32m━━━━━

  super().__init__(**kwargs)


[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6527 - loss: 0.6761
Epoch 2/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9324 - loss: 0.2979
Epoch 3/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9717 - loss: 0.1190
Epoch 4/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9677 - loss: 0.1268
Epoch 5/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9677 - loss: 0.1272
Epoch 6/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9679 - loss: 0.1263
Epoch 7/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9690 - loss: 0.1227
Epoch 8/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9701 - loss: 0.1183
Epoch 9/10
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1