In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Set paths.
# The dataset root can be overridden with the BYANATY_DATASET_DIR environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original location is used.
ids_dir = os.environ.get(
    'BYANATY_DATASET_DIR',
    r'C:\Users\Mohamed\Desktop\projects\Byanaty\backend\base\dataset',
)
# Held-out images are stored in a "test" folder inside the dataset root.
test_dir = os.path.join(ids_dir, 'test')

# Function to perform train-test split and move images
def split_data_and_move(ids_dir, test_dir, test_size=0.2, random_state=42):
    """Hold out part of the real and fake ID images by moving them to ``test_dir``.

    For each of the ``real`` and ``fake`` sub-folders of ``ids_dir`` the files
    are split with ``train_test_split`` and the test share is moved to the
    sub-folder of the same name under ``test_dir``. The remaining files stay
    where they are.

    Args:
        ids_dir: Folder containing the ``real`` and ``fake`` image folders.
        test_dir: Destination folder; it and its ``real``/``fake`` sub-folders
            are created when missing.
        test_size: Passed through to ``train_test_split``.
        random_state: Passed through to ``train_test_split``.

    Note:
        Files are moved, not copied, so every call shrinks the source folders.
    """
    class_names = ('real', 'fake')

    # Decide the held-out files for both classes before touching the disk.
    held_out = {}
    for class_name in class_names:
        class_dir = os.path.join(ids_dir, class_name)
        # os.listdir returns entries in arbitrary order, so the listing is
        # sorted to make random_state actually reproduce the same split.
        # Sub-directories are skipped: only regular files are candidates.
        images = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        _, held_out[class_name] = train_test_split(
            images, test_size=test_size, random_state=random_state
        )

    # Create test_dir and its class folders (no error if they already exist).
    for class_name in class_names:
        os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

    # Move the held-out images, real first and then fake.
    for class_name in class_names:
        for image in held_out[class_name]:
            src = os.path.join(ids_dir, class_name, image)
            dst = os.path.join(test_dir, class_name, image)
            shutil.move(src, dst)

    print(f"Train-Test split completed. Test images moved to {test_dir}")

# Perform the train-test split once.
# split_data_and_move moves files out of the training folders, so running this
# cell a second time would move a further share of the remaining training
# images into the test set. The split is skipped when the test folders
# already contain files.
split_already_done = any(
    os.path.isdir(os.path.join(test_dir, class_name))
    and os.listdir(os.path.join(test_dir, class_name))
    for class_name in ('real', 'fake')
)
if split_already_done:
    print(f"Test images already present in {test_dir}; skipping train-test split")
else:
    split_data_and_move(ids_dir, test_dir, test_size=0.2, random_state=42)

# Set the new directory paths after split
train_dir = ids_dir  # Training data remains in original directory
val_dir = test_dir  # Validation data moved to test directory


In [None]:
import numpy as np

# Function to calculate average dimensions of real ID images
def calculate_average_dimensions(directory):
    """Return the average (width, height) of the images in ``directory/real``.

    Only files ending in .jpg, .jpeg or .png are measured; the extension is
    compared case-insensitively so files such as ``ID.JPG`` are not skipped.

    Args:
        directory: Folder that contains a ``real`` sub-folder of images.

    Returns:
        Tuple ``(avg_width, avg_height)`` of integers (floor of the mean), or
        ``(0, 0)`` when no matching image is found.
    """
    real_dir = os.path.join(directory, 'real')
    image_exts = ('.jpg', '.jpeg', '.png')

    widths = []
    heights = []
    for filename in os.listdir(real_dir):
        if not filename.lower().endswith(image_exts):
            continue
        # NOTE(review): `tf` is not imported in the cells visible here —
        # confirm it is imported before this cell runs.
        img = tf.keras.preprocessing.image.load_img(os.path.join(real_dir, filename))
        width, height = img.size
        widths.append(width)
        heights.append(height)

    if not widths:
        return 0, 0

    return sum(widths) // len(widths), sum(heights) // len(heights)

# Use the average size of the real training images as the image size for the
# models and data generators.
IMG_WIDTH, IMG_HEIGHT = calculate_average_dimensions(train_dir)

# calculate_average_dimensions returns (0, 0) when it finds no images; stop
# here with a clear message instead of building models with a zero-sized input.
if IMG_WIDTH <= 0 or IMG_HEIGHT <= 0:
    raise ValueError(
        f"No .jpg/.jpeg/.png images found in {os.path.join(train_dir, 'real')}; "
        "cannot determine the image size"
    )

# Generator function for GAN
def build_generator(latent_dim, img_shape):
    """Build the generator: latent vector -> image of shape ``img_shape``.

    A dense layer produces a small feature grid that three stride-2 transposed
    convolutions enlarge by a factor of 8 in each direction. The grid size is
    derived from ``img_shape`` (rounded up) and any surplus rows/columns are
    cropped at the end, so the output matches ``img_shape`` exactly. For
    ``img_shape == (128, 128, 3)`` this is the 16x16 grid with no cropping.

    Args:
        latent_dim: Length of the input noise vector.
        img_shape: ``(height, width, channels)`` of the images to generate.

    Returns:
        The (uncompiled) Sequential generator model. Its summary is printed.
    """
    height, width, channels = img_shape

    n_upsamples = 3                 # number of stride-2 Conv2DTranspose layers below
    factor = 2 ** n_upsamples       # total enlargement of the seed grid
    seed_h = -(-height // factor)   # ceil(height / factor)
    seed_w = -(-width // factor)    # ceil(width / factor)

    model = models.Sequential()
    # NOTE: the dense layer grows with the image area (128 * seed_h * seed_w units).
    model.add(layers.Dense(128 * seed_h * seed_w, activation='relu', input_dim=latent_dim))
    model.add(layers.Reshape((seed_h, seed_w, 128)))
    model.add(layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding='same', activation='relu'))
    model.add(layers.Conv2DTranspose(128, kernel_size=4, strides=2, padding='same', activation='relu'))
    model.add(layers.Conv2DTranspose(64, kernel_size=4, strides=2, padding='same', activation='relu'))
    # Sigmoid output; the training images are rescaled by 1/255 elsewhere in the notebook.
    model.add(layers.Conv2D(channels, kernel_size=3, padding='same', activation='sigmoid'))

    # Trim the rounding surplus so the output is exactly (height, width).
    extra_h = seed_h * factor - height
    extra_w = seed_w * factor - width
    if extra_h or extra_w:
        top = extra_h // 2
        left = extra_w // 2
        model.add(layers.Cropping2D(cropping=((top, extra_h - top), (left, extra_w - left))))

    model.summary()
    return model

# Discriminator function for GAN
def build_discriminator(img_shape):
    """Build the discriminator: image of shape ``img_shape`` -> one sigmoid output.

    Two stride-2 convolutions, each followed by LeakyReLU and dropout, feed a
    flattened single-unit sigmoid layer.

    Args:
        img_shape: Input shape passed to the first convolution.

    Returns:
        The (uncompiled) Sequential discriminator model. Its summary is printed.
    """
    model = models.Sequential()

    layer_stack = [
        # First downsampling stage
        layers.Conv2D(64, kernel_size=3, strides=2, input_shape=img_shape, padding='same'),
        layers.LeakyReLU(alpha=0.2),
        layers.Dropout(0.4),
        # Second downsampling stage
        layers.Conv2D(128, kernel_size=3, strides=2, padding='same'),
        layers.LeakyReLU(alpha=0.2),
        layers.Dropout(0.4),
        # Classification head
        layers.Flatten(),
        layers.Dense(1, activation='sigmoid'),
    ]
    for layer in layer_stack:
        model.add(layer)

    model.summary()
    return model

# Combined GAN model
def build_gan(generator, discriminator):
    """Stack the generator and the discriminator into one Sequential model.

    Side effect: sets ``discriminator.trainable = False`` before stacking
    (intended so that training the stacked model leaves the discriminator
    weights alone).

    Args:
        generator: Model added first.
        discriminator: Model added second; its ``trainable`` flag is cleared.

    Returns:
        The (uncompiled) stacked model.
    """
    discriminator.trainable = False

    stacked = models.Sequential()
    for part in (generator, discriminator):
        stacked.add(part)
    return stacked

# Build the generator
generator = build_generator(latent_dim=100, img_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Build the discriminator
discriminator = build_discriminator(img_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Compile the discriminator BEFORE building the GAN.
# build_gan sets discriminator.trainable = False, and Keras keeps the trainable
# state a model had when compile() was called. Compiling afterwards would
# therefore freeze the discriminator in its own train_on_batch calls as well.
discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Build the GAN (stacked generator and discriminator; freezes the discriminator
# inside the stacked model)
gan = build_gan(generator, discriminator)

# Compile the GAN (used to train the generator through the frozen discriminator)
gan.compile(optimizer='adam', loss='binary_crossentropy')


In [None]:
# Data generator for training: only rescales pixel values by 1/255
# (no augmentation is configured).
train_datagen = ImageDataGenerator(rescale=1./255)

BATCH_SIZE = 16
LATENT_DIM = 100  # must match the latent_dim the generator was built with

# Generate batches of real ID images.
# classes=['real'] restricts the iterator to the real IDs. Without it every
# sub-folder of train_dir is read, which includes the 'fake' images and the
# 'test' folder that lives inside the dataset directory.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    classes=['real'],
    class_mode=None  # Generator will produce images, not classify them
)

# Train the GAN
epochs = 10
# Only one batch per epoch for simplicity; set to len(train_generator) for a
# full pass over the data. The step count is explicit because the directory
# iterator keeps yielding batches and does not end an epoch by itself.
steps_per_epoch = 1
fixed_noise = np.random.normal(0, 1, (BATCH_SIZE, LATENT_DIM))

for epoch in range(epochs):
    for _ in range(steps_per_epoch):
        batch = next(train_generator)
        # The last batch of a pass can hold fewer than BATCH_SIZE images, so
        # noise and labels are sized from the batch actually received.
        n_samples = len(batch)

        # Generate random noise as input to the generator
        noise = np.random.normal(0, 1, (n_samples, LATENT_DIM))

        # Generate fake images using the generator
        gen_images = generator.predict(noise)

        # Train the discriminator (real classified as ones and generated as zeros)
        d_loss_real = discriminator.train_on_batch(batch, np.ones((n_samples, 1)))
        d_loss_fake = discriminator.train_on_batch(gen_images, np.zeros((n_samples, 1)))

        # Train the generator (attempt to fool discriminator by classifying as real)
        g_loss = gan.train_on_batch(noise, np.ones((n_samples, 1)))

        # Print progress
        print(f'Epoch: {epoch + 1}, [D loss real: {d_loss_real[0]}, D loss fake: {d_loss_fake[0]}, G loss: {g_loss}]')

    # Optionally, save generated images
    if (epoch + 1) % 5 == 0:
        # Generate images from fixed noise for visualization
        gen_imgs = generator.predict(fixed_noise)
        # Save images or any other visualization steps

In [None]:
# Evaluate the discriminator on the test set for anomaly detection
def detect_anomalies(test_generator, discriminator):
    """Run the discriminator over every batch and collect its predictions.

    Args:
        test_generator: Iterable of batches. When it defines ``len()``, exactly
            that many batches are consumed; this is needed for iterators that
            keep yielding batches forever. Otherwise it is iterated until it
            is exhausted.
        discriminator: Object with a ``predict(batch)`` method.

    Returns:
        List with one entry per element of each prediction result, in the
        order the batches were produced.
    """
    from itertools import islice

    try:
        n_batches = len(test_generator)
    except TypeError:
        # No length available (e.g. a plain generator): consume everything.
        n_batches = None

    anomalies = []
    for batch in islice(test_generator, n_batches):
        # Predict using the discriminator
        anomalies.extend(discriminator.predict(batch))

    return anomalies

# Score the held-out images with the discriminator.
# Images are read one per batch, without labels and without shuffling.
eval_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=1,
    class_mode=None,
    shuffle=False,
)
test_generator = train_datagen.flow_from_directory(test_dir, **eval_flow_options)

anomalies = detect_anomalies(test_generator, discriminator)
print(anomalies)

In [11]:
# Sanity check: show the image height and width used for the models.
print(IMG_HEIGHT, IMG_WIDTH)

445 527
