In [None]:
import tensorflow as tf

# Reset Keras' global state so that re-running this cell starts from a clean
# slate. Note that this does not touch any GPU configuration.
tf.keras.backend.clear_session()

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured before the GPUs are initialized;
        # if that is too late TensorFlow raises RuntimeError, reported below.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is set up successfully!")
    except RuntimeError as e:
        print(f"GPU Setup Error: {e}")
else:
    # Make the CPU fallback visible instead of finishing the cell silently.
    print("No GPU detected; TensorFlow will run on CPU.")

In [None]:
import tensorflow as tf
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from imblearn.over_sampling import ADASYN
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from PIL import Image
import shutil

In [None]:
# Source roots of the two Kaggle datasets that are merged below
dataset1_path = "/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets"
dataset2_path = "/kaggle/input/iqothnccd-lung-cancer-dataset/The IQ-OTHNCCD lung cancer dataset/The IQ-OTHNCCD lung cancer dataset"

# Destination root of the merged dataset
combined_dataset_path = "/kaggle/working/Combined_Lung_Dataset"

# Source folder name -> target class of the 3-class layout.
# Insertion order is the order in which folders are processed later on.
# NOTE(review): "Bengin cases" is kept verbatim - presumably it matches the
# folder name on disk; confirm against the dataset.
# NOTE(review): the name `datasets` is re-bound by
# `from torchvision import datasets` in the GAN cell further down.
datasets = dict([
    ("lung_n", "Normal"),
    ("Normal cases", "Normal"),
    ("Bengin cases", "Benign"),
    ("lung_aca", "Malignant"),
    ("lung_scc", "Malignant"),
    ("Malignant cases", "Malignant"),
])

# Create one output directory per distinct target class
class_dirs = {os.path.join(combined_dataset_path, category) for category in datasets.values()}
for class_dir in class_dirs:
    os.makedirs(class_dir, exist_ok=True)

# Function to copy images while excluding text files
def copy_images(source_folder, target_folder):
    """Copy the image files found directly inside ``source_folder``.

    A file counts as an image when its name ends in ``.jpg``, ``.jpeg`` or
    ``.png``; the comparison is case-insensitive so ``.JPG``/``.PNG`` files
    are copied as well. Sub-directories are not descended into.

    Args:
        source_folder: Directory to read from. If it is not an existing
            directory the call does nothing.
        target_folder: Existing directory that receives the copies. A file
            already present there under the same name is replaced.

    Returns:
        None
    """
    if not os.path.isdir(source_folder):
        return

    for file in os.listdir(source_folder):
        # Lower-case the name first so upper-case extensions are not dropped
        if not file.lower().endswith((".jpg", ".png", ".jpeg")):
            continue
        source_file = os.path.join(source_folder, file)
        # Skip directories that merely look like images by name
        if os.path.isfile(source_file):
            shutil.copy(source_file, os.path.join(target_folder, file))

# Copy images from both source datasets, dataset 1 first. Every mapping entry
# is tried under each root; copy_images skips folders that do not exist there.
for dataset_root in (dataset1_path, dataset2_path):
    for folder, category in datasets.items():
        copy_images(
            os.path.join(dataset_root, folder),
            os.path.join(combined_dataset_path, category),
        )

# Drop class folders that ended up without any entries
for category in set(datasets.values()):
    target_folder = os.path.join(combined_dataset_path, category)
    if not os.listdir(target_folder):
        print(f"Removing empty folder: {target_folder}")
        shutil.rmtree(target_folder)

print("Dataset successfully merged into 3 classes: Normal, Benign, Malignant!")



In [None]:
# Define the path to the merged dataset
combined_dataset_path = "/kaggle/working/Combined_Lung_Dataset"

# File-name suffixes treated as images (compared against the lower-cased name
# so that upper-case extensions such as ".JPG" are counted too)
image_extensions = (".jpg", ".png", ".jpeg")

# Class name -> number of image files located directly in that class folder.
# Files inside nested sub-folders are not counted.
class_counts = {}
for class_name in os.listdir(combined_dataset_path):
    class_folder = os.path.join(combined_dataset_path, class_name)
    if not os.path.isdir(class_folder):  # Ignore stray files next to the class folders
        continue
    class_counts[class_name] = sum(
        1 for file in os.listdir(class_folder)
        if file.lower().endswith(image_extensions)
    )

# Display the result
for class_name, count in class_counts.items():
    print(f"{class_name}: {count} images")

# Total images across all classes
total_images = sum(class_counts.values())
print(f"\nTotal Images: {total_images}")


In [None]:
# === IMPORTS ===
import os
import shutil
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data import DataLoader
from PIL import Image
import zipfile
import random
import numpy as np
from torch.autograd import grad
import torch.nn.functional as F
from IPython.display import FileLink

# === CONFIG ===
# --- Input locations ---
combined_dataset_path = "/kaggle/working/Combined_Lung_Dataset"
benign_dir = os.path.join(combined_dataset_path, "Benign")
# Sub-folder of benign_dir that the benign images are moved into below
dummy_class_dir = os.path.join(benign_dir, "DummyClass")

# --- Hyper-parameters ---
image_size = 256   # side length images are resized to; also sizes the critic's probe input
batch_size = 32    # DataLoader batch size
latent_dim = 100   # channel count of the generator's noise input
epochs = 1200      # passes over the dataloader
lr = 1e-4          # learning rate of both Adam optimizers
n_critic = 5       # critic updates per generator update
lambda_gp = 10     # weight of the gradient penalty in the critic loss

# --- Output locations (created up front) ---
checkpoint_dir = "/kaggle/working/checkpoints_benign_gan"
sample_dir = "/kaggle/working/generated_benign"
output_dir = "/kaggle/working/Synthetic_Benign_Images"
for out_dir in (checkpoint_dir, sample_dir, output_dir):
    os.makedirs(out_dir, exist_ok=True)

# Use CUDA when it is available, otherwise fall back to the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# === FIX FOLDER STRUCTURE ===
# The dataset below is built with ImageFolder on benign_dir, which reads images
# from sub-folders of its root, so the loose benign images are moved into one
# sub-folder. The move runs on every execution rather than only when
# DummyClass is missing: images that appear in benign_dir later (for example
# after re-running the merge cell) or that were left behind by an interrupted
# run are picked up as well.
os.makedirs(dummy_class_dir, exist_ok=True)
for file in os.listdir(benign_dir):
    file_path = os.path.join(benign_dir, file)
    # isfile() also keeps the DummyClass directory itself out of the move
    if os.path.isfile(file_path) and file.lower().endswith(('.jpg', '.jpeg', '.png')):
        shutil.move(file_path, os.path.join(dummy_class_dir, file))

# === TRANSFORMS ===
# Random augmentations applied to every real image that is loaded.
# NOTE(review): these change the images the critic sees as "real", so the
# generator may learn to reproduce rotation borders / colour shifts - confirm
# this is intended.
augmentations = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
]

# Convert to a tensor and normalize each of the 3 channels with mean 0.5 and
# std 0.5 (presumably to match the [-1, 1] range of the generator's Tanh).
to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]

transform = transforms.Compose(
    [transforms.Resize((image_size, image_size))] + augmentations + to_normalized_tensor
)

# === DATASET ===
dataset = datasets.ImageFolder(benign_dir, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# === GENERATOR ===
class Generator(nn.Module):
    """Transposed-convolution generator.

    For a noise input of shape (N, latent_dim, 1, 1) the stack produces a
    (N, 3, 256, 256) tensor squashed by Tanh.
    """

    def __init__(self):
        super().__init__()
        # Channel widths of the feature maps from 4x4 up to 128x128
        widths = (1024, 512, 256, 128, 64, 32)

        # Stem: kernel 4, stride 1, no padding turns a 1x1 input into 4x4
        layers = [
            nn.ConvTranspose2d(latent_dim, widths[0], 4, 1, 0),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]

        # Each following stage (kernel 4, stride 2, padding 1) doubles the resolution
        for in_ch, out_ch in zip(widths, widths[1:]):
            layers += [
                nn.ConvTranspose2d(in_ch, out_ch, 4, 2, 1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]

        # Last doubling maps to 3 output channels
        layers += [nn.ConvTranspose2d(widths[-1], 3, 4, 2, 1), nn.Tanh()]

        # One flat Sequential so parameters stay addressed as model.<index>.*
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        generated = self.model(z)
        return generated

# === DISCRIMINATOR (CLEAN WITHOUT MINIBATCH DISCRIMINATION) ===
class Discriminator(nn.Module):
    """Convolutional critic that returns one raw score per input image.

    Four stride-2 convolutions (InstanceNorm on all but the first) feed a
    single linear layer; no activation is applied to the output.
    """

    def __init__(self):
        super().__init__()
        slope = 0.2

        # First stage has no normalization
        layers = [nn.Conv2d(3, 64, 4, 2, 1), nn.LeakyReLU(slope, inplace=True)]
        for in_ch, out_ch in ((64, 128), (128, 256), (256, 512)):
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 4, 2, 1),
                nn.InstanceNorm2d(out_ch),
                nn.LeakyReLU(slope, inplace=True),
            ])
        self.features = nn.Sequential(*layers)

        # Push one all-zero image through the conv stack to learn how many
        # values the linear layer receives for the configured image_size.
        with torch.no_grad():
            probe = self.features(torch.zeros(1, 3, image_size, image_size))
        self.flat_features = probe.numel()
        print("[DEBUG] flat_features computed for final layer:", self.flat_features)

        self.final = nn.Linear(self.flat_features, 1)

    def forward(self, x):
        feats = self.features(x)
        # Flatten everything except the batch dimension
        return self.final(feats.view(feats.size(0), -1))
# === GRADIENT PENALTY ===
def compute_gradient_penalty(D, real_samples, fake_samples):
    """Gradient penalty evaluated on random real/fake interpolations.

    Args:
        D: Callable mapping a batch of samples to per-sample scores.
        real_samples: 4-D tensor (batch first) of real samples.
        fake_samples: Tensor of generated samples with the same shape and on
            the same device as ``real_samples``.

    Returns:
        Scalar tensor: the batch mean of ``(||grad||_2 - 1) ** 2``, where
        ``grad`` is the gradient of ``D`` w.r.t. each interpolated sample.
        It is built with ``create_graph=True`` so it can be back-propagated.
    """
    # One mixing coefficient per sample, created on the samples' own device so
    # the function does not depend on a notebook-level `device` variable.
    alpha = torch.rand(real_samples.size(0), 1, 1, 1, device=real_samples.device)
    interpolates = (alpha * real_samples + (1 - alpha) * fake_samples).requires_grad_(True)
    d_interpolates = D(interpolates)

    # d(score)/d(input); the all-ones grad_outputs weights every score equally
    gradients = grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
        retain_graph=True,
    )[0]

    # reshape (not view) so a non-contiguous gradient is handled as well
    gradients = gradients.reshape(gradients.size(0), -1)
    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gradient_penalty


# === INIT MODELS ===
# Generator first, critic second, fixed noise last - the same order in which
# the random number generator is consumed.
netG = Generator().to(device)
netD = Discriminator().to(device)

# Both networks use Adam with identical settings
adam_kwargs = {"lr": lr, "betas": (0.5, 0.9)}
optimizer_G = optim.Adam(netG.parameters(), **adam_kwargs)
optimizer_D = optim.Adam(netD.parameters(), **adam_kwargs)

# Latent batch reused for the sample grid saved after every epoch
fixed_noise = torch.randn((64, latent_dim, 1, 1), device=device)

# === TRAINING LOOP ===
# For every batch: n_critic critic updates (Wasserstein loss + gradient
# penalty) followed by one generator update. After every epoch a sample grid
# and a checkpoint are written; the checkpoint file is overwritten each time.
for epoch in range(epochs):
    for i, (imgs, _) in enumerate(dataloader):
        real_imgs = imgs.to(device)
        # Size of this particular batch (the last one can be smaller). It gets
        # its own name so the `batch_size` config value is not overwritten.
        cur_batch_size = real_imgs.size(0)

        # --- Critic updates (all on the same real batch) ---
        for _ in range(n_critic):
            z = torch.randn(cur_batch_size, latent_dim, 1, 1, device=device)
            # The generator is not updated here, so skip building its graph
            with torch.no_grad():
                fake_imgs = netG(z)

            real_validity = netD(real_imgs)
            fake_validity = netD(fake_imgs)
            gp = compute_gradient_penalty(netD, real_imgs, fake_imgs)
            d_loss = -torch.mean(real_validity) + torch.mean(fake_validity) + lambda_gp * gp

            optimizer_D.zero_grad()
            d_loss.backward()
            optimizer_D.step()

        # --- Generator update ---
        z = torch.randn(cur_batch_size, latent_dim, 1, 1, device=device)
        gen_imgs = netG(z)
        g_loss = -torch.mean(netD(gen_imgs))

        optimizer_G.zero_grad()
        g_loss.backward()
        optimizer_G.step()

        # Loss_D is the loss of the last critic step for this batch
        if i % 10 == 0:
            print(f"[Epoch {epoch}/{epochs}] [Batch {i}/{len(dataloader)}] Loss_D: {d_loss.item():.4f}, Loss_G: {g_loss.item():.4f}")

    # Sample grid from the fixed noise; no gradients are needed for it, so the
    # autograd graph for these 64 images is never built.
    with torch.no_grad():
        epoch_samples = netG(fixed_noise).cpu()
    save_image(epoch_samples, os.path.join(sample_dir, f"epoch_{epoch}.png"), normalize=True)

    torch.save({
        'epoch': epoch,
        'netG': netG.state_dict(),
        'netD': netD.state_dict(),
        'optimizerG': optimizer_G.state_dict(),
        'optimizerD': optimizer_D.state_dict(),
    }, os.path.join(checkpoint_dir, 'checkpoint.pth'))

    print(f"\u2705 Checkpoint saved at epoch {epoch}")

# === GENERATE FINAL SYNTHETIC IMAGES ===
# Switch the generator to evaluation mode and write the images one by one,
# each from its own freshly drawn noise vector.
netG.eval()
num_generate = 5000
with torch.no_grad():
    for i in range(num_generate):
        z = torch.randn(1, latent_dim, 1, 1, device=device)
        img = netG(z).cpu()
        out_file = os.path.join(output_dir, f"benign_{i:04d}.png")
        save_image(img, out_file, normalize=True)

print(f"\u2705 {num_generate} synthetic benign images saved to {output_dir}")

# === ZIP CHECKPOINTS ===
# Archive every file below checkpoint_dir, stored under its path relative to
# that directory.
output_zip = "/kaggle/working/checkpoints_benign_gan.zip"
with zipfile.ZipFile(output_zip, "w") as zipf:
    for root, _, files in os.walk(checkpoint_dir):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, checkpoint_dir)
            zipf.write(file_path, arcname)

# The package emoji is written as the single code point \U0001F4E6. Spelling
# it as a UTF-16 surrogate pair (two \uXXXX escapes) yields two lone
# surrogates in a Python str, which cannot be encoded as UTF-8 when printed.
print("\U0001F4E6 Checkpoint zipped at: /kaggle/working/checkpoints_benign_gan.zip")
# Define the directory path
folder_path = "/kaggle/working/Synthetic_Benign_Images"
zip_path = "/kaggle/working/Synthetic_Benign_Images.zip"

# Zip the folder. make_archive appends ".zip" itself, so it is given the path
# without the extension; splitext removes only the trailing extension.
shutil.make_archive(base_name=os.path.splitext(zip_path)[0], format='zip', root_dir=folder_path)

print(f"✅ Folder zipped at: {zip_path}")
# Display download link
FileLink(zip_path)

In [None]:
import matplotlib.pyplot as plt
import os
from PIL import Image

# Directory containing synthetic images
generated_dir = "/kaggle/working/Synthetic_Benign_Images"

# The first nine directory entries in name order
sample_files = sorted(os.listdir(generated_dir))[:9]

# One panel per sample on a 3x3 grid; panels are only created for files that exist
fig = plt.figure(figsize=(10, 10))
for i, file in enumerate(sample_files):
    img = Image.open(os.path.join(generated_dir, file))

    ax = fig.add_subplot(3, 3, i + 1)
    ax.imshow(img)
    ax.set_title(file)
    ax.axis("off")

fig.tight_layout()
plt.show()

In [None]:
import shutil
from IPython.display import FileLink

# Step 1: Zip the folder. The archive is written next to the folder, under the
# same name plus ".zip" (make_archive appends the extension).
synthetic_dir = '/kaggle/working/Synthetic_Benign_Images'
zip_path = '/kaggle/working/Synthetic_Benign_Images.zip'
shutil.make_archive(synthetic_dir, 'zip', synthetic_dir)

# Step 2: Generate a download link
FileLink(zip_path)
