In [None]:
# this is generated by chatgpt for imbalanced dataset generation
import cv2
import numpy as np
import os
import random

# Output directory
output_dir = 'shapes_dataset'
classes = ['circle', 'square', 'triangle']
distribution = {'circle': 1000, 'square': 1000, 'triangle': 200}  # imbalance

image_size = 64

os.makedirs(output_dir, exist_ok=True)

def draw_shape(shape):
    img = np.ones((image_size, image_size, 3), dtype=np.uint8) * 255
    color = (0, 0, 0)

    if shape == 'circle':
        center = (random.randint(20, 44), random.randint(20, 44))
        radius = random.randint(10, 18)
        cv2.circle(img, center, radius, color, -1)

    elif shape == 'square':
        top_left = (random.randint(10, 30), random.randint(10, 30))
        size = random.randint(20, 30)
        bottom_right = (top_left[0] + size, top_left[1] + size)
        cv2.rectangle(img, top_left, bottom_right, color, -1)

    elif shape == 'triangle':
        pt1 = (random.randint(10, 54), random.randint(10, 54))
        pt2 = (pt1[0] + random.randint(-20, 20), pt1[1] + random.randint(20, 30))
        pt3 = (pt1[0] + random.randint(20, 30), pt1[1] + random.randint(-10, 10))
        pts = np.array([pt1, pt2, pt3], np.int32)
        pts = pts.reshape((-1, 1, 2))
        cv2.fillPoly(img, [pts], color)

    return img

for shape in classes:
    shape_dir = os.path.join(output_dir, shape)
    os.makedirs(shape_dir, exist_ok=True)
    for i in range(distribution[shape]):
        img = draw_shape(shape)
        cv2.imwrite(os.path.join(shape_dir, f'{shape}_{i}.png'), img)


In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.utils import save_image
from PIL import Image


In [6]:
# Device config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [51]:
# Hyperparameters
image_size = 64
batch_size = 64
nz = 100  # Noise vector size
ngf = 64  # Generator feature map size
ndf = 64  # Discriminator feature map size
num_epochs = 100
lr = 0.002
beta1 = 0.5

In [42]:
# Paths
data_path = "shapes_dataset/triangle"
output_path = "generated_triangles"
os.makedirs(output_path, exist_ok=True)


In [43]:
# Transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [44]:
# Dataset & Loader
dataset = ImageFolder(root="shapes_dataset", transform=transform)
triangle_dataset = [(x, y) for x, y in dataset if dataset.classes[y] == "triangle"]
triangle_loader = DataLoader(triangle_dataset, batch_size=batch_size, shuffle=True)

In [45]:
# Generator
class Generator(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),

            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),

            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),

            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),

            nn.ConvTranspose2d(ngf, 1, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, x):
        return self.net(x)

In [46]:
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, ndf, 4, 2, 1, bias=False),      # (64x64) -> (32x32)
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), # (32x32) -> (16x16)
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), # (16x16) -> (8x8)
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), # (8x8) -> (4x4)
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),       # (4x4) -> (1x1)
            nn.Sigmoid()
        )

    def forward(self, x):
        out = self.net(x)       # [batch_size, 1, 1, 1]
        return out.view(-1)     # [batch_size]


In [47]:
# Initialize models
netG = Generator().to(device)
netD = Discriminator().to(device)


In [48]:
# Loss and Optimizers
criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))


In [49]:
# Fixed noise for sample generation
fixed_noise = torch.randn(64, nz, 1, 1, device=device)


In [None]:
# === Training Loop ===
print("Training started...")
for epoch in range(num_epochs):
    for i, (real_images, _) in enumerate(triangle_loader):
        real_images = real_images.to(device)
        b_size = real_images.size(0)

        # === Train Discriminator ===
        netD.zero_grad()
        real_labels = torch.full((b_size,), 1.0, dtype=torch.float, device=device)
        fake_labels = torch.full((b_size,), 0.0, dtype=torch.float, device=device)

        output_real = netD(real_images)
        loss_real = criterion(output_real, real_labels)

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake_images = netG(noise)
        output_fake = netD(fake_images.detach())
        loss_fake = criterion(output_fake, fake_labels)

        lossD = loss_real + loss_fake
        lossD.backward()
        optimizerD.step()

        # === Train Generator ===
        netG.zero_grad()
        # Train G to make D think fake is real
        output_fake = netD(fake_images)
        lossG = criterion(output_fake, real_labels)
        lossG.backward()
        optimizerG.step()

        if i % 50 == 0:
            print(f"[{epoch}/{num_epochs}] [{i}/{len(triangle_loader)}] "
                  f"Loss_D: {lossD.item():.4f} Loss_G: {lossG.item():.4f}")

    # Save sample fake images every 10 epochs
    if epoch % 10 == 0:
        with torch.no_grad():
            fake = netG(fixed_noise).detach().cpu()
            save_image(fake, f"{output_path}/epoch_{epoch}.png", normalize=True)


Training started...
[0/100] [0/4] Loss_D: 0.1461 Loss_G: 4.1360
[1/100] [0/4] Loss_D: 0.3735 Loss_G: 8.7362
[2/100] [0/4] Loss_D: 0.1692 Loss_G: 4.8289
[3/100] [0/4] Loss_D: 1.6713 Loss_G: 2.5475
[4/100] [0/4] Loss_D: 0.0339 Loss_G: 5.6038
[5/100] [0/4] Loss_D: 1.6036 Loss_G: 2.4022
[6/100] [0/4] Loss_D: 0.2120 Loss_G: 7.9103
[7/100] [0/4] Loss_D: 2.6689 Loss_G: 16.3701
[8/100] [0/4] Loss_D: 0.1643 Loss_G: 6.9605
[9/100] [0/4] Loss_D: 0.0545 Loss_G: 5.7760
[10/100] [0/4] Loss_D: 0.0385 Loss_G: 6.3179
[11/100] [0/4] Loss_D: 0.0320 Loss_G: 5.2424
[12/100] [0/4] Loss_D: 0.0266 Loss_G: 6.5838
[13/100] [0/4] Loss_D: 0.0086 Loss_G: 6.6885
[14/100] [0/4] Loss_D: 0.0860 Loss_G: 8.4630
[15/100] [0/4] Loss_D: 0.0087 Loss_G: 7.0740
[16/100] [0/4] Loss_D: 0.0070 Loss_G: 7.6404
[17/100] [0/4] Loss_D: 0.0796 Loss_G: 4.1758
[18/100] [0/4] Loss_D: 4.1056 Loss_G: 3.5858
[19/100] [0/4] Loss_D: 1.3472 Loss_G: 2.3188
[20/100] [0/4] Loss_D: 1.7326 Loss_G: 0.6613
[21/100] [0/4] Loss_D: 0.9212 Loss_G: 2.6708

In [38]:
# Save final models
torch.save(netG.state_dict(), "generator.pth")
torch.save(netD.state_dict(), "discriminator.pth")
print("Training complete. Models and sample images saved.")

Training complete. Models and sample images saved.


## now generating part

In [39]:
# Rebuild the Generator architecture
netG = Generator().to(device)
netG.load_state_dict(torch.load("generator.pth"))
netG.eval()


Generator(
  (net): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)

In [40]:
import os
from torchvision.utils import save_image

os.makedirs("synthetic_triangles", exist_ok=True)

n_samples = 500  # Number of images you want to generate
batch_size = 64
generated = 0

while generated < n_samples:
    noise = torch.randn(batch_size, nz, 1, 1, device=device)
    with torch.no_grad():
        fake_images = netG(noise)

    for i in range(fake_images.size(0)):
        if generated >= n_samples:
            break
        img = fake_images[i]
        save_image(img, f"synthetic_triangles/triangle_{generated:04d}.png", normalize=True)
        generated += 1

print(f"{n_samples} synthetic triangle images saved to 'synthetic_triangles/'")


500 synthetic triangle images saved to 'synthetic_triangles/'


Generator(
  (net): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 1, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): Tanh()
  )
)