In [9]:
# Uncomment and run the appropriate command for your operating system, if required
# No installation is reqiured on Google Colab / Kaggle notebooks

# Linux / Binder / Windows (No GPU)
# !pip install numpy matplotlib torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html

# Linux / Windows (GPU)
# pip install numpy matplotlib torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
 
# MacOS (NO GPU)
# !pip install numpy matplotlib torch torchvision torchaudio

# Installing necessary packages

In [10]:
import subprocess
import sys

def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

required_packages = ['numpy', 'matplotlib', 'torch', 'torchvision', 'torchaudio', 'transformers', 'pillow', 'json']

for package in required_packages:
    try:
        __import__(package)
    except ImportError:
        install(package)



In [11]:
import os
import torch
import json
import torch.nn as nn
import re
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, utils
from PIL import Image, ImageFilter, ImageEnhance
import shutil
import matplotlib.pyplot as plt

# Data Preprocessing

In [12]:
import os
from PIL import Image, ImageFilter, ImageEnhance

base_dir = 'dataset'
image_folders = ['nomadic', 'cubism', 'print', 'gray']
output_image_dir = os.path.join(base_dir, 'images')
output_text_dir = os.path.join(base_dir, 'text')

os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_text_dir, exist_ok=True)

filters = [
    ('BLUR', ImageFilter.BLUR),
    ('CONTOUR', ImageFilter.CONTOUR),
    ('DETAIL', ImageFilter.DETAIL),
    ('EDGE_ENHANCE', ImageFilter.EDGE_ENHANCE),
    ('EDGE_ENHANCE_MORE', ImageFilter.EDGE_ENHANCE_MORE),
    ('EMBOSS', ImageFilter.EMBOSS),
    ('FIND_EDGES', ImageFilter.FIND_EDGES),
    ('SHARPEN', ImageFilter.SHARPEN),
    ('SMOOTH', ImageFilter.SMOOTH),
    ('SMOOTH_MORE', ImageFilter.SMOOTH_MORE)
]
enhancements = [
    ('BRIGHTNESS', ImageEnhance.Brightness, 1.2),
    ('CONTRAST', ImageEnhance.Contrast, 1.2),
    ('COLOR', ImageEnhance.Color, 1.2),
    ('SHARPNESS', ImageEnhance.Sharpness, 1.2)
]
additional_filters = [
    ('GAUSSIAN_BLUR', ImageFilter.GaussianBlur(radius=2)),
    ('UNSHARP_MASK', ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3)),
    ('MEDIAN_FILTER', ImageFilter.MedianFilter(size=3)),
    ('MIN_FILTER', ImageFilter.MinFilter(size=3)),
    ('MAX_FILTER', ImageFilter.MaxFilter(size=3)),
    ('MODE_FILTER', ImageFilter.ModeFilter(size=3))
]

all_operations = filters + enhancements + additional_filters

# Initialize the dictionary
image_to_class = {}

def apply_operations(image, output_dir, base_filename, counter, folder):
    original_image_path = os.path.join(output_dir, f"{base_filename}{counter:03d}.png")
    image.save(original_image_path)
    image_to_class[f"{base_filename}{counter:03d}.png"] = folder
    counter += 1

    for operation_name, operation in filters:
        modified_image = image.filter(operation)
        modified_image_path = os.path.join(output_dir, f"{base_filename}{counter:03d}.png")
        modified_image.save(modified_image_path)
        image_to_class[f"{base_filename}{counter:03d}.png"] = folder
        counter += 1

    for operation_name, operation, factor in enhancements:
        enhancer = operation(image)
        modified_image = enhancer.enhance(factor)
        modified_image_path = os.path.join(output_dir, f"{base_filename}{counter:03d}.png")
        modified_image.save(modified_image_path)
        image_to_class[f"{base_filename}{counter:03d}.png"] = folder
        counter += 1

    for operation_name, operation in additional_filters:
        modified_image = image.filter(operation)
        modified_image_path = os.path.join(output_dir, f"{base_filename}{counter:03d}.png")
        modified_image.save(modified_image_path)
        image_to_class[f"{base_filename}{counter:03d}.png"] = folder
        counter += 1

    return counter

def clean_directory(directory):
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isfile(item_path) and '.' in item:
            os.remove(item_path)

image_counter = 1

clean_directory(output_image_dir)
clean_directory(output_text_dir)

for folder in image_folders:
    current_folder_path = os.path.join(base_dir, 'images', folder)
    for filename in os.listdir(current_folder_path):
        if filename.lower().endswith(('png', 'jpg', 'jpeg', 'bmp', 'gif')):
            image_path = os.path.join(current_folder_path, filename)
            image = Image.open(image_path)
            base_filename = "artwork"

            image_counter = apply_operations(image, output_image_dir, base_filename, image_counter, folder)

            # Create corresponding text file
            for i in range(image_counter - 21, image_counter):
                txt_filename = f"{base_filename}{i:03d}.txt"
                with open(os.path.join(output_text_dir, txt_filename), 'w') as f:
                    f.write(folder)

print("Processing complete.")
print(image_to_class)

Processing complete.
{'artwork001.png': 'nomadic', 'artwork002.png': 'nomadic', 'artwork003.png': 'nomadic', 'artwork004.png': 'nomadic', 'artwork005.png': 'nomadic', 'artwork006.png': 'nomadic', 'artwork007.png': 'nomadic', 'artwork008.png': 'nomadic', 'artwork009.png': 'nomadic', 'artwork010.png': 'nomadic', 'artwork011.png': 'nomadic', 'artwork012.png': 'nomadic', 'artwork013.png': 'nomadic', 'artwork014.png': 'nomadic', 'artwork015.png': 'nomadic', 'artwork016.png': 'nomadic', 'artwork017.png': 'nomadic', 'artwork018.png': 'nomadic', 'artwork019.png': 'nomadic', 'artwork020.png': 'nomadic', 'artwork021.png': 'nomadic', 'artwork022.png': 'nomadic', 'artwork023.png': 'nomadic', 'artwork024.png': 'nomadic', 'artwork025.png': 'nomadic', 'artwork026.png': 'nomadic', 'artwork027.png': 'nomadic', 'artwork028.png': 'nomadic', 'artwork029.png': 'nomadic', 'artwork030.png': 'nomadic', 'artwork031.png': 'nomadic', 'artwork032.png': 'nomadic', 'artwork033.png': 'nomadic', 'artwork034.png': 'no

In [13]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class TextImageDataset(Dataset):
    def __init__(self, image_dir, image_to_class, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.image_files = sorted(os.listdir(image_dir))
        self.image_to_class = image_to_class
        self.class_to_idx = {'nomadic': 0, 'cubism': 1, 'print': 2, 'gray': 3}

    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        image = Image.open(os.path.join(self.image_dir, img_name))
        if image.mode != 'RGB':
            image = image.convert('RGB')
        if self.transform:
            image = self.transform(image)

        class_name = self.image_to_class[img_name]
        label = self.class_to_idx[class_name]
        label = torch.eye(len(self.class_to_idx))[label]  # One-hot encode the label

        return image, label

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

dataset = TextImageDataset(image_dir=output_image_dir, image_to_class=image_to_class, transform=transform)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

print("Dataset and DataLoader created successfully.")

Dataset and DataLoader created successfully.


# Feature Engineering
Defining the Generator and Discriminator networks

In [14]:
import torch
import torch.nn as nn

# Define Generator Class
class Generator(nn.Module):
    def __init__(self, latent_dim, text_dim, img_channels):
        super(Generator, self).__init__()
        self.text_embedding = nn.Linear(text_dim, latent_dim)
        self.gen = nn.Sequential(
            nn.ConvTranspose2d(latent_dim * 2, 512, 4, 1, 0),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, img_channels, 4, 2, 1),
            nn.Tanh()
        )

    def forward(self, noise, text):
        text_embedding = self.text_embedding(text).unsqueeze(2).unsqueeze(3)
        text_embedding = text_embedding.expand(noise.size(0), -1, noise.size(2), noise.size(3))
        x = torch.cat([noise, text_embedding], dim=1)
        return self.gen(x)

# Define Discriminator Class
class Discriminator(nn.Module):
    def __init__(self, img_channels, text_dim, hidden_dim, img_size=128):
        super(Discriminator, self).__init__()

        def conv_output_size(size, kernel_size=4, stride=2, padding=1):
            return (size - kernel_size + 2 * padding) // stride + 1

        self.img_size = img_size
        self.hidden_dim = hidden_dim

        current_size = img_size
        self.img_dis = nn.Sequential(
            nn.Conv2d(img_channels, hidden_dim // 4, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        current_size = conv_output_size(current_size, kernel_size=4, stride=2, padding=1)

        self.img_dis.add_module("conv2", nn.Conv2d(hidden_dim // 4, hidden_dim // 2, 4, stride=2, padding=1))
        self.img_dis.add_module("lrelu2", nn.LeakyReLU(0.2, inplace=True))
        current_size = conv_output_size(current_size, kernel_size=4, stride=2, padding=1)

        self.img_dis.add_module("conv3", nn.Conv2d(hidden_dim // 2, hidden_dim, 4, stride=2, padding=1))
        self.img_dis.add_module("lrelu3", nn.LeakyReLU(0.2, inplace=True))
        current_size = conv_output_size(current_size, kernel_size=4, stride=2, padding=1)

        self.text_embedding = nn.Linear(text_dim, hidden_dim * current_size * current_size)

        self.final = nn.Sequential(
            nn.Conv2d(hidden_dim + hidden_dim, hidden_dim, 3, 1, 1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(hidden_dim, 1, 3, 1, 1),
        )

    def forward(self, img, text):
        img_out = self.img_dis(img)
        text_embedding = self.text_embedding(text.float()).view(text.size(0), -1, img_out.size(2), img_out.size(3))
        x = torch.cat([img_out, text_embedding], dim=1)
        return self.final(x).view(img.size(0), -1)  # Ensuring the output size is [batch_size, 1]

In [15]:
# text_dim = 4  # Number of classes for one-hot encoding
# latent_dim = 100
# hidden_dim = 512
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# generator = Generator(latent_dim, text_dim, 3).to(device)
# discriminator = Discriminator(3, text_dim, hidden_dim, img_size=128).to(device)

# Training the Model
The generator and discriminator are trained here using a loop.
## Optimization
The optimization is embedded within the training loop. The code uses Adam optimizer to update the weights of both the generator and the discriminator.
## Evaluation
Evaluation happens within the training loop where loss values for both the generator and the discriminator are printed every 100 steps. Additionally, generated sample images are saved at the end of each epoch to visualize the generator’s progress.

In [16]:
import torch.optim as optim
import torchvision.utils as utils
import os

# Training Function
epochs = 50  # number of epochs
lr = 0.0001  # learning rate
latent_dim = 100  # latent dimension for the generator
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_gan(generator, discriminator, dataloader, epochs, lr, device):
    criterion = nn.BCEWithLogitsLoss()
    optim_g = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
    optim_d = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(dataloader):
            batch_size = images.size(0)
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            images = images.to(device)
            labels = labels.to(device)

            # Train Discriminator
            optim_d.zero_grad()
            outputs = discriminator(images, labels)
            real_loss = criterion(outputs, real_labels)
            real_loss.backward()

            noise = torch.randn(batch_size, latent_dim, 1, 1).to(device)
            fake_images = generator(noise, labels)
            outputs = discriminator(fake_images.detach(), labels)
            fake_loss = criterion(outputs, fake_labels)
            fake_loss.backward()
            optim_d.step()

            # Train Generator
            optim_g.zero_grad()
            outputs = discriminator(fake_images, labels)
            g_loss = criterion(outputs, real_labels)
            g_loss.backward()
            optim_g.step()

            if (i + 1) % 100 == 0:
                print(f'Epoch [{epoch + 1}/{epochs}], Step [{i + 1}/{len(dataloader)}], '
                      f'D Loss: {real_loss.item() + fake_loss.item():.4f}, G Loss: {g_loss.item():.4f}')

        with torch.no_grad():
            sample_noise = torch.randn(batch_size, latent_dim, 1, 1).to(device)
            sample_images = generator(sample_noise, labels)
            os.makedirs('samples', exist_ok=True)
            utils.save_image(sample_images, f'samples/sample_epoch_{epoch + 1}.png', nrow=8, normalize=True)

    torch.save(generator.state_dict(), 'generator.pth')
    torch.save(discriminator.state_dict(), 'discriminator.pth')

# Assuming dataloader is already defined and initialized
generator = Generator(latent_dim, 4, 3).to(device)
discriminator = Discriminator(3, 4, 512, img_size=128).to(device)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
dataset = TextImageDataset(image_dir=output_image_dir, image_to_class=image_to_class, transform=transform)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

train_gan(generator, discriminator, dataloader, epochs, lr, device)

ValueError: Target size (torch.Size([3, 1])) must be the same as input size (torch.Size([3, 256]))