<a href="https://colab.research.google.com/github/nencyfaganiya/Text-to-Image-Generation/blob/main/Text_to_Image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# paths under /content/drive/MyDrive used by the later cells can be resolved.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from PIL import Image
from torchvision.datasets import ImageFolder
from torchvision.models import resnet50

In [None]:
import tarfile

# Gzip-compressed CUB_200_2011 archive stored on the mounted Drive
cub_tgz_file = '/content/drive/MyDrive/CUB_200_2011.tgz'

# Destination folder that receives the archive contents
cub_dir = '/content/drive/MyDrive/CUB_200_2011'

# NOTE(review): if the archive carries its own top-level folder, its contents
# end up nested one level below cub_dir - confirm that the root directory used
# by the dataset class further down points at the folder holding 'images'.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on an archive from a trusted source.
with tarfile.open(name=cub_tgz_file, mode='r:gz') as tar:
    # Unpack every member of the archive below cub_dir
    tar.extractall(cub_dir)

In [None]:
import os
import random
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torch.optim import Adam
from torch.nn.functional import binary_cross_entropy_with_logits
from torchvision import transforms

# Define the DCGAN model
class DCGAN(nn.Module):
    """Conditional GAN made of a fully connected generator and discriminator.

    The generator ``g`` maps the concatenation of a latent vector and a text
    embedding to a flattened image squashed into [-1, 1] by ``Tanh``. The
    discriminator ``d`` maps a flattened image to a single probability
    (``Sigmoid`` output), so it is meant to be paired with ``nn.BCELoss``.

    Args:
        noise_dim: Size of the latent vector ``z``.
        text_dim: Size of the conditioning (skip-thought) vector.
        img_channels: Number of channels of the generated image.
        img_size: Height and width of the (square) generated image.

    The defaults reproduce the original fixed architecture
    (100 + 36*36 inputs, 3x32x32 outputs). Layer order inside ``g`` and ``d``
    is unchanged, so existing ``state_dict`` checkpoints keep loading.
    """

    def __init__(self, noise_dim=100, text_dim=36 * 36, img_channels=3, img_size=32):
        super().__init__()

        self.noise_dim = noise_dim
        self.text_dim = text_dim
        self.img_channels = img_channels
        self.img_size = img_size

        # Number of values in one flattened image
        flat_img_dim = img_channels * img_size * img_size

        # Define the generator
        self.g = nn.Sequential(
            nn.Linear(noise_dim + text_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Linear(512, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(True),
            nn.Linear(1024, flat_img_dim),
            nn.Tanh()
        )

        # Define the discriminator
        self.d = nn.Sequential(
            nn.Linear(flat_img_dim, 1024),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, z, skip_thought):
        """Generate images and score them with the discriminator.

        Args:
            z: Latent batch of shape ``(batch, noise_dim)``.
            skip_thought: Conditioning batch of shape ``(batch, text_dim)``.

        Returns:
            Tuple ``(img, out)`` where ``img`` has shape
            ``(batch, img_channels, img_size, img_size)`` and ``out`` has
            shape ``(batch, 1)`` with the discriminator probabilities.
        """
        # Concatenate the latent vector and skip thought vector
        x = torch.cat((z, skip_thought), dim=1)
        # Generate flattened images from the concatenated vector
        flat_img = self.g(x)
        # Reshape the generated image to (batch_size, C, H, W)
        img = flat_img.view(
            flat_img.size(0), self.img_channels, self.img_size, self.img_size
        )
        # The discriminator consumes the flat layout, so score it directly
        # instead of reshaping back and forth.
        out = self.d(flat_img)
        return img, out

# Stand-alone preprocessing pipeline: resize the short side to 256, take the
# central 224x224 crop, convert to a tensor and normalise each channel (the
# mean/std values match the commonly used ImageNet statistics).
# NOTE(review): nothing in the visible cells reads `transform`; the dataset
# class below builds its own pipeline.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Folder on the mounted Drive that holds the extracted CUB_200_2011 data
dataset_dir = '/content/drive/MyDrive/CUB_200_2011'

# Define the dataset class
class CUB200Dataset(Dataset):
    """Image dataset over ``<root_dir>/images/<species>/*.jpg``.

    Each item is ``(image, species_idx)`` where ``species_idx`` is the
    position of the species folder in the alphabetically sorted folder list,
    so labels are stable across runs and machines.

    Args:
        root_dir: Directory that contains the ``images`` folder.
        split: ``'train'`` uses every species folder; ``'val'`` uses the
            first 100 species folders (sorted order).
        transform: Optional callable applied to the RGB PIL image. When
            omitted, images are resized to 224x224, converted to a tensor and
            normalised per channel.

    Raises:
        ValueError: If ``split`` is neither ``'train'`` nor ``'val'``.

    NOTE(review): the 'val' split is a subset of the 'train' split, so the two
    are not disjoint - confirm this is intended.
    """

    def __init__(self, root_dir, split, transform=None):
        # Fail early: previously an unknown split left image_files unset and
        # surfaced only later as an AttributeError.
        if split not in ('train', 'val'):
            raise ValueError(f"split must be 'train' or 'val', got {split!r}")

        self.root_dir = root_dir
        self.split = split
        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform
        self.image_dir = os.path.join(root_dir, 'images')

        # os.listdir returns entries in arbitrary order; sort so that both the
        # label indices and the species[:100] subset are deterministic.
        self.species_dirs = sorted(
            d for d in os.listdir(self.image_dir)
            if os.path.isdir(os.path.join(self.image_dir, d))
        )
        # Constant-time label lookup instead of list.index() per sample
        self._species_to_idx = {name: idx for idx, name in enumerate(self.species_dirs)}

        selected = self.species_dirs if split == 'train' else self.species_dirs[:100]
        self.image_files = [
            os.path.join(self.image_dir, d, f)
            for d in selected
            for f in sorted(os.listdir(os.path.join(self.image_dir, d)))
            if f.endswith('.jpg')
        ]

    def __len__(self):
        """Return the number of image files in this split."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(transformed_image, species_idx)``."""
        image_path = self.image_files[idx]
        # Close the underlying file handle as soon as the pixels are decoded
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        image = self.transform(image)
        species_name = os.path.basename(os.path.dirname(image_path))
        species_idx = self._species_to_idx[species_name]
        return image, species_idx

# Set the root directory of the CUB_200_2011 dataset
root_dir = '/content/drive/MyDrive/CUB_200_2011'

# Create the train and validation datasets
train_dataset = CUB200Dataset(root_dir, split='train')
val_dataset = CUB200Dataset(root_dir, split='val')

# Index lists consumed by the samplers below. They were referenced without
# ever being defined (NameError); using every index of each dataset makes
# SubsetRandomSampler visit all samples once per epoch in random order.
train_indices = list(range(len(train_dataset)))
val_indices = list(range(len(val_dataset)))

# Create the DataLoader objects
train_data_loader = DataLoader(train_dataset, batch_size=64, sampler=SubsetRandomSampler(train_indices), num_workers=4)
val_data_loader = DataLoader(val_dataset, batch_size=64, sampler=SubsetRandomSampler(val_indices), num_workers=4)

# Run on the GPU when one is available. `device` was used throughout the
# training loop without ever being defined.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the DCGAN model (moved to the device before the optimizers are built)
dcgan = DCGAN().to(device)

# Sizes the default DCGAN() expects: latent vector, conditioning vector and
# the side length of the square images seen by the discriminator.
noise_dim = 100
text_dim = 36*36
img_size = 32

# Define the loss function and optimizer for the generator
# NOTE(review): criterion_g is not used by the loop below; the generator is
# trained with the discriminator's BCE loss.
criterion_g = nn.MSELoss()
optimizer_g = Adam(dcgan.g.parameters(), lr=0.0002)

# Define the loss function and optimizer for the discriminator
criterion_d = nn.BCELoss()
optimizer_d = Adam(dcgan.d.parameters(), lr=0.0002)


def _generate_fake(batch_size):
    """Return a flat batch of generated images built from random inputs.

    NOTE(review): the conditioning vector is random noise here, not a real
    text embedding - replace it once caption embeddings are available.
    """
    noise = torch.randn(batch_size, noise_dim, device=device)
    skip_thought = torch.randn(batch_size, text_dim, device=device)
    # dcgan.g is an nn.Sequential and takes ONE tensor, so concatenate first
    return dcgan.g(torch.cat((noise, skip_thought), dim=1))


def _prepare_real(images):
    """Move real images to the device and flatten them for the discriminator.

    Images whose spatial size differs from img_size are down/up-sampled so
    that the flattened length matches the discriminator's first layer.
    NOTE(review): real images keep whatever normalisation the dataset applied,
    while generated images lie in [-1, 1] (Tanh) - consider aligning the two.
    """
    images = images.to(device)
    if tuple(images.shape[-2:]) != (img_size, img_size):
        images = nn.functional.interpolate(
            images, size=(img_size, img_size), mode='bilinear', align_corners=False
        )
    return images.flatten(start_dim=1)


# Train the model
num_epochs = 100

for epoch in range(num_epochs):
    dcgan.train()
    # The dataset yields (image, label) pairs; the labels are not needed here
    for images, _ in train_data_loader:
        real_images = _prepare_real(images)
        batch_size = real_images.size(0)
        if batch_size < 2:
            # BatchNorm1d cannot compute batch statistics from a single sample
            continue

        real_labels = torch.ones(batch_size, 1, device=device)
        fake_labels = torch.zeros(batch_size, 1, device=device)

        # Train the discriminator
        fake_images = _generate_fake(batch_size)

        # Classify the real images
        real_outputs = dcgan.d(real_images)
        real_loss = criterion_d(real_outputs, real_labels)

        # Classify the fake images (detached: no generator gradients here)
        fake_outputs = dcgan.d(fake_images.detach())
        fake_loss = criterion_d(fake_outputs, fake_labels)

        # Compute the total loss for the discriminator
        d_loss = (real_loss + fake_loss) / 2

        # Backpropagate and optimize the discriminator
        optimizer_d.zero_grad()
        d_loss.backward()
        optimizer_d.step()

        # Train the generator on a freshly sampled batch; it is rewarded when
        # the discriminator labels its images as real.
        fake_images = _generate_fake(batch_size)
        fake_outputs = dcgan.d(fake_images)
        g_loss = criterion_d(fake_outputs, real_labels)

        # Backpropagate and optimize the generator
        optimizer_g.zero_grad()
        g_loss.backward()
        optimizer_g.step()

    # Evaluate the model on the validation set (eval mode disables dropout and
    # makes batch norm use its running statistics)
    dcgan.eval()
    val_loss = 0
    with torch.no_grad():
        for images, _ in val_data_loader:
            # Generate fake images
            fake_images = _generate_fake(images.size(0))

            # Classify the fake images
            fake_labels = torch.ones(fake_images.size(0), 1, device=device)
            fake_outputs = dcgan.d(fake_images)
            g_loss = criterion_d(fake_outputs, fake_labels)

            # Compute the validation loss
            val_loss += g_loss.item()

    # Print the validation loss once per epoch
    if len(val_data_loader) > 0:
        val_loss /= len(val_data_loader)
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss:.4f}')
    else:
        print(f'Epoch {epoch+1}/{num_epochs}, Validation Loss: N/A (No validation data)')