# Task 10: End-to-End Text-to-Image Pipeline

This notebook implements a complete end-to-end pipeline for text-to-image generation, including text preprocessing, embedding, and image generation.


In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from transformers import BertTokenizer
import re
import numpy as np


## Text Preprocessing


In [None]:
def preprocess_text(text):
    """Return *text* lowercased, with every character other than ASCII
    letters, digits and whitespace removed.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\s]')
    return disallowed.sub('', text.lower())

# Example: clean a sample prompt and show it before and after preprocessing.
text = "Generate an image of a beautiful sunset!!!"
cleaned_text = preprocess_text(text)
print(f"Original: {text}")
print(f"Cleaned: {cleaned_text}")


## Text Embedding


In [None]:
# Load the pretrained tokenizer for the 'bert-base-uncased' checkpoint once at
# module level; get_text_embedding below uses this shared instance.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def get_text_embedding(text: str, max_length: int = 77) -> torch.Tensor:
    """Encode a prompt as a fixed-length vector of float token IDs.

    Despite the name, no learned embedding is applied: the text is cleaned
    with ``preprocess_text``, tokenized with the module-level ``tokenizer``,
    padded/truncated to ``max_length`` positions, and the resulting integer
    token IDs are cast to float.

    Args:
        text: Raw prompt string.
        max_length: Number of token positions to pad or truncate to. The
            default of 77 matches the text width used by the models in this
            notebook; change both together.

    Returns:
        The tokenizer's ``input_ids`` as a float tensor. For a single prompt
        this is expected to have shape ``(1, max_length)``.
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer(
        cleaned,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=max_length,
    )
    return encoded['input_ids'].float()

# Example: encode the sample prompt defined earlier and report the tensor shape.
text_emb = get_text_embedding(text)
print(f"Text embedding shape: {text_emb.shape}")


## Generator and Discriminator Models


In [None]:
class Generator(nn.Module):
    """Fully connected generator mapping (noise, text features) to a square image.

    The noise and text vectors are concatenated and passed through a single
    hidden layer; the output is reshaped to ``(batch, img_size, img_size)``.

    Args:
        noise_dim: Width of the noise vector ``z``.
        text_dim: Width of the text feature vector.
        hidden_dim: Number of units in the hidden layer.
        img_size: Side length of the generated square image.
    """

    def __init__(self, noise_dim=100, text_dim=77, hidden_dim=128, img_size=28):
        super().__init__()
        self.img_size = img_size
        self.model = nn.Sequential(
            nn.Linear(noise_dim + text_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, img_size * img_size),
            nn.Tanh(),  # bounds every output pixel to [-1, 1]
        )

    def forward(self, z, text_emb):
        """Generate images from noise ``z`` and text features ``text_emb``.

        Both inputs are concatenated along dim 1, so they must share the same
        batch size and have widths ``noise_dim`` and ``text_dim`` respectively.
        """
        x = torch.cat((z, text_emb), dim=1)
        return self.model(x).view(-1, self.img_size, self.img_size)

class Discriminator(nn.Module):
    """Fully connected discriminator scoring an (image, text features) pair.

    The image is flattened, concatenated with the text vector and passed
    through a single hidden layer; a sigmoid yields one score in [0, 1] per
    sample.

    Args:
        text_dim: Width of the text feature vector.
        hidden_dim: Number of units in the hidden layer.
        img_size: Side length of the square input image.
    """

    def __init__(self, text_dim=77, hidden_dim=128, img_size=28):
        super().__init__()
        self.img_dim = img_size * img_size
        self.model = nn.Sequential(
            nn.Linear(self.img_dim + text_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, img, text_emb):
        """Return a ``(batch, 1)`` tensor of scores for ``img`` given ``text_emb``."""
        # reshape (rather than view) also accepts non-contiguous image tensors.
        flat = img.reshape(-1, self.img_dim)
        x = torch.cat((flat, text_emb), dim=1)
        return self.model(x)


## Complete Pipeline


In [None]:
# Build both networks (generator first), the binary cross-entropy loss, and
# one Adam optimizer per network with the same learning rate.
generator, discriminator = Generator(), Discriminator()
criterion = nn.BCELoss()
g_optimizer, d_optimizer = (
    torch.optim.Adam(net.parameters(), lr=0.001)
    for net in (generator, discriminator)
)

print("Models initialized!")


In [None]:
def generate_image_from_text(text: str, num_iterations: int = 100) -> np.ndarray:
    """Run the full pipeline for one prompt: text -> token features -> image.

    The prompt is encoded with ``get_text_embedding``, a single noise vector
    is drawn, and the module-level ``generator`` and ``discriminator`` are
    updated in place for ``num_iterations`` adversarial rounds. The final
    image is then produced from that same noise vector.

    Note:
        The discriminator is only ever shown generated samples in this loop
        (there is no real-image batch), so this demonstrates the update
        mechanics rather than genuine GAN training. The models and optimizers
        are module-level, so their state carries over between calls.

    Args:
        text: Raw prompt string.
        num_iterations: Number of discriminator/generator update rounds.

    Returns:
        The first (and only) image of the generated batch as a NumPy array.
    """
    text_emb = get_text_embedding(text)

    # One fixed noise sample; its width must match the generator's noise input.
    z = torch.randn(1, 100)

    for _ in range(num_iterations):
        fake_img = generator(z, text_emb)

        # Discriminator step: push its score for the generated sample toward 0.
        # detach() keeps this step from back-propagating into the generator.
        d_optimizer.zero_grad()
        d_fake = discriminator(fake_img.detach(), text_emb)
        # BCELoss requires the target to have exactly the prediction's shape;
        # building it with zeros_like avoids a (1,) vs (1, 1) mismatch error.
        d_loss = criterion(d_fake, torch.zeros_like(d_fake))
        d_loss.backward()
        d_optimizer.step()

        # Generator step: push the discriminator's score for the sample toward 1.
        g_optimizer.zero_grad()
        g_fake = discriminator(fake_img, text_emb)
        g_loss = criterion(g_fake, torch.ones_like(g_fake))
        g_loss.backward()
        g_optimizer.step()

    # Final render; no_grad means the output carries no autograd history.
    with torch.no_grad():
        generated_img = generator(z, text_emb).numpy()

    return generated_img[0]

# Example usage: render one generated image per prompt, side by side.
text_prompts = [
    "A beautiful sunset over mountains",
    "A red car on a highway",
    "A cat sitting on a windowsill",
]

# Size the figure from the prompt list so prompts can be added or removed
# without touching the plotting code.
num_prompts = len(text_prompts)
fig, axes = plt.subplots(1, num_prompts, figsize=(5 * num_prompts, 5))
# plt.subplots returns a bare Axes (not an array) when there is one subplot.
axes = np.atleast_1d(axes)

for idx, prompt in enumerate(text_prompts):
    img = generate_image_from_text(prompt, num_iterations=50)
    axes[idx].imshow(img, cmap='gray')
    # Only append an ellipsis when the prompt was actually truncated.
    title = prompt if len(prompt) <= 30 else f"{prompt[:30]}..."
    axes[idx].set_title(f"Generated for:\n{title}")
    axes[idx].axis('off')

plt.tight_layout()
plt.show()

print("End-to-end pipeline completed!")
