In [2]:
from transformers import CLIPTextModel, CLIPTokenizer
import torch
import torch.nn as nn
from torchvision.utils import save_image
import os

class TextEmbedder:
    """Pool the token states of a pretrained CLIP text encoder into one vector per prompt."""

    def __init__(self, model_name="openai/clip-vit-base-patch32"):
        """Load the CLIP text encoder identified by ``model_name``."""
        self.model = CLIPTextModel.from_pretrained(model_name)

    def get_embedding(self, tokenized_input):
        """Return the mean of the encoder's last hidden states over the token axis.

        Args:
            tokenized_input: Mapping of keyword arguments forwarded to the
                model. If it contains ``"attention_mask"``, only positions
                where the mask is non-zero contribute to the mean.

        Returns:
            Tensor with the token axis (dim 1) averaged away.
        """
        # Inference only: no autograd graph is needed for the encoder pass.
        with torch.no_grad():
            output = self.model(**tokenized_input)
        hidden = output.last_hidden_state

        try:
            mask = tokenized_input["attention_mask"]
        except KeyError:
            mask = None
        if mask is None:
            # No mask supplied: every position counts equally.
            return hidden.mean(dim=1)

        # Padded positions would otherwise skew the average when prompts of
        # different lengths are batched together.
        weights = mask.unsqueeze(-1).to(hidden.dtype)
        # Guard against division by zero for an all-masked row.
        token_counts = weights.sum(dim=1).clamp(min=1)
        return (hidden * weights).sum(dim=1) / token_counts

class SimulatedGAN(nn.Module):
    """Two-layer MLP that maps an embedding vector to a 3-channel square image tensor.

    The final activation is ``Tanh``, so output values lie in [-1, 1].
    """

    def __init__(self, embedding_dim=512, img_size=64):
        """Build the generator MLP.

        Args:
            embedding_dim: Size of the input embedding vector.
            img_size: Height and width of the generated image.
        """
        super().__init__()
        self.img_size = img_size
        # One output unit per pixel value: 3 channels x height x width.
        pixel_count = 3 * img_size * img_size
        stages = [
            nn.Linear(embedding_dim, 512),
            nn.ReLU(),
            nn.Linear(512, pixel_count),
            nn.Tanh(),
        ]
        self.generator = nn.Sequential(*stages)

    def forward(self, embedding):
        """Run the MLP and reshape its flat output to ``(-1, 3, img_size, img_size)``."""
        side = self.img_size
        flat_pixels = self.generator(embedding)
        return flat_pixels.view(-1, 3, side, side)

def generate_and_save_image(model, embedding, output_path='output/generated_image.png'):
    """Run ``model`` on ``embedding`` and write the resulting image to ``output_path``.

    Args:
        model: Callable invoked as ``model(embedding)``; its result is passed
            to ``save_image``.
        embedding: Input forwarded to ``model``.
        output_path: Destination file. Its parent directory is created when
            missing; a bare filename is written to the current directory.
    """
    target_dir = os.path.dirname(output_path)
    # dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    # The result is only written to disk, so no autograd graph is needed.
    with torch.no_grad():
        image = model(embedding)
    # NOTE(review): save_image is called with its default arguments; confirm
    # that the model's output range is what save_image expects.
    save_image(image, output_path)
    print(f"[✓] Image saved to: {output_path}")

class TextPreprocessor:
    """Tokenize raw text with a pretrained CLIP tokenizer."""

    def __init__(self, model_name="openai/clip-vit-base-patch32"):
        """Load the CLIP tokenizer identified by ``model_name``."""
        self.tokenizer = CLIPTokenizer.from_pretrained(model_name)

    def preprocess(self, text):
        """Tokenize ``text`` and return the tokenizer's output.

        The tokenizer is asked for PyTorch tensors with truncation and
        padding enabled.
        """
        tokenizer_options = {
            "return_tensors": "pt",
            "truncation": True,
            "padding": True,
        }
        return self.tokenizer(text, **tokenizer_options)

def print_tensor_shape(name, tensor):
    """Print ``"<name> shape: <tensor.shape>"`` to standard output."""
    shape = tensor.shape
    print(f"{name} shape: {shape}")

def main():
    """Turn a fixed text prompt into an image file.

    Tokenizes the prompt, embeds it, prints the embedding's shape, then feeds
    the embedding to a freshly initialised ``SimulatedGAN`` and saves the
    result to ``output/generated_image.png``.
    """
    prompt = "A serene mountain landscape with glowing clouds during sunset"

    # Text -> token tensors.
    tokens = TextPreprocessor().preprocess(prompt)

    # Token tensors -> pooled embedding.
    text_vector = TextEmbedder().get_embedding(tokens)
    print_tensor_shape("Text Embedding", text_vector)

    # Embedding -> image on disk.
    generator = SimulatedGAN()
    generate_and_save_image(
        generator,
        text_vector,
        output_path="output/generated_image.png",
    )


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

Text Embedding shape: torch.Size([1, 512])
[✓] Image saved to: output/generated_image.png


model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]