In [None]:
# The variable below selects whether the notebook runs in test mode or not.
# In "test" mode a much smaller dataset is generated than in "production" mode:
# the results are not meaningful, but it is a fast way to check that the notebook runs end to end.

ENV_TYPE = "test" # "test" or "production"

In [None]:
# Install the third-party dependencies with the %pip magic so the packages land in
# the environment of the running kernel (a `!pip` shell escape uses whichever pip is
# first on PATH, which is not necessarily the kernel's).
%pip install transformers
%pip install openai-clip

In [12]:
import random
import numpy as np

# Vocabulary of phrases handed to prompt_generator, which joins randomly chosen entries into a prompt.
# NOTE(review): 'steal' may be a typo for 'steel' — confirm before changing, since it alters the generated data.
phrase_list = ['2d', 'pixel art', 'cave', 'scifi', 'side scrolling', 'chibi', 'waifu', 'space ship', 'desert', 'city', 'wasteland', 'mega structure', 'steal', 'stone', 'rock']

def prompt_generator(phrase_list, prompt_word_length=32):
    """Build a random prompt by joining phrases drawn from `phrase_list`.

    Phrases are drawn one at a time with `random.choice`. Each accepted phrase
    consumes its own length plus one separator character from the budget
    `prompt_word_length`; despite the parameter name, the budget is measured in
    characters, not words. Generation stops as soon as the budget is used up or
    the first drawn phrase does not fit.

    :param phrase_list: sequence of candidate phrases (strings)
    :param prompt_word_length: character budget for the prompt
    :return: the accepted phrases joined by single spaces, with surrounding
        whitespace stripped (an empty string when nothing fits)
    """
    accepted = []
    used = 0  # characters consumed so far, one separator counted per phrase
    while used < prompt_word_length:
        candidate = random.choice(phrase_list)
        cost = len(candidate) + 1
        if used + cost > prompt_word_length:
            # The first phrase that does not fit ends generation; no retry is made.
            break
        accepted.append(candidate)
        used += cost
    return ' '.join(accepted).strip()

# example usage: generate one prompt, keep it in `prompt`, and print that same prompt
# (calling prompt_generator a second time inside print() would show a different prompt
# from the one that was stored).
prompt = prompt_generator(phrase_list)
print(prompt)


wasteland steal mega structure


In [3]:
# Display the prompt currently stored in the `prompt` variable.
print(prompt)

chibi pixel art cave wasteland


In [13]:
import torch
import clip
import numpy as np

# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
if not use_cuda:
    print("Warning: CUDA not available, using CPU.")
else:
    print(f"Using CUDA device: {torch.cuda.get_device_name(device)}")

# Load the CLIP ViT-L/14 model (and the preprocess object returned alongside it).
model, preprocess = clip.load("ViT-L/14", device=device)

# Tokenize a fixed example prompt and place the token ids on the chosen device.
prompt = "scifi mega structure rock cave"
tokens = clip.tokenize(prompt).to(device)
print("Size of tokens:", tokens.shape)

# Encode the tokens without tracking gradients, then scale each row to unit L2 norm.
with torch.no_grad():
    text_features = model.encode_text(tokens).float()
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

# Copy the embedding to host memory as a NumPy array.
embedding = text_features.cpu().numpy()

print("Text embedding shape:",embedding.shape)  # should print (1, 768)




Size of tokens: torch.Size([1, 77])
Text embedding shape: (1, 768)


Run this cell to generate random prompts together with their word embeddings and CLIP embeddings

In [None]:
import json
import torch
import clip
import random
import numpy as np
from typing import List
from torch import nn
from transformers import CLIPTokenizer, CLIPTextModel

# Vocabulary of phrases used to build the random prompts.
phrase_list = ['2d', 'pixel art', 'cave', 'scifi', 'side scrolling', 'chibi', 'waifu', 'space ship', 'desert', 'city', 'wasteland', 'mega structure', 'steal', 'stone', 'rock']

# Number of prompts to generate, selected by the ENV_TYPE switch.
_PROMPT_COUNT_BY_ENV = {"test": 5, "production": 500}
if ENV_TYPE not in _PROMPT_COUNT_BY_ENV:
    # Fail loudly: an unrecognised value would otherwise leave N at 0 and
    # silently produce an empty dataset.
    raise ValueError(f'ENV_TYPE must be "test" or "production", got {ENV_TYPE!r}')
N = _PROMPT_COUNT_BY_ENV[ENV_TYPE]

# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-L/14", device=device)



class CLIPTextEmbedder(nn.Module):
    """
    ## CLIP Text Embedder

    Wraps a CLIP tokenizer and CLIP text transformer and returns the
    transformer's last hidden state for the given prompts.
    """

    def __init__(self, version: str = "openai/clip-vit-large-patch14", device="cuda:0", max_length: int = 12):
        """
        :param version: is the model version
        :param device: is the device the transformer and the token ids are placed on;
            when a CUDA device is requested but CUDA is not available, the CPU is used
        :param max_length: is the max length of the tokenized prompt
        """
        super().__init__()
        # Load the tokenizer
        self.tokenizer = CLIPTokenizer.from_pretrained(version)

        # Honor the requested device, falling back to the CPU when CUDA is unavailable
        requested = torch.device(device)
        if requested.type == "cuda" and not torch.cuda.is_available():
            requested = torch.device("cpu")
        self.device = requested

        # Load the CLIP transformer in eval mode and move it to the same device as the
        # token ids produced in `forward`; otherwise the two end up on different devices
        # whenever a GPU is present.
        self.transformer = CLIPTextModel.from_pretrained(version).eval().to(self.device)

        self.max_length = max_length

    def forward(self, prompts: List[str]):
        """
        :param prompts: are the list of prompts to embed
        :return: the `last_hidden_state` of the CLIP text transformer for the prompts
        """
        # Tokenize the prompts, truncating/padding every prompt to `max_length` tokens
        batch_encoding = self.tokenizer(prompts, truncation=True, max_length=self.max_length, return_length=True,
                                        return_overflowing_tokens=False, padding="max_length", return_tensors="pt")
        # Get token ids
        tokens = batch_encoding["input_ids"].to(self.device)
        # Get CLIP embeddings
        return self.transformer(input_ids=tokens).last_hidden_state

def generate_prompts(num_prompts, phrase_list, prompt_word_length, x, output_path="prompts_and_embeddings.json"):
    """Generate random prompts with two embeddings each and save them as JSON.

    For every prompt this stores:
      * "prompt": the text returned by `prompt_generator`,
      * "word_embedding": the output of `x.forward(prompt)` flattened to one list,
      * "clip_embedding": the first row of `model.encode_text(...)`, scaled to unit L2 norm.

    Relies on the module-level names `prompt_generator`, `clip`, `model` and `device`.

    :param num_prompts: number of prompts to generate
    :param phrase_list: phrases passed on to `prompt_generator`
    :param prompt_word_length: length budget passed on to `prompt_generator`
    :param x: embedder object whose `forward(prompt)` returns a tensor
    :param output_path: file the JSON list is written to
    :return: None; the result is written to `output_path`
    """
    prompts = []
    for i in range(num_prompts):
        # Generate prompt
        prompt = prompt_generator(phrase_list, prompt_word_length)

        with torch.no_grad():
            # Embedder output, flattened into a single vector
            word_embedding = torch.flatten(x.forward(prompt))

            print('Computing text embedding for Prompt'+str(i+1))
            tokens = clip.tokenize(prompt).to(device)
            text_features = model.encode_text(tokens).float()
            text_features /= text_features.norm(dim=-1, keepdim=True)

        # Append prompt, word embedding, and clip embedding to list
        prompts.append({
            "prompt": prompt,
            "word_embedding": word_embedding.tolist(),
            "clip_embedding": text_features[0].tolist(),
        })

    # Save prompts as JSON to file
    with open(output_path, "w") as f:
        json.dump(prompts, f)

# Build the text embedder with its default model version and settings.
x = CLIPTextEmbedder()
# Length budget handed to prompt_generator, which compares it against the prompt's character count.
prompt_word_length=32
# Generate N prompts with their embeddings; generate_prompts writes them to a JSON file.
generate_prompts(N, phrase_list,prompt_word_length,x)
print('Process completed and dataset is generated')

In [None]:
import torch

# Start from a 1-D tensor holding four integers
a = torch.tensor([2, 3, 4, 5])
print(a.shape)
# Insert a new leading axis of size one via the tensor's own unsqueeze() method
m = a.unsqueeze(0)
# Show the shape after the extra dimension was added
print(m.shape)

In [None]:
# Show the current working directory; files opened with relative paths in this notebook resolve against it.
!pwd

ELM (Extreme Learning Machine) auto-encoder implementation

In [None]:
import time
import torch
import numpy as np
import torch.nn as nn
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity


class ELMAutoEncoder(nn.Module):
    """ELM-style single-hidden-layer network with closed-form output weights.

    The input-to-hidden weights and bias are drawn randomly in the constructor and
    are left untouched by `fit`. `fit` solves the hidden-to-output weights with the
    pseudo-inverse of the hidden activations, so `decode(encode(x))` approximates
    the targets passed to `fit`.
    """

    def __init__(self, input_size, hidden_size, output_size, batch_size=1, use_gpu=True, seed=None):
        """
        :param input_size: number of input features
        :param hidden_size: number of hidden units
        :param output_size: number of output features
        :param batch_size: stored on the instance; not used by this class itself
        :param use_gpu: place the parameters on CUDA when it is available
        :param seed: optional RNG seed for reproducible random weights; when None
            the current time is used
        """
        super(ELMAutoEncoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        if self.use_gpu and torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
            # Only warn when a GPU was actually asked for.
            if self.use_gpu:
                print("Warning: CUDA not available, using CPU.")
        self.set_random_seed(seed)
        self.weight = nn.Parameter(torch.randn(input_size, hidden_size, dtype=torch.float, device=self.device))
        self.bias = nn.Parameter(torch.randn(hidden_size, device=self.device))
        self.beta = nn.Parameter(torch.randn(hidden_size, output_size, device=self.device))
        self.activation_hidden = nn.ReLU()
        self.activation_output = nn.Identity()

    def _hidden(self, x):
        """Return the hidden activations for `x`, computed in float32 on the model's device."""
        x = x.to(self.device).float()
        return self.activation_hidden(torch.matmul(x, self.weight.float()) + self.bias.float())

    def forward(self, x):
        """Return sigmoid(hidden(x) @ beta).

        NOTE(review): this applies a sigmoid whereas `decode` uses the identity
        activation, so `forward(x)` differs from `decode(encode(x))` — confirm which
        one is intended.
        """
        h = self._hidden(x)
        y_pred = torch.sigmoid(h @ self.beta)  # apply sigmoid to output
        return y_pred

    def set_random_seed(self, seed=None):
        """Seed the torch and NumPy global RNGs; uses the current time when `seed` is None."""
        if seed is None:
            seed = int(time.time())
        torch.manual_seed(seed)
        np.random.seed(seed)

    def fit(self, x_train, targets_train, n_neurons=None):
        """Solve the output weights so that hidden(x_train) @ beta approximates the targets.

        :param x_train: training inputs, one sample per row
        :param targets_train: training targets, one sample per row
        :param n_neurons: unused; kept so existing calls keep working. The hidden
            bias is no longer reset here, because `beta` is solved for the
            activations produced with the current bias and `encode` must keep
            producing those same activations.
        :return: self
        """
        with torch.no_grad():
            H = self._hidden(x_train)
            print('output of hiddenlayer',H.shape)
            H_inv = torch.pinverse(H)
            beta = torch.matmul(H_inv, targets_train.to(self.device).float())

        self.beta = nn.Parameter(beta)
        self.output_weight = nn.Parameter(self.beta)
        print('output weight matrix',self.output_weight.shape)
        return self

    def encode(self, x):
        """Return the hidden-layer activations for `x`."""
        print('input to encoder',x.shape)
        return self._hidden(x)

    def decode(self, h):
        """Map hidden activations to outputs with the weights solved by `fit`.

        Raises AttributeError when called before `fit`, since `output_weight` is
        only created there.
        """
        x_pred = self.activation_output(h.to(self.device).float() @ self.output_weight)
        return x_pred

    def clear_memory(self):
        """Release cached CUDA memory; does nothing when CUDA is not available."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

def reconstruction_error(model,data,targets_train):
    """Return (MSE, mean cosine distance) between `model`'s reconstruction of `data` and the targets.

    The reconstruction is `model.decode(model.encode(data))`. The cosine distance
    is computed per sample (row i of the targets against row i of the
    reconstruction) and then averaged; a pairwise similarity matrix would also
    average every mismatched target/prediction pair into the result.

    :param model: object exposing `encode(x)` and `decode(h)`
    :param data: input tensor, one sample per row
    :param targets_train: target tensor with one row per sample of `data`
    :return: tuple `(mse, cosine_distance)` of Python floats
    """
    with torch.no_grad():
        encoded = model.encode(data)
        print('encoded.shape',encoded.shape)
        decoded = model.decode(encoded)
        print('decoded',decoded.shape)
        # Compare on the reconstruction's device and dtype so the loss does not
        # mix CPU/GPU tensors or float32/float64 values.
        targets = torch.as_tensor(targets_train).to(device=decoded.device, dtype=decoded.dtype)
        mse_loss = nn.MSELoss()(decoded, targets)
        per_sample_cos = nn.functional.cosine_similarity(
            targets.reshape(targets.shape[0], -1),
            decoded.reshape(decoded.shape[0], -1),
            dim=1,
        )
        cos_dis = 1 - per_sample_cos.mean().item()
        return mse_loss.item(),cos_dis

def load_prompts(file_path):
    """Read the JSON file at `file_path` and return its parsed content."""
    with open(file_path) as handle:
        return json.load(handle)

def prepare_data(prompts):
    """Return a list holding one NumPy array per entry, built from its 'word_embedding' value."""
    return [np.array(entry['word_embedding']) for entry in prompts]

def prepare_targets(prompts):
    """Return a list holding one NumPy array per entry, built from its 'clip_embedding' value."""
    return [np.array(entry['clip_embedding']) for entry in prompts]



prompts = load_prompts('./prompts_and_embeddings.json')

# Prepare data and targets
data = prepare_data(prompts)
targets = prepare_targets(prompts)
hidden_size=500 if ENV_TYPE == 'production' else 5

# Split the data into train and test sets
data_train, data_test, targets_train, targets_test = train_test_split(data, targets, test_size=0.2, random_state=42)

# Convert to PyTorch tensors. Each split is a list of arrays, so it is first stacked
# into a single float32 NumPy array: building a tensor straight from a list of
# ndarrays is slow (PyTorch warns about it) and would yield float64 tensors.
data_train = torch.from_numpy(np.asarray(data_train, dtype=np.float32))
data_test = torch.from_numpy(np.asarray(data_test, dtype=np.float32))
targets_train = torch.from_numpy(np.asarray(targets_train, dtype=np.float32))
targets_test = torch.from_numpy(np.asarray(targets_test, dtype=np.float32))

# Fit the auto-encoder: inputs are the word embeddings, targets the CLIP embeddings
autoencoder = ELMAutoEncoder(data_train.shape[1], hidden_size,targets_test.shape[1])
autoencoder.fit(data_train,targets_train, hidden_size)

# Calculate the mean squared error and cosine distance on the training split
train_reconstruction_error,cos_dis = reconstruction_error(autoencoder,data_train,targets_train)
print('train mse error','{:.15f}'.format(train_reconstruction_error))
print('train cosine distance','{:.15f}'.format(cos_dis))

# Same metrics on the held-out test split
test_reconstruction_error,cos_dis = reconstruction_error(autoencoder,data_test,targets_test)
print('test mse error',test_reconstruction_error)
print('test cosine distance',cos_dis)




