In [2]:
!pip install transformers torch pandas





In [3]:
from transformers import AutoTokenizer, AutoModel
import torch
import pandas as pd


In [4]:
# Tiny in-memory sample with one column of free-text descriptions.
# To use real data instead, read a CSV that has the same column name.
data = dict(
    text_description=[
        "a dog playing in the snow",
        "a futuristic city at night",
        "a bowl of fresh fruits",
    ],
)

df = pd.DataFrame(data)

# index=False keeps the row index out of the file, so it contains only text_description.
df.to_csv("descriptions.csv", index=False)

df


Unnamed: 0,text_description
0,a dog playing in the snow
1,a futuristic city at night
2,a bowl of fresh fruits


In [5]:
# One checkpoint name drives both loaders so the tokenizer vocabulary matches the encoder weights.
# NOTE(review): the original hint suggested "openai/clip-vit-base-patch32" as an alternative;
# that checkpoint is presumably not a drop-in replacement for the text-only forward pass and
# `last_hidden_state` pooling used further down - confirm before switching.
model_name = "bert-base-uncased"

model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [6]:
# Pull the description column out of the frame as a plain Python list.
texts = df.loc[:, "text_description"].tolist()

# Encode the whole batch at once: shorter sequences are padded to the longest one,
# over-long sequences are truncated, and the result is returned as PyTorch tensors.
tokens = tokenizer(
    texts,
    return_tensors="pt",
    truncation=True,
    padding=True,
)
tokens


{'input_ids': tensor([[  101,  1037,  3899,  2652,  1999,  1996,  4586,   102],
        [  101,  1037, 28971,  2103,  2012,  2305,   102,     0],
        [  101,  1037,  4605,  1997,  4840, 10962,   102,     0]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 0]])}

In [7]:
# Inference only: run the forward pass with autograd switched off.
with torch.no_grad():
    outputs = model(**tokens)

# Position 0 of every sequence is the [CLS] token; its final-layer hidden state
# serves as the sentence embedding.
embeddings = outputs.last_hidden_state[:, 0]
embeddings.shape  # (num_sentences, hidden_size)


torch.Size([3, 768])

In [8]:
# `embeddings` is a slice of the full last_hidden_state tensor and shares its storage.
# torch.save serializes the entire underlying storage of a view, so clone first to
# write only the (num_sentences, hidden_size) values instead of every token's state.
torch.save(embeddings.clone(), "output_embeddings.pt")
print("Embeddings saved to output_embeddings.pt")


Embeddings saved to output_embeddings.pt


In [9]:
# weights_only=True restricts unpickling to tensors and other plain data types, so a
# tampered file cannot execute arbitrary code while being loaded. The saved object
# is a single tensor, which this restricted mode supports.
loaded = torch.load("output_embeddings.pt", weights_only=True)
print("Loaded shape:", loaded.shape)


Loaded shape: torch.Size([3, 768])
