In [8]:
# This file is to extract features from text into embeddings

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import torch
def extract_features(input_text):
    with torch.no_grad():
        inputs = tokenizer(input_text, return_tensors="pt")
        outputs = model.encoder(**inputs, output_hidden_states=True, return_dict = True)
    hidden_states = outputs.hidden_states
    last_hidden_state = hidden_states[-1]
    pooled_output = torch.mean(last_hidden_state, dim=1)[0]
    return pooled_output


In [13]:
import json

sarcasm_data = json.load(open("sarcasm_data.json"))
result = {}
for key, data in sarcasm_data.items():
    result[key] = extract_features(data['utterance']).tolist()


In [15]:
with open("text_features.json_t5", "w") as f:
    json.dump(result, f)

In [1]:
from transformers import BertTokenizer, BertModel
import torch

# Load pre-trained BERT model and tokenizer
model_name = 'bert-large-uncased'  # Can use other models like 'bert-large-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

# Set the model to evaluation mode (to prevent weight updates)
model.eval()


  from .autonotebook import tqdm as notebook_tqdm


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
  

In [6]:
def extract_features(input_text):
    with torch.no_grad():
        inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt")
        outputs = model(**inputs)
    last_hidden_states = outputs.last_hidden_state
    cls_embeddings = last_hidden_states[:, 0, :][0].tolist()
    return cls_embeddings

result = {}
for key, data in sarcasm_data.items():
    result[key] = extract_features(data['utterance'])

with open("text_features_bert.json", "w") as f:
    json.dump(result, f)

In [10]:
with open("text_features_bert.json", "r") as f:
    bert_features = json.load(f)
    print(len(bert_features['1_10004']))

768


In [20]:
import open_clip
import torch

# Load a pre-trained OpenCLIP model and tokenizer
model_name = "ViT-B-32-quickgelu"  # or use "ViT-L-14" for larger models
pretrained = "openai"

model, _, preprocess = open_clip.create_model_and_transforms(model_name, pretrained=pretrained)
tokenizer = open_clip.get_tokenizer(model_name)

# Ensure the model is in evaluation mode
model.eval()


CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (patch_dropout): Identity()
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): ModuleList(
        (0-11): 12 x ResidualAttentionBlock(
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ls_1): Identity()
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ls_2): Identity()
        )
      )
    )
    (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  

In [23]:

def extract_features(input_text):
    with torch.no_grad():
        inputs = tokenizer(input_text)
        outputs = model.encode_text(inputs)

    return outputs[0].tolist()

result = {}
for key, data in sarcasm_data.items():
    
    result[key] = extract_features(data['utterance'])

with open("text_features_clip.json", "w") as f:
    json.dump(result, f)

