In [1]:
import torch 
import re 
import gradio as gr
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

In [2]:
# All tensors and the captioning model are placed on this device.
device = 'cpu'

# A single Hub repository provides the image pre-processor, the tokenizer and
# the encoder-decoder weights, so the three checkpoint names are the same string.
model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
encoder_checkpoint = model_checkpoint
decoder_checkpoint = model_checkpoint

# Objects read as globals by `predict`.
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)
model = model.to(device)

In [3]:
def predict(image, max_length=64, num_beams=4):
    """Generate a one-line caption for an image.

    Reads the module-level ``feature_extractor``, ``model``, ``tokenizer`` and
    ``device`` objects. Later cells of this notebook rebind ``model`` and
    ``tokenizer``, so call this before running them (or reload the captioning
    objects first).

    Args:
        image: Image object exposing ``convert`` (e.g. a ``PIL.Image.Image``).
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_beams: Forwarded to ``model.generate`` as ``num_beams``.

    Returns:
        The decoded caption with every ``<|endoftext|>`` marker removed,
        truncated at the first newline.
    """
    rgb_image = image.convert('RGB')
    pixel_values = feature_extractor(rgb_image, return_tensors="pt").pixel_values.to(device)

    # `num_beams` used to be accepted but never passed on, so callers could not
    # influence decoding; forward it together with `max_length`.
    caption_ids = model.generate(pixel_values, max_length=max_length, num_beams=num_beams)[0]

    decoded = tokenizer.decode(caption_ids)
    # Drop the end-of-text marker and keep only the first line of the output.
    return decoded.replace('<|endoftext|>', '').split('\n')[0]

In [4]:
from PIL import Image
# Open the sample picture from the working directory; `predict` converts it to RGB itself.
img = Image.open('Image3.png')

In [5]:
# Caption the sample image; `descr` is reused by the hashtag and paraphrase cells below.
descr = predict(img)
print(descr)

a person laying on the beach with a surfboard 


In [6]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from functools import lru_cache


@lru_cache(maxsize=None)
def _english_stop_words():
    """Return NLTK's English stop-word list as a frozenset, loading it only once."""
    return frozenset(stopwords.words('english'))


def convert_to_hashtags(sentence):
    """Turn a sentence into a list of lower-case hashtags.

    The sentence is tokenized with ``word_tokenize``; tokens that are not
    purely alphabetic or that are English stop words are dropped, and each
    remaining token is lower-cased and prefixed with ``#``.

    Args:
        sentence: Text to convert.

    Returns:
        A list of hashtag strings in the order the words appear. Repeated
        words are kept as repeated hashtags.
    """
    tokens = word_tokenize(sentence)

    # The stop-word set does not depend on the input, so it is built once and
    # reused instead of being re-read from the corpus on every call.
    stop_words = _english_stop_words()

    hashtags = []
    for token in tokens:
        lowered = token.lower()
        if token.isalpha() and lowered not in stop_words:
            hashtags.append('#' + lowered)
    return hashtags

# Example usage
# Example usage: hashtags for the caption produced by `predict`.
hashtags = convert_to_hashtags(descr)
print(hashtags)

['#person', '#laying', '#beach', '#surfboard']


In [22]:
from sentence_transformers import SentenceTransformer

# Load a SentenceTransformer checkpoint and encode each hashtag string.
# NOTE(review): this rebinds the global `model` that `predict` reads, so the
# captioning model is no longer reachable under that name after this cell runs.
model = SentenceTransformer('tekraj/avodamed-synonym-generator1')
# `hashtags` is the list left in the namespace by the hashtag cell above.
embeddings = model.encode(hashtags)
print(embeddings)

[[ 1.6032012e-02  9.1123432e-03  3.4280807e-02 ... -6.1837919e-02
   8.9081414e-02 -2.0151392e-02]
 [-4.4979587e-02  1.9186940e-02  4.7269151e-02 ... -1.0629752e-02
   1.4186645e-02  9.5968824e-03]
 [-9.9930698e-03  2.8680803e-03  4.5658320e-02 ... -1.2822804e-02
  -3.2542644e-03  1.9380139e-02]
 [-5.3844891e-02  5.6241129e-02  1.2844045e-01 ... -5.4877698e-02
  -4.8924148e-02 -3.4847163e-02]
 [ 2.2477569e-02  5.9771188e-02  4.5491740e-02 ... -1.5824595e-02
   1.3204186e-01 -2.9412491e-02]
 [-9.3013719e-03  4.1709676e-02  5.6247119e-02 ...  3.0675132e-05
   7.1854852e-02 -1.1677127e-01]]


In [23]:
from transformers import AutoTokenizer, AutoModel
import torch


#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average token embeddings along dim 1, weighting each token by its mask value.

    Args:
        model_output: Indexable whose element 0 is the token-embedding tensor.
        attention_mask: Tensor that, after a trailing unsqueeze, expands to the
            embedding tensor's size; zero entries exclude a token from the mean.

    Returns:
        The mask-weighted sum over dim 1 divided by the mask total, with the
        divisor clamped to at least 1e-9 so an all-zero mask does not divide by zero.
    """
    embeddings = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()
    weighted_sum = (embeddings * weights).sum(1)
    weight_total = weights.sum(1).clamp(min=1e-9)
    return weighted_sum / weight_total


# Sentences we want sentence embeddings for
# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Load model from HuggingFace Hub.
# NOTE(review): rebinds the global `tokenizer` and `model` names used by earlier cells.
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')
model = AutoModel.from_pretrained('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

# Tokenize sentences (padded to a common length, returned as PyTorch tensors)
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings without tracking gradients
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling. In this case, mean pooling weighted by the attention mask.
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

print("Sentence embeddings:")
print(sentence_embeddings)

Sentence embeddings:
tensor([[ 0.1145,  0.0768,  0.0263,  ..., -0.1323, -0.0056,  0.3162],
        [ 0.0065,  0.1654, -0.0364,  ...,  0.1892,  0.2014,  0.2443]])


In [25]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the BART paraphrase tokenizer and model, keeping the model on the CPU.
# These assignments rebind the global `device`, `tokenizer` and `model` names.
device = torch.device("cpu")
tokenizer = BartTokenizer.from_pretrained('eugenesiow/bart-paraphrase')
model = BartForConditionalGeneration.from_pretrained('eugenesiow/bart-paraphrase').to(device)

# Paraphrase one fixed sentence.
input_sentence = 'Two people sitting on the snow with a cup of coffee'
batch = tokenizer(input_sentence, return_tensors='pt')
generated_ids = model.generate(batch['input_ids'])

# Decode every generated sequence, dropping special tokens; the result is a list of strings.
generated_sentence = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

print(generated_sentence)

['Two people sitting on the snow with a cup of coffee.']


In [7]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the T5 paraphrase checkpoint.
# NOTE(review): rebinds the global `tokenizer` and `model` names used by earlier cells.
tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")

# Paraphrase the caption produced by `predict`.
sentence = descr

text = "paraphrase: " + sentence + " </s>"

# Call the tokenizer directly instead of the deprecated
# `encode_plus(..., pad_to_max_length=True)`. There is only one input sequence,
# so no padding is needed; the attention mask is still returned and passed on.
encoding = tokenizer(text, return_tensors="pt")
input_ids, attention_masks = encoding["input_ids"].to("cpu"), encoding["attention_mask"].to("cpu")

# Sample five candidate paraphrases (top-k / nucleus sampling).
# No random seed is set here, so the candidates differ from run to run.
outputs = model.generate(
    input_ids=input_ids, attention_mask=attention_masks,
    max_length=256,
    do_sample=True,
    top_k=120,
    top_p=0.95,
    early_stopping=True,
    num_return_sequences=5
)

# Print each paraphrase followed by its hashtags. `line` and `hashtags` keep
# the values from the last iteration after the loop ends.
for output in outputs:
    line = tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(line)
    hashtags = convert_to_hashtags(line)
    print(hashtags)



A person laying on a beach with a surfboard.
['#person', '#laying', '#beach', '#surfboard']
A person laying on the beach with a surf board.
['#person', '#laying', '#beach', '#surf', '#board']
A person lying on the beach with a surf board.
['#person', '#lying', '#beach', '#surf', '#board']
Person sitting with a surfboard on the beach.
['#person', '#sitting', '#surfboard', '#beach']
A person laying with a surfboard on the beach.
['#person', '#laying', '#surfboard', '#beach']


In [30]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

def convert_to_hashtags(sentence):
    """Build lower-case hashtags from the content words of a sentence.

    This redefines the function of the same name from an earlier cell with
    the same logic.

    Args:
        sentence: Text to tokenize with ``word_tokenize``.

    Returns:
        A list with one ``'#' + word`` entry for every token that is purely
        alphabetic and not an English stop word, lower-cased, in input order.
    """
    words = word_tokenize(sentence)
    ignored = set(stopwords.words('english'))

    hashtags = []
    for word in words:
        # Skip punctuation, numbers and mixed tokens.
        if not word.isalpha():
            continue
        lowered = word.lower()
        # Skip stop words (compared case-insensitively).
        if lowered in ignored:
            continue
        hashtags.append('#' + lowered)

    return hashtags

# Example usage
# `line` is not defined in this cell: it is whatever value the paraphrase
# loop in another cell last assigned, so the result depends on what ran before.
hashtags = convert_to_hashtags(line)
print(hashtags)

['#two', '#people', '#snow', '#cup', '#coffee', '#stand']


In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and causal-LM weights for the same checkpoint.
# NOTE(review): rebinds the global `tokenizer` and `model`; the next cell relies on these.
tokenizer = AutoTokenizer.from_pretrained("cactode/gpt2_urbandict_textgen_torch")

model = AutoModelForCausalLM.from_pretrained("cactode/gpt2_urbandict_textgen_torch")

In [44]:
# Run one forward pass on a fixed prompt using the `tokenizer`/`model` loaded
# in the previous cell, and keep the raw logits from the model output.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits

In [45]:
# Show the logits tensor computed in the previous cell.
print(logits)

tensor([[[ -1.9520, -10.0520,  -4.4291,  ...,  -5.0669,  -7.1313,  -0.2235],
         [ -6.2522, -15.8066,  -5.1867,  ..., -13.4463, -12.4652,  -2.3737],
         [ -6.0018, -17.0299,  -9.8854,  ..., -12.0795, -17.0473, -11.6971],
         [ -0.7992, -16.7028,  -9.4771,  ..., -14.3147, -11.8018,  -6.5331],
         [ -0.3515, -12.4749,  -5.8269,  ...,  -8.5130, -10.8677,  -5.3507],
         [  2.4188, -13.0126,  -8.3715,  ..., -16.4383, -14.8155,  -3.0127]]],
       grad_fn=<UnsafeViewBackward0>)


In [48]:
import requests

import os

API_URL = "https://api-inference.huggingface.co/models/cactode/gpt2_urbandict_textgen_torch"

# Never commit access tokens: read the token from the HF_TOKEN environment
# variable instead. The token that used to be hard-coded here has been exposed
# in the notebook history and should be revoked.
# When HF_TOKEN is unset or empty, no Authorization header is sent.
_hf_token = os.environ.get("HF_TOKEN")
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

def query(payload, timeout=30):
    """POST a JSON payload to ``API_URL`` and return the decoded JSON reply.

    Uses the module-level ``API_URL`` and ``headers``. The HTTP status is not
    checked, so an error reply from the server is returned as parsed JSON
    just like a successful one.

    Args:
        payload: JSON-serializable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive host.

    Returns:
        The response body parsed as JSON.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

# Send one prompt to the hosted model and keep the parsed JSON reply.
output = query({
    "inputs": "Can you please let us know more details about your ",
})

In [49]:
# Show the parsed JSON reply returned by `query`.
print(output)

{'error': 'Internal Server Error'}


In [None]:
from transformers import AutoTokenizer, AutoModelWithLMHead
import torch

# Load the text-generation checkpoint on the CPU.
# These assignments rebind the global `device`, `tokenizer` and `model` names.
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained("salesken/text_generate")
model = AutoModelWithLMHead.from_pretrained("salesken/text_generate")
model = model.to(device)

# The prompt is lower-cased before it is encoded.
input_query="tough challenges make you stronger.  "
input_ids = tokenizer.encode(input_query.lower(), return_tensors='pt')
input_ids = input_ids.to(device)

# Sample a single continuation (no beam search). No random seed is set, so the
# text differs from run to run.
sample_outputs = model.generate(
    input_ids,
    do_sample=True,
    num_beams=1,
    max_length=1024,
    temperature=0.99,
    top_k=10,
    num_return_sequences=1,
)

# Print every returned sequence with special tokens stripped.
for generated in sample_outputs:
    print(tokenizer.decode(generated, skip_special_tokens=True))

