<a href="https://colab.research.google.com/github/muraleee/collab-stuff/blob/main/Random.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from transformers import BertTokenizer, BertModel
import torch

# Load the tokenizer and the encoder from the same "bert-base-uncased" checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

# Sample passage whose tokens are embedded below.
input_text = '''
After a long day at work, Sarah decided to relax by taking her
dog for a walk in the park. As they strolled along the
tree-lined paths, Sarah's dog, Max, eagerly sniffed around,
chasing after squirrels and birds. Sarah smiled as she watched
Max enjoy himself, feeling grateful for the companionship and
joy that her furry friend brought into her life.'''

# Tokenize into PyTorch tensors so the result can be unpacked into the model.
tokens = tokenizer(input_text, return_tensors="pt")

# Forward pass with gradient tracking switched off (inference only).
with torch.no_grad():
    outputs = model(**tokens)

# Final-layer hidden states; index 0 selects the single sequence in the batch.
last_hidden_states = outputs.last_hidden_state

print("Token embeddings:")
# Walk the token ids by position and print each decoded token with its vector.
for position, token in enumerate(tokens["input_ids"][0]):
    word = tokenizer.decode(int(token))
    embedding = last_hidden_states[0][position]
    print(f"{word}: {embedding}")

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# scikit-learn operates on NumPy arrays. Convert the per-token hidden states
# explicitly so this cell keeps working even if the tensor lives on a GPU or
# is still attached to an autograd graph (implicit conversion fails in both cases).
token_vectors = last_hidden_states[0].detach().cpu().numpy()
token_labels = tokenizer.convert_ids_to_tokens(tokens["input_ids"][0].tolist())

# Project the token vectors to 2-D; random_state fixes the layout across runs.
tsne = TSNE(n_components=2, perplexity=5, random_state=42)
embeddings_tsne = tsne.fit_transform(token_vectors)

# Scatter the projected points and label each one with its token string.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(embeddings_tsne[:, 0], embeddings_tsne[:, 1], marker='o')
for (x_coord, y_coord), word in zip(embeddings_tsne, token_labels):
    ax.annotate(word, xy=(x_coord, y_coord), fontsize=10)
ax.set_xlabel('t-SNE Dimension 1')
ax.set_ylabel('t-SNE Dimension 2')
ax.set_title('t-SNE Visualization of Token Embeddings')
plt.show()

In [None]:
# Numbers 1..9 and their squares, printed side by side.
a = [1, 2, 3, 4, 5, 6, 7, 8, 9]
b = [value * value for value in a]
for number, square in zip(a, b):
    print(number, square)

In [None]:
from transformers import BertTokenizer, BertModel
import torch

# Load the tokenizer and the encoder from the same "bert-base-uncased" checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertModel.from_pretrained("bert-base-uncased")

input_text = '''
After a long day at work, Sarah decided to relax by taking her
dog for a walk in the park. As they strolled along the
tree-lined paths, Sarah's dog, Max, eagerly sniffed around,
chasing after squirrels and birds. Sarah smiled as she watched
Max enjoy himself, feeling grateful for the companionship and
joy that her furry friend brought into her life.'''

tokens = tokenizer(input_text, return_tensors="pt")

# The model's embedding module and the weight matrix of its position-embedding layer.
embeddings = model.embeddings
positional_embeddings = embeddings.position_embeddings.weight

# Position indices 0..seq_len-1 with a leading batch dimension of 1.
position_ids = torch.arange(tokens['input_ids'].size(1),
                            dtype=torch.long).unsqueeze(0)

# The weight is a trainable parameter, so indexing it would normally record an
# autograd graph and every printed row would carry a grad_fn. This is
# inspection only, so do the lookup with gradient tracking disabled.
with torch.no_grad():
    input_positional_embeddings = positional_embeddings[position_ids]

print("Positional embeddings shape:", input_positional_embeddings.shape)
print("Positional embeddings for each token:")

# Pair each token id with the position vector for its slot in the sequence.
for token_id, pos_embedding in zip(tokens['input_ids'][0],
                                   input_positional_embeddings[0]):
    # Convert the 0-d tensor to a plain int before decoding.
    token = tokenizer.decode(int(token_id))
    print(f"{token}: {pos_embedding}")

In [None]:
import math

def softmax(values):
    """Return the softmax of ``values`` as a list of floats summing to 1.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``math.exp`` from overflowing on
    large inputs.

    Args:
        values: Iterable of real numbers (the raw scores).

    Returns:
        A list with one probability per input value; an empty list for
        empty input.
    """
    values = list(values)
    if not values:
        return []
    peak = max(values)
    exps = [math.exp(value - peak) for value in values]
    total = sum(exps)
    return [e / total for e in exps]


x = [4.5, 6, 3.2]
z = softmax(x)
z

In [None]:
from transformers import AutoTokenizer

# Hub id of the DistilBERT checkpoint whose tokenizer is loaded here.
sst2_checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(sst2_checkpoint)


In [None]:
# Shell out to `du` to list human-readable disk usage under ~/.cache/huggingface/hub.
!du -h  ~/.cache/huggingface/hub

In [None]:
from transformers import AutoModelForSequenceClassification

# Load the sequence-classification model from the DistilBERT SST-2 checkpoint.
sst2_checkpoint = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(sst2_checkpoint)

In [None]:
import torch

text = "I loved the movie, it was fantastic!"

# Tokenize into PyTorch tensors so the result can be unpacked into the model.
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    outputs = model(**inputs)

# argmax without `dim` works on the flattened logits, which is fine here
# because a single text is classified; the result is the winning class index.
predicted_label = torch.argmax(outputs.logits)
predicted_label

In [None]:
from transformers import pipeline

# Text-classification pipeline backed by the DistilBERT SST-2 checkpoint.
classifier = pipeline(
    'text-classification',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
)

In [None]:
# Sample restaurant review with favourable wording.
review1 = '''From the warm welcome to the exquisite dishes and impeccable
 service, dining at Gourmet Haven is an unforgettable experience that
 leaves you eager to return.'''

# Sample restaurant review with unfavourable wording.
review2 = '''Despite high expectations, our experience at Savor Bistro
 fell short; the food was bland, service was slow, and the overall
 atmosphere lacked charm, leaving us disappointed and unlikely to
 revisit.'''

In [None]:
# Run the classifier on the second review and print its raw result.
review2_sentiment = classifier(review2)
print(review2_sentiment)

In [None]:
from transformers import pipeline

# Text-classification pipeline using the "huaen/question_detection" checkpoint.
question_classifier = pipeline(
    task="text-classification",
    model="huaen/question_detection",
)

In [None]:
# Unpunctuated fragment to classify; surrounding whitespace is kept as written.
question_text = '''
    started from this or that but where
    '''
response = question_classifier(question_text)
print(response)

In [None]:
# Text-classification pipeline using the XLM-RoBERTa language-detection checkpoint.
language_classifier = pipeline(
    task="text-classification",
    model="papluca/xlm-roberta-base-language-detection",
)

In [None]:
# Classify a single short word and print the pipeline's raw result.
sample_word = "lampa"
response = language_classifier(sample_word)
print(response)

In [None]:
from transformers import pipeline

# Generic translation pipeline on T5-base; no language pair is named in the task.
translator = pipeline(
    task="translation",
    model="google-t5/t5-base",
)

In [None]:
# Rebind `translator` to an English-to-Hindi task on the same T5-base checkpoint.
# NOTE(review): confirm this checkpoint actually supports Hindi as a target
# language; the task string alone does not guarantee it.
translator = pipeline(
    'translation_en_to_hi',
    model="google-t5/t5-base",
)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

model_name_or_path = "tencent/HY-MT1.5-1.8B"

# Tokenizer and causal LM come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
# You may want to use bfloat16 and/or move to GPU here.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
)

# The user turn is the instruction, a blank line, then the segment to translate.
instruction = "Translate the following segment into Hindi, without additional explanation."
source_segment = "Read this book and tell me what it discusses."
messages = [
    {"role": "user", "content": f"{instruction}\n\n{source_segment}"},
]

# Render the conversation with the tokenizer's chat template as PyTorch tensors.
tokenized_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=False,
    return_tensors="pt",
)

# Move the prompt to the model's device, generate, and decode the first sequence.
prompt_on_device = tokenized_chat.to(model.device)
outputs = model.generate(prompt_on_device, max_new_tokens=2048)
output_text = tokenizer.decode(outputs[0])

In [None]:
# Generate again from the already-tokenized prompt with the already-loaded
# model (this repeats the generation done in the loading cell), then show the text.
prompt_on_device = tokenized_chat.to(model.device)
outputs = model.generate(prompt_on_device, max_new_tokens=2048)
output_text = tokenizer.decode(outputs[0])
output_text

In [None]:
from huggingface_hub import notebook_login
# Start huggingface_hub's notebook login flow.
# NOTE(review): presumably prompts for a Hub access token -- keep tokens out of saved outputs.
notebook_login()

In [None]:
!pip install sentencepiece
!pip install protobuf

In [None]:
from transformers import pipeline

# Zero-shot text classification pipeline using the XLM-RoBERTa-large XNLI checkpoint.
zero_shot_classifier = pipeline(
    task="zero-shot-classification",
    model='joeddav/xlm-roberta-large-xnli',
)

In [None]:
# Sample passage about governance, diplomacy and public policy.
# The quotation marks are part of the string.
text1 = '''
"In the intricate realm of global affairs, the interplay of power,
diplomacy, and governance stands as a defining force in the
trajectory of nations. Amidst fervent debates in legislative
chambers and pivotal dialogues among world leaders, ideologies
clash and policies take shape, shaping the course of societies.
Issues such as economic disparity, environmental stewardship, and
human rights take precedence, driving conversations and shaping
public sentiment. In an age of digital interconnectedness, social
media platforms have emerged as influential channels for discourse
and activism, amplifying voices and reshaping narratives with
remarkable speed and breadth. As citizens grapple with the
complexities of contemporary governance, the pursuit of accountable
and transparent leadership remains paramount, reflecting an
enduring quest for fairness and inclusivity in societal governance."
'''

# Sample passage about love and romance.
text2 = '''
In the tender tapestry of human connection, romance weaves its
delicate threads, binding hearts in a dance of passion and longing.
From the flutter of a first glance to the warmth of an intimate
embrace, love blooms in the most unexpected places, transcending
barriers of time and circumstance. In the gentle caress of a hand
and the whispered promises of affection, two souls find solace in
each other's embrace, navigating the complexities of intimacy with
tender care. As the sun sets and stars illuminate the night sky,
lovers share stolen moments of intimacy, lost in the intoxicating
rhythm of each other's presence. In the symphony of love, every
glance, every touch, speaks volumes of a shared bond that defies
explanation, leaving hearts entwined in an eternal embrace.
'''

In [None]:
# Topics the zero-shot classifier scores the text against.
candidate_labels = ["technology", "politics", "business", "romance"]

# multi_label=True is passed so the labels are not treated as mutually exclusive.
prediction = zero_shot_classifier(
    text1,
    candidate_labels=candidate_labels,
    multi_label=True,
)

In [None]:
# pandas is not imported by any earlier cell, so import it here; otherwise
# this cell raises NameError on a fresh kernel.
import pandas as pd

# Score both passages against the candidate labels in one call.
prediction = zero_shot_classifier([text1, text2],
                                  candidate_labels,
                                  multi_label = True)

# Tabulate the results, leaving out the long echoed input text column.
display(pd.DataFrame(prediction).drop(columns=["sequence"]))

In [None]:
from transformers import pipeline

# Zero-shot image classification pipeline on a CLIP checkpoint.
# Note: this rebinds `classifier`, which earlier held the text sentiment pipeline.
classifier = pipeline(
    task="zero-shot-image-classification",
    model="openai/clip-vit-large-patch14-336",
)

In [None]:
# pandas is not imported by any earlier cell, so import it here; otherwise
# this cell raises NameError on a fresh kernel.
import pandas as pd

labels_for_classification = ["airplane", "car", "train"]

# "Emirates.png" is resolved relative to the notebook's working directory.
scores = classifier("Emirates.png",
                    candidate_labels = labels_for_classification)

# Show the returned scores as a table.
pd.DataFrame(scores)