In [10]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer # Changed AutomodelForSequenceClassification to AutoModelForSequenceClassification

model_name = "cardiffnlp/tweet-topic-21-multi"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

labels = [

    "arts_&culture", "business&entrepreneurs", "celebrity&pop_culture", "diaries&_daily_life",

    "family", "fashion_&style", "film_tv&video", "fitness&health", "food&_dining",

    "gaming", "learning_&educational", "music", "news&_social_concern", "other_hobbies",

    "relationships", "science_&technology", "sports&esports", "travel&_adventure",

    "youth_&_student_life"

]

In [6]:
texts=[
    "The latest iphone was just released with an incredible new camera",
    "Manchester United won their match with a stunning goal in the last minute.",
    "NASA just launched a new mission to explore the surface of Mars.",
    "The Oscars had some surprising winners this year!"
]
# Tokenize the input texts
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
# Perform classification
with torch.no_grad():
    outputs = model(**inputs)
# Convert logits to probabilities using softma
probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
# Get predicted labels
predictions = torch.argmax(probabilities, dim=1)
for text, pred, prob in zip(texts, predictions, probabilities):
    print(f"Text: {text}\nTopic: {labels[pred.item()]}, Confidence: {prob[pred].item():.4f}\n")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Text: The latest iphone was just released with an incredible new camera
Topic: science_&technology, Confidence: 0.9303

Text: Manchester United won their match with a stunning goal in the last minute.
Topic: sports&esports, Confidence: 0.9989

Text: NASA just launched a new mission to explore the surface of Mars.
Topic: science_&technology, Confidence: 0.8526

Text: The Oscars had some surprising winners this year!
Topic: film_tv&video, Confidence: 0.9357



In [8]:
from transformers import pipeline
summarizer = pipeline("summarization")
text = """
Hugging Face is a company that specializes in natural language processing (NLP).
It has developed the Transformers library, which provides state-of-the-art models
for a wide range of NLP tasks such as text classification, information extraction,
question answering, summarization, translation, and more. The library is widely used
in both academia and industry due to its ease of use and flexibility.
"""

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Device set to use cpu


In [9]:
summary = summarizer(text, max_length=50, min_length=20, do_sample=False)
print("Summary:", summary[0]['summary_text'])

Summary:  The Transformers library provides state-of-the-art models for a wide range of NLP tasks . The library is widely used in both academia and industry due to its ease of use and flexibility .


In [11]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
prompt = "Once upon a time in a galaxy,"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_length=50, num_return_sequences=1, temperature=0.7, top_k=50)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time in a galaxy, the galaxy was a place of great power and great danger. The galaxy was a place of great power and great danger. The galaxy was a place of great power and great danger. The galaxy was a place of
