In [1]:
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer, AutoModel
import torch
# Pick the compute device: use CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU.
# NOTE(review): none of the pipelines visible in this part of the notebook
# receive this value — confirm whether it should be passed as `device=`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

2024-01-24 10:58:14.806094: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 10:58:14.860593: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from transformers import pipeline

# Build a summarization pipeline backed by the flan-T5 base checkpoint.
summarizer = pipeline(task="summarization", model="google/flan-t5-base")

# Passage that will be condensed by the model.
text = """
    The internet, a revolutionary technology that began in the late 1960s as a project 
    of the United States Department of Defense, has transformed into a global phenomenon 
    that plays a pivotal role in the daily lives of billions of people. 
    Originally developed as ARPANET (Advanced Research Projects Agency Network), 
    the internet's primary goal was to enable computer networks to communicate with each other, 
    especially in the event of a nuclear attack.
"""

# Run the model; `max_length=50` is forwarded to the pipeline to bound the
# length of the generated summary.
summary = summarizer(text, max_length=50)

# The pipeline returns a list with one dict per input; show its summary text.
print(summary[0]["summary_text"])


The Internet is a global phenomenon that plays a pivotal role in the daily lives of billions of people. Originally developed as ARPANET (Advanced Research Projects Agency Network), the internet's primary goal was to


In [3]:
from transformers import pipeline

# Build a text-classification pipeline (requested through the
# "sentiment-analysis" task name) around a RoBERTa checkpoint that labels text
# as 'hate' or 'nothate'.
sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="facebook/roberta-hate-speech-dynabench-r4-target",
)

# One benign sample and one hostile sample to classify.
non_hate_text = "I love using new technology tools for my projects!"
hate_text = "#Person 1# tells Tommy that the movie was terrible, dumb and stupid."

# Call-time options used to obtain raw logits from the classifier:
#   top_k=None               -> report a score for every class, not just the best one.
#   function_to_apply="none" -> skip softmax/sigmoid so the scores are raw logits.
#   batch_size=16            -> allow up to 16 texts per forward pass.
reward_logits_kwargs = dict(
    top_k=None,
    function_to_apply="none",
    batch_size=16,
)

# Score each sample and show the per-class logits.
print(sentiment_analyzer(non_hate_text, **reward_logits_kwargs))
print(sentiment_analyzer(hate_text, **reward_logits_kwargs))


[{'label': 'nothate', 'score': 4.664432048797607}, {'label': 'hate', 'score': -4.196918964385986}]
[{'label': 'hate', 'score': 0.37227317690849304}, {'label': 'nothate', 'score': -0.6921192407608032}]


In [4]:
from transformers import pipeline

# Build a Named Entity Recognition (NER) pipeline from a BERT-large checkpoint
# fine-tuned on the English CoNLL-03 data set. It tags tokens that belong to
# named entities such as people, locations and organizations.
ner_pipeline = pipeline(
    task="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)

# Sample passage in which the model should find named entities.
text = """
    Hugging Face Inc. is a company based in New York City. 
    Its technology is based on a Transformer model developed by researchers at Google.
"""

# Run NER over the passage. The result is a list with one entry per tagged
# token, each carrying (among other fields) the token text and its entity tag.
ner_results = ner_pipeline(text)

# Function to reconstruct full words from subword tokens and associate them with their respective entity type.
# BERT and similar models often break down words into smaller sub-units (subwords), which this function will recombine.
def reconstruct_words_from_tokens(ner_results):
    """Merge WordPiece sub-tokens of a token-level NER result into whole words.

    Args:
        ner_results: Iterable of dicts. Each dict must provide:
            - 'word': the token text; a leading '##' marks a continuation
              piece of the preceding token.
            - 'entity': the entity tag predicted for that token.
            It may also provide an integer 'index' (the token's position in
            the tokenized input, as emitted by the Hugging Face
            token-classification pipeline). When two consecutive items both
            carry 'index', it is used to check that a continuation piece is
            really adjacent to the previous item.

    Returns:
        A list of (word, entity) tuples in input order. Each word carries the
        entity tag of its first piece.

    Notes:
        Only tagged tokens are normally present in the input, so a '##' piece
        can show up without its head token, or after an unrelated word. Such a
        piece starts a new word (with the '##' stripped) and keeps its own
        entity tag, instead of being glued onto the wrong word or reported
        with an empty tag.
    """
    reconstructed_words = []
    current_word = ""
    current_entity = ""
    previous_index = None

    for item in ner_results:
        token = item['word']
        entity = item['entity']
        index = item.get('index')

        is_continuation = token.startswith("##")

        # A gap can only be detected when both neighbours report a position.
        has_gap = (
            index is not None
            and previous_index is not None
            and index != previous_index + 1
        )

        if is_continuation and current_word and not has_gap:
            # Genuine continuation: drop the '##' marker and extend the word.
            current_word += token[2:]
        else:
            # New word (or orphan continuation): flush what was accumulated,
            # then start over with this token and its own entity tag.
            if current_word:
                reconstructed_words.append((current_word, current_entity))
            current_word = token[2:] if is_continuation else token
            current_entity = entity

        previous_index = index

    # Flush the last accumulated word, if any.
    if current_word:
        reconstructed_words.append((current_word, current_entity))

    return reconstructed_words

# Collapse the token-level predictions into (word, entity) pairs.
reconstructed_words = reconstruct_words_from_tokens(ner_results)

# Report every reconstructed word next to the entity tag assigned to it.
for (word, entity) in reconstructed_words:
    print(f"Word: {word}, Entity: {entity}")


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Word: Hugging, Entity: I-ORG
Word: Face, Entity: I-ORG
Word: Inc, Entity: I-ORG
Word: New, Entity: I-LOC
Word: York, Entity: I-LOC
Word: City, Entity: I-LOC
Word: Transformer, Entity: I-MISC
Word: Google, Entity: I-ORG


In [5]:
from transformers import pipeline

# Build an extractive question-answering pipeline on a DistilBERT checkpoint
# distilled on SQuAD.
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# Inputs for the model:
#   context  - the passage in which the answer is expected to appear.
#   question - what we want answered from that passage.
context = "Hugging Face is a technology company based in New York. It specializes in Natural Language Processing."
question = "What does Hugging Face specialize in?"

# Ask the model to locate the answer inside the context.
answer = qa_pipeline(context=context, question=question)

# The result is a dict; show only the answer text.
print(answer["answer"])


Natural Language Processing


In [6]:
from transformers import pipeline

# `set_seed` seeds the random number generators used during generation.
from transformers import set_seed

# Initialize the text generation pipeline using the GPT-2 model.
text_generator = pipeline("text-generation", model="gpt2")

# Text generation may sample tokens, in which case the continuation changes on
# every run. Seeding right before the call keeps the printed output
# reproducible under "Restart Kernel -> Run All".
set_seed(42)

# Define the prompt for text generation.
prompt = "In a distant future, humans and AI coexist in harmony."

# Generate text based on the prompt.
# `max_length=50` and `num_return_sequences=1` are forwarded to generation
# exactly as before.
# `pad_token_id` is set explicitly to the tokenizer's EOS id: that is the value
# the library otherwise falls back to, while emitting a
# "Setting `pad_token_id` to `eos_token_id`" notice on each call.
generated_text = text_generator(
    prompt,
    max_length=50,
    num_return_sequences=1,
    pad_token_id=text_generator.tokenizer.eos_token_id,
)

# Print the generated text.
print(generated_text[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In a distant future, humans and AI coexist in harmony. Humanity's only chance to fight a powerful corporation is to harness technology and engineering to create an alternate universe, and it may be even earlier.

The story, titled: Humanity vs


In [7]:
from transformers import AutoTokenizer, pipeline

# Load the T5 tokenizer, stating the maximum sequence length explicitly.
tokenizer = AutoTokenizer.from_pretrained(
    "t5-base",
    model_max_length=512,
)

# Build the English-to-German translation pipeline around the same checkpoint,
# handing it the tokenizer created above.
translator = pipeline(
    task="translation_en_to_de",
    model="t5-base",
    tokenizer=tokenizer,
)

# English sentence to be translated into German.
text_to_translate = "Hugging Face is a great tool for Natural Language Processing."

# Translate; `max_length=40` is forwarded to the pipeline to bound the output length.
translated_text = translator(text_to_translate, max_length=40)

# The pipeline returns a list with one dict per input; show the translation.
print(translated_text[0]["translation_text"])


Hugging Face ist ein hervorragendes Werkzeug für die Verarbeitung natürlicher Sprachen.
