In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load pre-trained model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

# Sample text
text = "This is a great product!"

# Tokenize input
inputs = tokenizer(text, return_tensors='pt')

# Get predictions
outputs = model(**inputs)
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[0.4081, 0.5919]], grad_fn=<SoftmaxBackward0>)


In [2]:
[[0.3803, 0.6197]]

[[0.3803, 0.6197]]

In [3]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch

# Load pre-trained model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')
model = BertForQuestionAnswering.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')

# Sample context and question
context = "BERT is a language model developed by Google."
question = "Who developed BERT?"

# Tokenize input
inputs = tokenizer.encode_plus(question, context, return_tensors='pt')
input_ids = inputs['input_ids']
token_type_ids = inputs['token_type_ids']

# Get answer
outputs = model(input_ids, token_type_ids=token_type_ids)
start_scores, end_scores = outputs.start_logits, outputs.end_logits
all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
answer = ' '.join(all_tokens[torch.argmax(start_scores) : torch.argmax(end_scores) + 1])
print(answer)


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


google


In [4]:
from transformers import BertTokenizer, BertForTokenClassification
from transformers import pipeline

# Load pre-trained model and tokenizer
tokenizer = BertTokenizer.from_pretrained('dslim/bert-base-NER')
model = BertForTokenClassification.from_pretrained('dslim/bert-base-NER')

# Sample text
text = "Hugging Face Inc. is a company based in New York City."

# Tokenize input
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs).logits

# Post-process
predictions = torch.argmax(outputs, dim=2)
labels = [model.config.id2label[label_id.item()] for label_id in predictions[0]]

# Output labels
tokens = tokenizer.tokenize(text)
for token, label in zip(tokens, labels):
    print(f"{token}: {label}")


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Hu: O
##gging: B-ORG
Face: I-ORG
Inc: I-ORG
.: I-ORG
is: O
a: O
company: O
based: O
in: O
New: O
York: B-LOC
City: I-LOC
.: I-LOC


In [5]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Load pre-trained model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")

# Sample text
text = "Hugging Face Inc. is a company based in New York City."

# Tokenize input and predict
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
results = nlp(text)

# Output POS tags
for result in results:
    print(f"{result['word']}: {result['entity']}")


Some weights of the model checkpoint at vblagoje/bert-english-uncased-finetuned-pos were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


hugging: PROPN
face: PROPN
inc: PROPN
.: PUNCT
is: AUX
a: DET
company: NOUN
based: VERB
in: ADP
new: PROPN
york: PROPN
city: PROPN
.: PUNCT


In [6]:
from transformers import BartTokenizer, BartForConditionalGeneration

# Load pre-trained model and tokenizer
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

# Sample text
text = "BERT is a language model developed by Google. It has numerous applications in natural language processing (NLP) tasks due to its ability to understand the context of words in a sentence."

# Tokenize input
inputs = tokenizer([text], max_length=1024, return_tensors='pt', truncation=True)

# Generate summary
summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(summary)


BERT is a language model developed by Google. It has numerous applications in natural language processing (NLP) tasks due to its ability to understand the context of words in a sentence. For more information on BERT, visit Google's BERT website.


In [7]:
from transformers import MarianTokenizer, MarianMTModel

# Load pre-trained model and tokenizer
tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-de')
model = MarianMTModel.from_pretrained('Helsinki-NLP/opus-mt-en-de')

# Sample text
text = "BERT is a language model developed by Google."

# Tokenize input and translate
translated = model.generate(**tokenizer.prepare_seq2seq_batch([text], return_tensors='pt'))
translation = [tokenizer.decode(t, skip_special_tokens=True) for t in translated]
print(translation)


ImportError: 
MarianTokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.


In [8]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load pre-trained model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Sample text
text = "BERT is a language model developed by"

# Tokenize input
inputs = tokenizer.encode(text, return_tensors='pt')

# Generate text
outputs = model.generate(inputs, max_length=50, num_return_sequences=1)
generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


BERT is a language model developed by the University of California, Berkeley. It is a model of the human brain that is based on the idea that the brain is a collection of neurons that are connected to the rest of the body. The model is


In [9]:
from sentence_transformers import SentenceTransformer, util

# Load pre-trained Sentence-BERT model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sample documents and query
documents = ["BERT is a language model developed by Google.", "Transformers are a type of neural network architecture."]
query = "Who developed BERT?"

# Encode documents and query
doc_embeddings = model.encode(documents)
query_embedding = model.encode(query)

# Compute similarities
similarities = util.pytorch_cos_sim(query_embedding, doc_embeddings)
print(similarities)


tensor([[0.5406, 0.0100]])


In [10]:
import spacy
import neuralcoref

# Load spaCy model and add neuralcoref to it
nlp = spacy.load('en_core_web_sm')
neuralcoref.add_to_pipe(nlp)

# Sample text
text = "John went to the store. He bought some milk."

# Process text
doc = nlp(text)

# Print coreferences
for cluster in doc._.coref_clusters:
    print(cluster)


ModuleNotFoundError: No module named 'neuralcoref'

In [11]:
from sentence_transformers import SentenceTransformer, util

# Load pre-trained Sentence-BERT model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Sample sentences
sentence1 = "BERT is a language model developed by Google."
sentence2 = "Google developed a language model called BERT."

# Encode sentences
embedding1 = model.encode(sentence1)
embedding2 = model.encode(sentence2)

# Compute similarity
similarity = util.pytorch_cos_sim(embedding1, embedding2)
print(similarity)


tensor([[0.9630]])




In [None]:
Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
google