In [7]:
! pip install transformers nltk




## Token Classification / Named-Entity Recognition (NER)

In [8]:
from transformers import pipeline

# Token-classification pipeline built on the dslim/bert-base-NER checkpoint.
# With aggregation_strategy="simple" the results read below expose an
# "entity_group" per detected span rather than one label per word piece.
ner = pipeline(
    task="ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


In [9]:
# Example sentence containing a person and two places.
text = "Barack Obama was born in Hawaii and served as president of the United States."

# Run the NER pipeline; the result is iterated as a sequence of dict-like
# records with "word", "entity_group" and "score" entries.
entities = ner(text)

# One line per detected entity: surface text, label, and score to 2 decimals.
for ent in entities:
    print("{} → {} ({:.2f})".format(ent["word"], ent["entity_group"], ent["score"]))


Barack Obama → PER (1.00)
Hawaii → LOC (1.00)
United States → LOC (1.00)


## Extractive Text Summarization (BERT-based)

In [10]:
! pip install bert-extractive-summarizer transformers




In [11]:
! pip install spacy
! python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [12]:
from summarizer import Summarizer

# Instantiate the extractive summarizer with its default settings; the
# resulting object is called directly on raw text in the next cell.
model = Summarizer()

In [13]:
# Passage to condense. The triple-quoted literal keeps its leading and
# trailing newline, exactly as passed to the summarizer.
text = """
The Apollo 11 mission was the first to land humans on the Moon.
Neil Armstrong and Buzz Aldrin walked on the lunar surface while Michael Collins remained in orbit.
The mission launched on July 16, 1969, and returned safely to Earth on July 24.
It was a historic achievement in the space race and marked a significant milestone in human exploration.
"""

# Calling the summarizer on the raw text yields the summary that is printed below.
summary = model(text)

# Format with an f-string: passing `summary` as a second print() argument
# would insert the default " " separator right after the newline, leaving a
# stray leading space in front of the summary.
print(f"Summary:\n{summary}")


Summary:
 The Apollo 11 mission was the first to land humans on the Moon. Neil Armstrong and Buzz Aldrin walked on the lunar surface while Michael Collins remained in orbit.


## Extractive Question Answering

In [14]:
from transformers import pipeline

# Extractive question-answering pipeline; the checkpoint name indicates a
# BERT-large model fine-tuned on SQuAD.
qa = pipeline(
    task="question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


In [15]:
# Passage the answer must be extracted from, and the question asked about it.
context = (
    "The Nile is the longest river in Africa. "
    "It flows through eleven countries and is a major water source in Egypt and Sudan."
)
question = "Which continent is the Nile in?"

# The pipeline result is read below through its "answer" and "score" entries.
answer = qa(context=context, question=question)
print("Answer: {} (score: {:.2f})".format(answer["answer"], answer["score"]))


Answer: Africa (score: 0.99)
