In [None]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [None]:
# Load the small English spaCy pipeline; later cells call it as `model(text)`.
model = spacy.load("en_core_web_sm")

In [None]:
# Sample passage that the rest of the notebook summarizes.
text="""The meaning of NLP is Natural Language Processing (NLP) which is a fascinating and rapidly evolving field that intersects computer science, artificial intelligence, and linguistics. NLP focuses on the interaction between computers and human language, enabling machines to understand, interpret, and generate human language in a way that is both meaningful and useful. With the increasing volume of text data generated every day, from social media posts to research articles, NLP has become an essential tool for extracting valuable insights and automating various tasks. In this article, we will explore the fundamental concepts and techniques of Natural Language Processing, shedding light on how it transforms raw text into actionable information. From tokenization and parsing to sentiment analysis and machine translation, NLP encompasses a wide range of applications that are reshaping industries and enhancing human-computer interactions. Whether you are a seasoned professional or new to the field, this overview will provide you with a comprehensive understanding of NLP and its significance in today’s digital age."""
# Run the loaded pipeline over the passage; `doc` supplies the tokens and sentences used below.
doc=model(text)

**Removing stop words and punctuation**

In [None]:
# Lower-cased text of every token that is neither a stop word, nor punctuation,
# nor a bare newline.
tokens = list(
  token.text.lower()
  for token in doc
  if not (token.is_stop or token.is_punct or token.text == '\n')
)

In [None]:
# Show the filtered, lower-cased tokens produced by the previous cell.
print(tokens)

['meaning', 'nlp', 'natural', 'language', 'processing', 'nlp', 'fascinating', 'rapidly', 'evolving', 'field', 'intersects', 'computer', 'science', 'artificial', 'intelligence', 'linguistics', 'nlp', 'focuses', 'interaction', 'computers', 'human', 'language', 'enabling', 'machines', 'understand', 'interpret', 'generate', 'human', 'language', 'way', 'meaningful', 'useful', 'increasing', 'volume', 'text', 'data', 'generated', 'day', 'social', 'media', 'posts', 'research', 'articles', 'nlp', 'essential', 'tool', 'extracting', 'valuable', 'insights', 'automating', 'tasks', 'article', 'explore', 'fundamental', 'concepts', 'techniques', 'natural', 'language', 'processing', 'shedding', 'light', 'transforms', 'raw', 'text', 'actionable', 'information', 'tokenization', 'parsing', 'sentiment', 'analysis', 'machine', 'translation', 'nlp', 'encompasses', 'wide', 'range', 'applications', 'reshaping', 'industries', 'enhancing', 'human', 'computer', 'interactions', 'seasoned', 'professional', 'new', '

**Another way of removing stop words and punctuation, using POS tagging**

In [None]:
# Alternative filter: drop stop words and punctuation, then keep only tokens
# whose part-of-speech tag is in `allowed_pos`. Original casing is preserved.
tokens1=[]
# A set gives constant-time membership tests (the list copy was scanned per token).
stopwords=set(STOP_WORDS)
allowed_pos=['ADJ','PROPN','VERB','NOUN']
for token in doc:
  # STOP_WORDS entries are lower-case, so compare the lower-cased text; otherwise
  # a capitalised stop word (e.g. at the start of a sentence) is not recognised.
  if token.text.lower() in stopwords or token.text in punctuation:
    continue
  if token.pos_ in allowed_pos:
    tokens1.append(token.text)
tokens1

['meaning',
 'NLP',
 'Natural',
 'Language',
 'Processing',
 'NLP',
 'fascinating',
 'evolving',
 'field',
 'intersects',
 'computer',
 'science',
 'artificial',
 'intelligence',
 'linguistics',
 'NLP',
 'focuses',
 'interaction',
 'computers',
 'human',
 'language',
 'enabling',
 'machines',
 'understand',
 'interpret',
 'generate',
 'human',
 'language',
 'way',
 'meaningful',
 'useful',
 'increasing',
 'volume',
 'text',
 'data',
 'generated',
 'day',
 'social',
 'media',
 'posts',
 'research',
 'articles',
 'NLP',
 'essential',
 'tool',
 'extracting',
 'valuable',
 'insights',
 'automating',
 'tasks',
 'article',
 'explore',
 'fundamental',
 'concepts',
 'techniques',
 'Natural',
 'Language',
 'Processing',
 'shedding',
 'light',
 'transforms',
 'raw',
 'text',
 'actionable',
 'information',
 'tokenization',
 'parsing',
 'sentiment',
 'analysis',
 'machine',
 'translation',
 'NLP',
 'encompasses',
 'wide',
 'range',
 'applications',
 'reshaping',
 'industries',
 'enhancing',
 'huma

**Count the frequency of words**

In [None]:
from collections import Counter

# Tally how often each filtered token occurs.
word_freq = Counter()
word_freq.update(tokens)
print(word_freq)

Counter({'nlp': 6, 'language': 4, 'human': 3, 'natural': 2, 'processing': 2, 'field': 2, 'computer': 2, 'text': 2, 'meaning': 1, 'fascinating': 1, 'rapidly': 1, 'evolving': 1, 'intersects': 1, 'science': 1, 'artificial': 1, 'intelligence': 1, 'linguistics': 1, 'focuses': 1, 'interaction': 1, 'computers': 1, 'enabling': 1, 'machines': 1, 'understand': 1, 'interpret': 1, 'generate': 1, 'way': 1, 'meaningful': 1, 'useful': 1, 'increasing': 1, 'volume': 1, 'data': 1, 'generated': 1, 'day': 1, 'social': 1, 'media': 1, 'posts': 1, 'research': 1, 'articles': 1, 'essential': 1, 'tool': 1, 'extracting': 1, 'valuable': 1, 'insights': 1, 'automating': 1, 'tasks': 1, 'article': 1, 'explore': 1, 'fundamental': 1, 'concepts': 1, 'techniques': 1, 'shedding': 1, 'light': 1, 'transforms': 1, 'raw': 1, 'actionable': 1, 'information': 1, 'tokenization': 1, 'parsing': 1, 'sentiment': 1, 'analysis': 1, 'machine': 1, 'translation': 1, 'encompasses': 1, 'wide': 1, 'range': 1, 'applications': 1, 'reshaping': 

**Normalize the word frequencies**

In [None]:
# Divide every count by the largest count, in place, so the most frequent
# word ends up with a value of 1.0.
max_freq = max(word_freq.values())
for word in word_freq:
  word_freq[word] /= max_freq

**Calculating sentence score**

In [None]:
# Score each sentence as the sum of the normalized frequencies of its words.
#
# Tokens come from spaCy rather than `str.split()` so that punctuation is split
# off ("science," -> "science", "(NLP)" -> "NLP"), and the frequency lookup uses
# the same lower-cased key as the membership test. `word_freq` is a Counter, so
# looking up the original-cased word ("NLP", "Natural") silently returned 0.
sent_token=[sent.text for sent in doc.sents]
sent_score={}
for sent in doc.sents:
  for token in sent:
    key=token.text.lower()
    if key in word_freq:
      # Only sentences with at least one scored word get an entry.
      sent_score[sent.text]=sent_score.get(sent.text,0)+word_freq[key]

The
meaning
of
NLP
is
Natural
Language
Processing
(NLP)
which
is
a
fascinating
and
rapidly
evolving
field
that
intersects
computer
science,
artificial
intelligence,
and
linguistics.
NLP
focuses
on
the
interaction
between
computers
and
human
language,
enabling
machines
to
understand,
interpret,
and
generate
human
language
in
a
way
that
is
both
meaningful
and
useful.
With
the
increasing
volume
of
text
data
generated
every
day,
from
social
media
posts
to
research
articles,
NLP
has
become
an
essential
tool
for
extracting
valuable
insights
and
automating
various
tasks.
In
this
article,
we
will
explore
the
fundamental
concepts
and
techniques
of
Natural
Language
Processing,
shedding
light
on
how
it
transforms
raw
text
into
actionable
information.
From
tokenization
and
parsing
to
sentiment
analysis
and
machine
translation,
NLP
encompasses
a
wide
range
of
applications
that
are
reshaping
industries
and
enhancing
human-computer
interactions.
Whether
you
are
a
seasoned
professional
or
new
to
the
f

**Create a DataFrame of sentences and their scores**

In [None]:
import pandas as pd

# One row per scored sentence: (sentence text, accumulated score).
rows = [(sentence, score) for sentence, score in sent_score.items()]
pd.DataFrame(rows, columns=['sentence', 'score'])

Unnamed: 0,sentence,score
0,The meaning of NLP is Natural Language Process...,1.666667
1,NLP focuses on the interaction between compute...,3.0
2,With the increasing volume of text data genera...,2.666667
3,"In this article, we will explore the fundament...",1.833333
4,From tokenization and parsing to sentiment ana...,2.0
5,Whether you are a seasoned professional or new...,1.5


**Display the top n sentences**

In [None]:
from heapq import nlargest

# Number of highest-scoring sentences to keep in the extractive summary.
num_sent = 2
# Pick the `num_sent` sentences with the largest scores, best first.
n = nlargest(num_sent, sent_score, key=lambda sentence: sent_score.get(sentence))
" ".join(n)

'NLP focuses on the interaction between computers and human language, enabling machines to understand, interpret, and generate human language in a way that is both meaningful and useful. With the increasing volume of text data generated every day, from social media posts to research articles, NLP has become an essential tool for extracting valuable insights and automating various tasks.'

In [None]:
pip install transformers




**Abstractive Text summarization using Transformers**

In [None]:
from transformers import pipeline

# Summarization pipeline using the 't5-base' model and its matching tokenizer.
summarizer = pipeline(
    "summarization",
    model="t5-base",
    tokenizer="t5-base",
    framework="pt",
)
# `text` is the sample passage defined in an earlier cell.
summary = summarizer(text, max_length=100, min_length=10, do_sample=False)
print(summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


[{'summary_text': 'the meaning of NLP is natural language processing (NLP), which intersects computer science, artificial intelligence, and linguistics . enabling machines to understand, interpret, and generate human language in a way that is both meaningful and useful . with the increasing volume of text data generated every day, NLP has become an essential tool for extracting valuable insights .'}]


In [None]:
# `summary` is a list of dicts; print only the generated text of the first entry.
print(summary[0]['summary_text'])

the meaning of NLP is natural language processing (NLP), which intersects computer science, artificial intelligence, and linguistics . enabling machines to understand, interpret, and generate human language in a way that is both meaningful and useful . with the increasing volume of text data generated every day, NLP has become an essential tool for extracting valuable insights .
