In [1]:
import re
import nltk
from nltk import word_tokenize, sent_tokenize, pos_tag, ne_chunk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.corpus import stopwords

# Fetch the NLTK data packages this script needs, in a fixed order.
# nltk.download() is called once per package name.
for _resource in (
    'punkt',
    'stopwords',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'words',
    'wordnet',
    'omw-1.4',
):
    nltk.download(_resource)

# Sample sentence that is pushed through every preprocessing step below.
input_text = "Barack Obama is a prime minister of USA."
print("Original Input:", input_text)

# Normalize case before any further processing.
input_lower = input_text.lower()
print("\nLowercase:", input_lower)

# Strip patterns one after another; the order matters and is kept as-is:
#   1. the literal "2013"
#   2. the literal "[7]"
#   3. any remaining run of digits
input_lower_re = input_lower
for _pattern in (r'2013', r'\[7\]', r'\d+'):
    input_lower_re = re.sub(_pattern, '', input_lower_re)
print("\nAfter Regex Substitutions:", input_lower_re)

# Word-level tokens of the cleaned, lowercased text.
word_tokens = word_tokenize(input_lower_re)
print("\nWord Tokens:", word_tokens)

# Sentence-level tokens of the same text.
sent_tokens = sent_tokenize(input_lower_re)
print("\nSentence Tokens:", sent_tokens)

# Keep only the word tokens that are absent from NLTK's English stop-word
# list; the list is turned into a set so each membership test is cheap.
stop_words = set(stopwords.words('english'))
tokens_stopwords = list(
    filter(lambda candidate: candidate not in stop_words, word_tokens)
)
print("\nTokens after Stopwords Removal:", tokens_stopwords)

# Porter-stem every token that survived stop-word removal.
ps = PorterStemmer()
stemmed_tokens = list(map(ps.stem, tokens_stopwords))
print("\nStemmed Tokens:", stemmed_tokens)

# Lemmatize the same tokens with WordNet; no part-of-speech hint is passed,
# so the lemmatizer's default is used for every token.
lemma = WordNetLemmatizer()
lemmatized_tokens = list(map(lemma.lemmatize, tokens_stopwords))
print("\nLemmatized Tokens:", lemmatized_tokens)

# Part-of-speech tags for the lowercased word tokens.
# NOTE(review): tagging lowercased text gives different tags than tagging
# the original-case text below (e.g. 'usa' vs 'USA') -- confirm this is
# the intended input for the tagger.
pos = pos_tag(word_tokens)
print("\nPOS Tagging:", pos)

# Named-entity recognition works on the original-case sentence instead:
# re-tokenize it, tag it, then chunk the tagged tokens into an entity tree.
tokens = word_tokenize(input_text)
tagged_tokens = pos_tag(tokens)
ner = ne_chunk(tagged_tokens)
print("\nNamed Entity Recognition:", ner, sep="\n")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\safia\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!

Original Input: Barack Obama is a prime minister of USA.

Lowercase: barack obama is a prime minister of usa.

After Regex Substitutions: barack obama is a prime minister of usa.

Word Tokens: ['barack', 'obama', 'is', 'a', 'prime', 'minister', 'of', 'usa', '.']

Sentence Tokens: ['barack obama is a prime minister of usa.']

Tokens after Stopwords Removal: ['barack', 'obama', 'prime', 'minister', 'usa', '.']

Stemmed Tokens: ['barack', 'obama', 'prime', 'minist', 'usa', '.']

Lemmatized Tokens: ['barack', 'obama', 'prime', 'minister', 'usa', '.']

POS Tagging: [('barack', 'NN'), ('obama', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('prime', 'JJ'), ('minister', 'NN'), ('of', 'IN'), ('usa', 'JJ'), ('.', '.')]

Named Entity Recognition:
(S
  (PERSON Barack/NNP)
  (ORGANIZATION Obama/NNP)
  is/VBZ
  a/DT
  prime/JJ
  minister/NN
  of/IN
  (ORGANIZATION USA/NNP)
  ./.)
