In [2]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import pos_tag

# Download necessary resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Initialize stemmer, lemmatizer, and stop words
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Function to perform the NLP pipeline
def nlp_pipeline(text):
    # Sentence Segmentation
    sentences = sent_tokenize(text)

    # Tokenization, Stemming, Lemmatization, Stop Words, POS tags
    token_data = []
    for sentence in sentences:
        words = word_tokenize(sentence)
        for word in words:
            token_info = {
                "Text": word,
                "Stem": stemmer.stem(word),
                "Lemma": lemmatizer.lemmatize(word),
                "POS": pos_tag([word])[0][1],
                "Is_Stop_Word": word in stop_words
            }
            token_data.append(token_info)

    return sentences, token_data

# Take paragraph input from the user
paragraph = input("Enter a paragraph: ")

# Perform NLP pipeline
sentences, token_data = nlp_pipeline(paragraph)

# Print results
print("\nSentence Segmentation:")
for i, sentence in enumerate(sentences, 1):
    print(f"{i}. {sentence}")

print("\nToken Data:")
for token_info in token_data:
    print(f"Text: {token_info['Text']}")
    print(f"Stem: {token_info['Stem']}")
    print(f"Lemma: {token_info['Lemma']}")
    print(f"POS: {token_info['POS']}")
    print(f"Is Stop Word: {token_info['Is_Stop_Word']}")
    print("-" * 40)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


Enter a paragraph: Lemmatization is quite similar to the Stamming. It is used to group different inflected forms of the word, called Lemma. The main difference between Stemming and lemmatization is that it produces the root word, which has a meaning.

Sentence Segmentation:
1. Lemmatization is quite similar to the Stamming.
2. It is used to group different inflected forms of the word, called Lemma.
3. The main difference between Stemming and lemmatization is that it produces the root word, which has a meaning.

Token Data:
Text: Lemmatization
Stem: lemmat
Lemma: Lemmatization
POS: NN
Is Stop Word: False
----------------------------------------
Text: is
Stem: is
Lemma: is
POS: VBZ
Is Stop Word: True
----------------------------------------
Text: quite
Stem: quit
Lemma: quite
POS: RB
Is Stop Word: False
----------------------------------------
Text: similar
Stem: similar
Lemma: similar
POS: JJ
Is Stop Word: False
----------------------------------------
Text: to
Stem: to
Lemma: to
POS: T