In [None]:
# Sample text about NLP; the cells below split it into sentences and use it to
# demonstrate stop-word removal, stemming, and lemmatization.
paragraph = """ At its core, NLP is a branch of artificial intelligence that focuses on enabling computers to understand,
interpret, and respond to human language in a meaningful way. Whether it's text or speech, 
NLP gives machines the ability to interact with us just like another human might.
Think about how you use Siri, Google Assistant, or Alexa. Ever typed a sentence into Google Translate or spoken a message that was transcribed into text? All of this is made possible through NLP.
So, how does it work?
NLP combines linguistics, machine learning, and statistics. It breaks down language into parts—like tokens, syntax, semantics, and context—so computers can analyze them.
Tools like NLTK, spaCy, and models like GPT (yes, the same tech powering ChatGPT!) are used for tasks such as translation, summarization, and answering questions.
Of course, NLP isn’t perfect. Human language is complex—it’s full of sarcasm, slang, cultural nuances, and ambiguity. But with advances in deep learning and big data, NLP is rapidly improving.
In the coming years, NLP will become even more integrated into our lives—powering better communication tools, smarter assistants, and more intuitive ways of interacting with technology.
To conclude, NLP is not just about teaching machines how to read or talk—it’s about bridging the gap between humans and machines, making technology feel more natural and human-centric.
Thank you!
"""

## The paragraph above contains words such as "to", "the", and "of" that carry little meaning for analysis.
## We will remove these stop words using the NLTK library.

In [None]:
from nltk.corpus import stopwords

In [None]:
import nltk
# Download every NLTK resource this notebook relies on, not only the stop-word lists:
#   - 'stopwords'          -> stopwords.words(...)
#   - 'punkt'/'punkt_tab'  -> nltk.sent_tokenize / nltk.word_tokenize
#                             (newer NLTK releases look for 'punkt_tab', older ones for 'punkt')
#   - 'wordnet'            -> WordNetLemmatizer
# Without these, the tokenizing and lemmatizing cells raise LookupError on a fresh environment.
for resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

In [None]:
## NLTK ships stop-word lists for several languages; this cell shows the English one.
## You can also build your own list. Note that the list below includes negations such as "not":
## removing them can change the meaning of a sentence, so keep them when negation matters.
stopwords.words('english')

In [None]:
# The same corpus reader is queried here for the German stop-word list.
stopwords.words('german')

In [None]:
## Now we will apply stemming
from nltk.stem import PorterStemmer

In [None]:
# Create the Porter stemmer instance used by the stemming loop further down.
stemmer = PorterStemmer()

In [None]:
# Split the paragraph into individual sentences so each one can be processed on its own.
sentences = nltk.sent_tokenize(paragraph)

In [None]:
# Inspect which container type the sentence tokenizer returned.
type(sentences)

In [None]:
## Porter stemming with stop-word removal.
## For every sentence: tokenize it into words, drop the English stop words,
## stem the remaining words, and join the stems back into a single string.
## The sentences are rebuilt from `paragraph`, so re-running this cell always
## starts from the raw text instead of stemming already-stemmed output.

# Build the stop-word set once: it gives fast membership tests, and constructing it
# inside the comprehension would rebuild it for every single token.
english_stopwords = set(stopwords.words('english'))

sentences = []
for sentence in nltk.sent_tokenize(paragraph):
    words = nltk.word_tokenize(sentence)
    # Compare in lowercase: the NLTK stop-word list is all lowercase, so capitalized
    # stop words such as "The" or "It" would otherwise slip through the filter.
    stems = [stemmer.stem(word) for word in words if word.lower() not in english_stopwords]
    sentences.append(' '.join(stems))  # rebuild the sentence from the surviving stems


In [None]:
# Display the sentences produced by the Porter stemming cell above.
sentences

In [None]:
## Next, Snowball stemming: import the stemmer and create an instance for English.
from nltk.stem import SnowballStemmer
snowballStemmer = SnowballStemmer("english")

In [None]:
## Snowball stemming with stop-word removal.
## The sentences are rebuilt from `paragraph` so the Snowball stemmer sees the raw
## text; feeding it the output of an earlier stemming cell would stem stems and
## hide what this stemmer actually does.

# Build the stop-word set once instead of once per token.
english_stopwords = set(stopwords.words('english'))

sentences = []
for sentence in nltk.sent_tokenize(paragraph):
    words = nltk.word_tokenize(sentence)
    # Lowercase for the membership test only; the stop-word list is all lowercase.
    stems = [snowballStemmer.stem(word) for word in words if word.lower() not in english_stopwords]
    sentences.append(' '.join(stems))  # rebuild the sentence from the surviving stems

In [None]:
# Display the sentences produced by the Snowball stemming cell above.
sentences

In [None]:
# Import the WordNet lemmatizer and create the instance used by the lemmatization cell.
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [None]:
## Lemmatization with stop-word removal.
## The sentences are rebuilt from `paragraph`: the lemmatizer looks words up in
## WordNet, so it needs real words, not truncated stems left over from the
## stemming cells (e.g. "languag").

# Build the stop-word set once instead of once per token.
english_stopwords = set(stopwords.words('english'))

sentences = []
for sentence in nltk.sent_tokenize(paragraph):
    # Lowercase first so tokens match the all-lowercase stop-word list.
    words = nltk.word_tokenize(sentence.lower())
    # pos='v' makes the lemmatizer treat every token as a verb.
    lemmas = [lemmatizer.lemmatize(word, pos='v') for word in words if word not in english_stopwords]
    sentences.append(' '.join(lemmas))  # rebuild the sentence from the surviving lemmas

In [None]:
# Display the sentences produced by the lemmatization cell above.
sentences