```python
"I absolutely love this phone! The battery life is great, and the camera is amazing. Highly recommended!"
```

In [1]:
import nltk
nltk.download("punkt")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Tokenization

In [2]:
from nltk.tokenize import word_tokenize,sent_tokenize
text = "I absolutely love this phone! The battery life is great, and the camera is amazing. Highly recommended!"
tokens = word_tokenize(text)
print(tokens)

['I', 'absolutely', 'love', 'this', 'phone', '!', 'The', 'battery', 'life', 'is', 'great', ',', 'and', 'the', 'camera', 'is', 'amazing', '.', 'Highly', 'recommended', '!']


## Removing Stopwords

In [6]:
from nltk.corpus import stopwords
nltk.download("stopwords")

stop_words = set(stopwords.words("english"))
filterd_tokens = [word for word in tokens if word.lower()  not in stop_words]
print(filterd_tokens)

['absolutely', 'love', 'phone', '!', 'battery', 'life', 'great', ',', 'camera', 'amazing', '.', 'Highly', 'recommended', '!']


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Stemming And Lemmatization

```python
Stemming
```

In [7]:
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(word) for word in filterd_tokens]
print(stemmed_words)

['absolut', 'love', 'phone', '!', 'batteri', 'life', 'great', ',', 'camera', 'amaz', '.', 'highli', 'recommend', '!']


```python
Lemmatization
```

In [8]:
from nltk.stem import WordNetLemmatizer
nltk.download("wordnet")
lemmatizer = WordNetLemmatizer()

lemmatized_words = [lemmatizer.lemmatize(word) for word in filterd_tokens]
print(lemmatized_words)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['absolutely', 'love', 'phone', '!', 'battery', 'life', 'great', ',', 'camera', 'amazing', '.', 'Highly', 'recommended', '!']


## Remove Punctuation

In [9]:
import string
cleaned_text = [word.lower() for word in filterd_tokens if word not in string.punctuation]
print(cleaned_text)

['absolutely', 'love', 'phone', 'battery', 'life', 'great', 'camera', 'amazing', 'highly', 'recommended']


### Practical 3a

In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

nltk.download("punkt")
nltk.download("stopwords")

senetence = "The sun was shining brightly in the sky and a gentle breeze was blowing throught the trees."
# Tokenize the sentence
tokens = word_tokenize(senetence)

# Define stop words
stop_words = set(stopwords.words("english"))

# Filter out stop words
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]


print(filtered_tokens)

['sun', 'shining', 'brightly', 'sky', 'gentle', 'breeze', 'blowing', 'throught', 'trees', '.']


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
import spacy

In [5]:
nlp=spacy.load("en_core_web_sm")
nlp.Defaults.stop_words.add("NIL")
nlp.Defaults.stop_words.add("JUNK")

In [6]:
text = "This is a JUNK sentence that contains NIL information but is useful for testing"

In [7]:
doc  = nlp(text)

In [10]:
filtered_words = [token.text for token in doc if token.text.lower() not in nlp.Defaults.stop_words]
print("Original text",text)
print("Filtered words"," ".join(filtered_words))

Original text This is a JUNK sentence that contains NIL information but is useful for testing
Filtered words JUNK sentence contains NIL information useful testing


In [1]:
import nltk
from nltk.stem import PorterStemmer
porter_stemmer= PorterStemmer()

words=["running", "ran", "jumps", "jumped", "happiness", "happy"]

stemmed_words=[porter_stemmer.stem(word) for word in words]

for original, stemmed in zip(words, stemmed_words):
    print(f"{original} -> {stemmed}")

running -> run
ran -> ran
jumps -> jump
jumped -> jump
happiness -> happi
happy -> happi


In [5]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

nltk.download("punkt")
nltk.download("wordnet")

lemmatizer = WordNetLemmatizer()

paragraph = """
    Natural Language Processing is a subfield of artificial intelligence that focuses on the interaction between computers and humans using natural language. It involves techniques for analyzing, understanding, and generating human language, enabling applications such as machine translation, chatbots, sentiment analysis, and speech recognition
"""
tokens = word_tokenize(paragraph)
lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
print(lemmatized_tokens)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['Natural', 'Language', 'Processing', 'is', 'a', 'subfield', 'of', 'artificial', 'intelligence', 'that', 'focus', 'on', 'the', 'interaction', 'between', 'computer', 'and', 'human', 'using', 'natural', 'language', '.', 'It', 'involves', 'technique', 'for', 'analyzing', ',', 'understanding', ',', 'and', 'generating', 'human', 'language', ',', 'enabling', 'application', 'such', 'a', 'machine', 'translation', ',', 'chatbots', ',', 'sentiment', 'analysis', ',', 'and', 'speech', 'recognition']
