<a href="https://colab.research.google.com/github/jtao/dswebinar/blob/master/nlp/NLP_with_NLTK.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Natural Language Processing with NLTK

[Jian Tao](https://coehpc.engr.tamu.edu/people/jian-tao/), Texas A&M University

June 30, 2023

Converted from 

**Intro to natural language processing with Python**

Notebook by [Juan Cruz Martinez](https://livecodestream.dev/authors/bajcmartinez/)

## Setting up the Environment

In [None]:
# Detect whether the notebook is running on Google Colab.
import sys
IN_COLAB = 'google.colab' in sys.modules
import nltk

# Corpora and models used by the cells below.
# Newer NLTK releases (3.9+) load the tokenizer, tagger and chunker from the
# pickle-free `punkt_tab`, `averaged_perceptron_tagger_eng` and
# `maxent_ne_chunker_tab` packages, while older releases use the original
# names. Requesting both keeps the notebook working on either.
NLTK_PACKAGES = [
  'punkt',
  'punkt_tab',
  'stopwords',
  'wordnet',
  'omw-1.4',
  'averaged_perceptron_tagger',
  'averaged_perceptron_tagger_eng',
  'maxent_ne_chunker',
  'maxent_ne_chunker_tab',
  'words',
]

if IN_COLAB:
  # Colab starts from a clean machine, so fetch everything up front.
  for package in NLTK_PACKAGES:
    nltk.download(package)
else:
  # Outside Colab the data is looked up in a local ./nltk_data directory.
  # Guard the append so re-running this cell does not add duplicate entries.
  if "./nltk_data" not in nltk.data.path:
    nltk.data.path.append("./nltk_data")

## Tokenization

In [None]:
from nltk.tokenize import word_tokenize

# Split a short text into individual tokens and print the resulting list.
sample_text = "Good morning, How you doing? Are you coming tonight?"
word_tokens = word_tokenize(sample_text)
print(word_tokens)

In [None]:
from nltk.tokenize import sent_tokenize

# Split the same text into sentences and print the resulting list.
sample_text = "Good morning, How you doing? Are you coming tonight?"
sentences = sent_tokenize(sample_text)
print(sentences)

## Stop words

In [None]:
from nltk.corpus import stopwords

# Keep the imported corpus reader under its own name and store the word list
# separately; a set makes the membership test below a constant-time lookup.
english_stopwords = set(stopwords.words("english"))

Text = ["Good", "morning", "How", "you", "doing", "Are", "you", "coming", "tonight"]

# Print every token that is not a stop word.
for token in Text:
    # The stop-word list is lowercase, so compare case-insensitively;
    # otherwise capitalised stop words such as "How" and "Are" slip through.
    if token.lower() not in english_stopwords:
        print(token)

In [None]:
from nltk.corpus import stopwords

# Show the full English stop-word list as one comma-separated string.
# The list gets its own name so the imported `stopwords` corpus reader is not
# replaced by a plain list of strings.
stopword_list = stopwords.words("english")
','.join(stopword_list)

## Stemming Words

In [None]:
# Print the built-in help text for the nltk.stem subpackage.
help(nltk.stem)

In [None]:
from nltk.stem import PorterStemmer

# Run each sample word through the Porter stemmer and print one stem per line.
stemmer = PorterStemmer()
sample_words = ["Loving", "Chocolate", "Retrieving"]
for word in sample_words:
    print(stemmer.stem(word))

## Counting Words

In [None]:
import nltk

# Count how often each distinct word occurs and print "word --- count" pairs.
word_counts = nltk.FreqDist(["men", "teacher", "men", "woman"])
for word, count in word_counts.items():
    print(word, "---", count)

## Word groups

In [None]:
words = "Learning python was such an amazing experience for me"
# Store the tokens as `tokens` rather than `word_tokenize`, so the tokenizer
# function imported from nltk.tokenize earlier is not overwritten by a list.
tokens = nltk.word_tokenize(words)
# Print every pair of adjacent tokens.
print(list(nltk.bigrams(tokens)))

In [None]:
# Re-tokenize the sentence held in `words` under the name `tokens`, so the
# imported `word_tokenize` function is not overwritten by a list.
tokens = nltk.word_tokenize(words)
# Print every run of three adjacent tokens.
print(list(nltk.trigrams(tokens)))

In [None]:
# Re-tokenize the sentence held in `words` under the name `tokens`, so the
# imported `word_tokenize` function is not overwritten by a list.
tokens = nltk.word_tokenize(words)
# Print every run of four adjacent tokens.
print(list(nltk.ngrams(tokens, 4)))


## Lemmatization

In [None]:
from nltk.stem import WordNetLemmatizer

# Lemmatize two words without specifying a part of speech.
lemmatizer = WordNetLemmatizer()
for word in ("believes", "retrieved"):
    print(lemmatizer.lemmatize(word))

In [None]:
from nltk.stem import WordNetLemmatizer

# Lemmatize the same two words, this time passing pos="v".
lemmatizer = WordNetLemmatizer()
for word in ("believes", "retrieved"):
    print(lemmatizer.lemmatize(word, pos="v"))

## POS Taggers
Examples:
* PRP	personal pronoun (hers, herself, him, himself)
* RB	adverb (occasionally, swiftly)
* VBP	verb, present tense, not 3rd person singular (wrap)




In [None]:

words = "we work here"
# Store the tokens as `tokens` rather than `word_tokenize`, so the tokenizer
# function imported from nltk.tokenize earlier is not overwritten by a list.
tokens = nltk.word_tokenize(words)
# Print the (token, tag) pairs produced by the POS tagger.
print(nltk.pos_tag(tokens))

## Named Entity Recognition

In [None]:
# Tokenize the sentence, POS-tag the tokens, then chunk the tagged tokens
# with the named-entity chunker and print the result.
sentence = "tom is in london"
tagged_tokens = nltk.pos_tag(nltk.word_tokenize(sentence))
print(nltk.ne_chunk(tagged_tokens))

In [None]:
from textblob import TextBlob

# Wrap a short sentence in a TextBlob and print its sentiment attribute.
blob = TextBlob("today is sunny")
print(blob.sentiment)

## Spelling Correction

In [None]:
from textblob import TextBlob

# Ask TextBlob for a corrected version of a misspelled sentence and print it.
misspelled = "Smalle businesses neede relief"
print(TextBlob(misspelled).correct())