# NLP

## Stemming

### Porter

In [None]:
from nltk.stem.porter import PorterStemmer

In [None]:
# Sample words fed to each stemmer in the loops below.
words = [
    "words",
    "eating",
    "went",
    "engineer",
    "tried",
    "happily",
    "better",
    "happy",
    "happiness",
    "unhappiness",
    "flies",
]

In [None]:
# Porter stemmer instance used by the stemming loop that follows.
porter = PorterStemmer()

In [None]:
# Print every sample word next to the stem the Porter stemmer returns for it.
for word in words:
    print(f"Stem of {word} : {porter.stem(word)}")

### Snowball

In [None]:
from nltk.stem.snowball import SnowballStemmer

In [None]:
# Snowball stemmer configured for English; used by the loop that follows.
snowball = SnowballStemmer("english")

In [None]:
# Print every sample word next to the stem the Snowball stemmer returns for it.
for word in words:
    print(f"Stem of {word} : {snowball.stem(word)}")

## Part-of-speech tagging

#### Download the NLTK resources used below (tokenizer models, WordNet, and the currently recommended part-of-speech tagger)

In [None]:
import nltk

# Data packages needed by word_tokenize, WordNetLemmatizer and pos_tag.
# NLTK 3.9+ looks up "punkt_tab" and "averaged_perceptron_tagger_eng" instead
# of the older pickled "punkt" / "averaged_perceptron_tagger" packages, so both
# generations are fetched to keep the notebook working across NLTK versions.
for resource in (
    "punkt",
    "punkt_tab",
    "wordnet",
    "omw-1.4",  # required by the WordNet lemmatizer on some NLTK releases
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
):
    nltk.download(resource)

### n-grams

In [None]:
from nltk import ngrams

In [None]:
# Example sentence reused by the n-gram, PoS-tagging and lemmatization cells.
sentence = "He went to school yesterday and attended the classes"

In [None]:
# For n = 1..4, print a heading and then every n-gram of the
# whitespace-split sentence on a single line.
for n in range(1, 5):
    print(f"\n{n}-grams")
    n_grams = ngrams(sentence.split(), n)
    for ngram in n_grams:
        print(ngram, end=" ")

### PoS Tagging

In [None]:
from nltk import pos_tag, word_tokenize

In [None]:
# Tokenize the sentence with NLTK, then print the (token, tag) pairs
# returned by its PoS tagger.
tokens = word_tokenize(sentence)
print(pos_tag(tokens))

#### Using spaCy

In [None]:
! pip3 install spacy
! python3 -m spacy download en_core_web_sm

#### PoS Tagging with spaCy

In [None]:
import spacy
# Load the "en_core_web_sm" pipeline; `nlp` is applied to the sentence below.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Run the spaCy pipeline on the sentence and print each token's text together
# with its pos_ and dep_ attributes.
doc = nlp(sentence)
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_}")

In [None]:
# Print a wider set of per-token attributes, one token per output line.
for token in doc:
    print(
        token.text,
        token.lemma_,
        token.pos_,
        token.tag_,
        token.dep_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )

## Lemmatization

In [None]:
from nltk.stem import WordNetLemmatizer

In [None]:
# WordNet-based lemmatizer shared by the lemmatization cells below.
lemmatizer = WordNetLemmatizer()

In [None]:
# Lemmatize each whitespace-separated word without passing a part of speech,
# printing the results on one line.
for word in sentence.split():
    print(lemmatizer.lemmatize(word), end=' ')

In [None]:
from nltk.corpus import wordnet as wn
# See http://www.nltk.org/book/ch05.html for the complete list of tags.
def wntag(tag):
    """Translate a PoS tag into the matching WordNet part-of-speech constant.

    The decision is made on the tag's leading letter, checked in this order:
    "J" -> wn.ADJ, "R" -> wn.ADV, "N" -> wn.NOUN, "V" -> wn.VERB.
    Any other tag yields None.
    """
    prefix_to_attr = (("J", "ADJ"), ("R", "ADV"), ("N", "NOUN"), ("V", "VERB"))
    for prefix, attr in prefix_to_attr:
        if tag.startswith(prefix):
            # Resolve the constant lazily, only for the matching prefix.
            return getattr(wn, attr)
    return None

In [None]:
# Tokenize the example sentence with NLTK for the PoS-aware lemmatization below.
tokens = word_tokenize(sentence)

In [None]:
# Show the token list as the cell's output.
tokens

In [None]:
# Lemmatize each token, passing the WordNet part of speech derived from its
# tag when wntag() recognises it and falling back to the default otherwise.
for token, tag in pos_tag(tokens):
    wn_pos = wntag(tag)
    if wn_pos:
        lemma = lemmatizer.lemmatize(token, wn_pos)
    else:
        lemma = lemmatizer.lemmatize(token)
    print(lemma, end=' ')

#### Using spaCy

In [None]:
# Load the "en_core_web_sm" pipeline and bind it to `nlp` for this section.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Process the example sentence; `doc` is iterated and rendered in later cells.
doc = nlp(sentence)

In [None]:
# Print the lemma_ attribute of every token on a single line.
for token in doc:
    print(token.lemma_, end=' ')

## Dependency parse visualization

In [None]:
from spacy import displacy

In [None]:
# Render `doc` with displaCy using the "dep" style.
displacy.render(doc, style="dep")