> These are notes from http://nlpworkgroup.postach.io/ converted into a Jupyter Notebook. Original material by Justin Barber, notebook-ified by Sean Boisen. 

> To use this material, you must first do the following:
* sudo easy_install pip         # Mac OS X
* pip install NLTK
* pip install requests

> See http://nlpworkgroup.postach.io/ for installation directions. 

# NLTK Sample Exercises

Some simple examples of what you can do with NLP, ordered from easy to moderately difficult and ending with graphical snippets. Created on Nov 17, 2015 by Justin Barber.

## DOWNLOAD A PROJECT GUTENBERG TEXT

In [None]:
# $ pip install requests
import requests

# Fetch the plain-text edition of the novel from Project Gutenberg.
url = "http://www.gutenberg.org/files/2554/2554.txt"
request = requests.get(url)
# Fail here with a clear HTTP error instead of tokenizing an error page later.
request.raise_for_status()
# Keep the decoded text rather than the raw bytes in ``.content``: the NLTK
# tokenizers used in the following cells work on strings.
# NOTE(review): ``.text`` is decoded with the encoding requests infers from
# the response; set ``request.encoding`` explicitly if characters look wrong.
crime_and_punishment = request.text
# this is the whole book! just print the first 500 chars
# (call form of print so the cell is valid on Python 3 as well as Python 2)
print(crime_and_punishment[:500])

## WORD TOKENIZE A TEXT

In [None]:
from nltk import word_tokenize
# Split the downloaded novel into word-level tokens. `crime_and_punishment`
# is the text fetched in the download cell earlier in this notebook.
word_tokens = word_tokenize(crime_and_punishment)
# NOTE(review): NLTK's word tokenizer normally emits punctuation marks as
# separate tokens, so this count is presumably "tokens" rather than strictly
# "words" -- confirm if an exact word count matters.
num_words = len(word_tokens)
print("Number of words:", num_words)

## SENTENCE TOKENIZE A TEXT

In [None]:
from nltk import sent_tokenize
# Split the downloaded novel into sentences. `crime_and_punishment` is the
# text fetched in the download cell earlier in this notebook.
sent_tokens = sent_tokenize(crime_and_punishment)
# One list entry per detected sentence, so the length is the sentence count.
num_sents = len(sent_tokens)
print("Number of sentences:", num_sents)

## CREATE A CONCORDANCE

In [None]:
from nltk import Text
from nltk.corpus import brown

# Use file 'cg13' of the Brown corpus as the token stream and wrap it in a
# Text object, which provides the concordance view.
tokens = brown.words('cg13')
text = Text(tokens)
# concordance() prints the matching lines (up to `lines` of them) itself and
# returns None, so it is called for its output only; binding the result to a
# name would just store None.
text.concordance("sex", lines=30)

## CONTEXTUAL SIMILARITY

In [None]:
from nltk import Text
from nltk.corpus import genesis
# Use the 'english-web.txt' file of NLTK's genesis corpus as the token stream
# (the corpus data has to be available locally, e.g. via nltk.download()).
tokens = genesis.words('english-web.txt')
# Text wraps the token sequence and provides the exploratory helper used below.
text = Text(tokens)
print("Words that occur in contexts similar to the contexts 'fought' occurs in:")
# similar() prints its findings itself, so the result is not captured.
text.similar("fought")

## SEMANTIC SIMILARITY

In [None]:
from nltk.corpus import wordnet

# Look up the three WordNet synsets (word senses) that will be compared.
book = wordnet.synset('book.n.01')
scroll = wordnet.synset('scroll.n.02')
bible = wordnet.synset('bible.n.01')

# For each pairing with "bible", ask WordNet for the lowest hypernym(s) the
# two synsets have in common; each result is a list of synsets.
book_bible = book.lowest_common_hypernyms(bible)
scroll_bible = scroll.lowest_common_hypernyms(bible)

# Report in the same order as before: scroll first, then book.
print("Lowest common hypernym for scroll and bible:", scroll_bible)
print("Lowest common hypernym for book and bible:", book_bible)

## PART OF SPEECH TAGGER

In [None]:
from nltk import word_tokenize
from nltk import pos_tag

# The sample sentence is built from adjacent string literals so that it can
# span several source lines without embedding line breaks.
sentence = (
    "In the beginning when God created the heavens and the earth, "
    "the earth was a formless void and darkness covered the face of "
    "the deep, while a wind from God swept over the face of the waters."
)

# Tokenize first: the tagger takes a list of tokens and returns
# (token, tag) pairs.
tokens = word_tokenize(sentence)
tagged_tokens = pos_tag(tokens)

print('Tokens tagged with part of speech:', tagged_tokens)

## FIRST-ORDER LOGIC

In [None]:
from nltk.sem import Expression
from nltk import ResolutionProver

# Short alias for the parser that turns a logic string into an expression.
read_expr = Expression.fromstring

# Conclusion to be proved: socrates is mortal.
goal = read_expr('mortal(socrates)')

# Premises: socrates is a man, and every man is mortal.
assumpt1 = read_expr('man(socrates)')
assumpt2 = read_expr('all x.(man(x) -> mortal(x))')

# Try to derive the goal from the premises; verbose=True makes the prover
# print its working as it goes.
resolution = ResolutionProver().prove(
    goal,
    [assumpt1, assumpt2],
    verbose=True,
)
print("Socrates is mortal:", resolution)

## CHUNKING NOUN PHRASES

In [19]:
from nltk import RegexpParser
from nltk import pos_tag, word_tokenize

# Chunk rule for a noun phrase (NP): an optional determiner (DT), then any
# number of adjectives (JJ), then a noun (NN).
grammar = "NP: {<DT>?<JJ>*<NN>}"
chunk_parser = RegexpParser(grammar)

# Prepare the input: tokenize the sentence and tag every token with its
# part of speech, since the chunk rule is written in terms of those tags.
sentence = "Better is a poor but wise youth than an old but foolish king."
tokens = word_tokenize(sentence)
tagged_tokens = pos_tag(tokens)

# Apply the chunker to the tagged tokens, then display the resulting tree
# graphically.
parsed = chunk_parser.parse(tagged_tokens)
parsed.draw()

## DISPERSION PLOT

In [21]:
# $ pip install matplotlib
# `nltk` itself is only needed for the optional nltk.download() call below.
import nltk
# if you haven't already downloaded the corpora
# nltk.download() # choose corpora tab and select genesis and click download
from nltk import Text
from nltk.corpus import genesis
# Use the 'english-kjv.txt' file of the genesis corpus as the token stream.
tokens = genesis.words('english-kjv.txt')
text = Text(tokens)
# Draw a plot showing where each of the listed words occurs across the text.
# NOTE(review): matching is presumably on the exact strings given, so "God"
# and "god" would be counted separately -- confirm against the NLTK docs.
text.dispersion_plot(["God", "man", "woman"])