# Tag the parts of speech of a source text, using the NLTK library

This file is built to serve a dual purpose: it can run either as a Jupyter Notebook or as a standalone script.

### Running standalone
When run from the command line, the source text is read in from the filename(s) specified as arguments.

#### Converting the Jupyter Notebook .ipynb file to a standalone .py script file
Convert this file into a standalone python script using the `ipynb2py.py` tool, to be executed from the command line.

### Running inside a Jupyter Notebook
The source text is specified inline in the second cell. You may need to uncomment the `pip install nltk`, the first line in the second cell, if the package is not found.

In [None]:
#pip install nltk
from IPython import get_ipython
import nltk

# Notebook-only setup: download the NLTK data packages and define the
# inline sample text. get_ipython() is falsy outside IPython/Jupyter, so
# none of this runs when the file is executed as a plain script.
if get_ipython():
    for resource in ('tagsets', 'punkt', 'averaged_perceptron_tagger'):
        nltk.download(resource)
    sourceText = """Oh say can you see, by the dawn's early light, what so proudly we hailed at the twilight's last gleaming?"""

In [None]:
# Load the user-friendly terminology for parts of speech.
# Each useful line of the file has the form "<tag> : <description>";
# posDict maps the tag to its description (surrounding whitespace stripped).
posDict = {}
with open("nltk-tagsets-pos.txt") as f:
    for line in f:
        # partition splits on the FIRST " : " only, so a description that
        # itself contains " : " is kept whole instead of being truncated.
        key, sep, val = line.partition(" : ")
        if not sep:
            # Blank or malformed line (no " : " separator): skip it rather
            # than crash with an IndexError.
            continue
        posDict[key] = val.strip()

import sys
from IPython import get_ipython

if not get_ipython():
    # Running in plain python: load the text from the file(s) specified on
    # the command line. (Inside a Jupyter Notebook, sourceText is already
    # defined inline by the earlier cell and is used as-is.)
    if len(sys.argv) < 2:
        # Passing a string makes sys.exit print it to stderr and exit with
        # status 1, so the calling shell can detect the failure.
        sys.exit("missing input filename(s)")
    args = sys.argv[1:]

    sourceText = ""
    for inputFile in args:
        # The with-block closes the file even if read() raises.
        with open(inputFile) as inputHandle:
            text = inputHandle.read()
        # Concatenate the files' contents, separated by a newline.
        if sourceText == "":
            sourceText = text
        else:
            sourceText = sourceText + "\n" + text

# Tag the parts of speech: split the text into tokens, then tag each token.
Sentence = nltk.word_tokenize(sourceText)
posTagged = nltk.pos_tag(Sentence)

# Print one "word : description" line per token. Tags that have no entry
# in posDict are reported as "stet".
for word, pos in posTagged:
    pos = posDict.get(pos, "stet")
    print(word + " : " + pos)