In [1]:
import pandas as pd
import string
import nltk
import re

pd.set_option('display.max_rows', None)

In [2]:
## Import Training Data (Text Corpus):
with open('training-text.txt', encoding='utf-8') as input_file:
    training_text = input_file.read()
    
print(training_text)

Music theory is the study of the practices and possibilities of music. The Oxford Companion to Music describes three interrelated uses of the term "music theory". The first is the "rudiments", that are needed to understand music notation (key signatures, time signatures, and rhythmic notation); the second is learning scholars' views on music from antiquity to the present; the third is a sub-topic of musicology that "seeks to define processes and general principles in music". The musicological approach to theory differs from music analysis "in that it takes as its starting-point not the individual work or performance but the fundamental materials from which it is built."[1]

Music theory is frequently concerned with describing how musicians and composers make music, including tuning systems and composition methods among other topics. Because of the ever-expanding conception of what constitutes music, a more inclusive definition could be the consideration of any sonic phenomena, includin

In [3]:
## Clean Training Data: (Preprocessing)
## Remove Punctuation Characters:
training_text = training_text.translate(str.maketrans('', '', string.punctuation))
## Remove Line Breaks:
training_text = training_text.replace('\n','')
## Remove Numerical Characters:
training_text = re.sub(r'[0-9]+', '', training_text)
## Lowercase Characters:
training_text = training_text.lower()
## Split String into List:
training_text_word_list = training_text.split()
print(training_text_word_list)

['music', 'theory', 'is', 'the', 'study', 'of', 'the', 'practices', 'and', 'possibilities', 'of', 'music', 'the', 'oxford', 'companion', 'to', 'music', 'describes', 'three', 'interrelated', 'uses', 'of', 'the', 'term', 'music', 'theory', 'the', 'first', 'is', 'the', 'rudiments', 'that', 'are', 'needed', 'to', 'understand', 'music', 'notation', 'key', 'signatures', 'time', 'signatures', 'and', 'rhythmic', 'notation', 'the', 'second', 'is', 'learning', 'scholars', 'views', 'on', 'music', 'from', 'antiquity', 'to', 'the', 'present', 'the', 'third', 'is', 'a', 'subtopic', 'of', 'musicology', 'that', 'seeks', 'to', 'define', 'processes', 'and', 'general', 'principles', 'in', 'music', 'the', 'musicological', 'approach', 'to', 'theory', 'differs', 'from', 'music', 'analysis', 'in', 'that', 'it', 'takes', 'as', 'its', 'startingpoint', 'not', 'the', 'individual', 'work', 'or', 'performance', 'but', 'the', 'fundamental', 'materials', 'from', 'which', 'it', 'is', 'builtmusic', 'theory', 'is', 'fr

In [4]:
pos_tokenized_words = nltk.pos_tag(training_text_word_list)
print(pos_tokenized_words)

[('music', 'NN'), ('theory', 'NN'), ('is', 'VBZ'), ('the', 'DT'), ('study', 'NN'), ('of', 'IN'), ('the', 'DT'), ('practices', 'NNS'), ('and', 'CC'), ('possibilities', 'NNS'), ('of', 'IN'), ('music', 'NN'), ('the', 'DT'), ('oxford', 'NN'), ('companion', 'NN'), ('to', 'TO'), ('music', 'NN'), ('describes', 'NNS'), ('three', 'CD'), ('interrelated', 'JJ'), ('uses', 'NNS'), ('of', 'IN'), ('the', 'DT'), ('term', 'NN'), ('music', 'NN'), ('theory', 'NN'), ('the', 'DT'), ('first', 'JJ'), ('is', 'VBZ'), ('the', 'DT'), ('rudiments', 'NNS'), ('that', 'WDT'), ('are', 'VBP'), ('needed', 'VBN'), ('to', 'TO'), ('understand', 'VB'), ('music', 'NN'), ('notation', 'NN'), ('key', 'JJ'), ('signatures', 'NNS'), ('time', 'NN'), ('signatures', 'NNS'), ('and', 'CC'), ('rhythmic', 'JJ'), ('notation', 'NN'), ('the', 'DT'), ('second', 'JJ'), ('is', 'VBZ'), ('learning', 'VBG'), ('scholars', 'NNS'), ('views', 'NNS'), ('on', 'IN'), ('music', 'NN'), ('from', 'IN'), ('antiquity', 'NN'), ('to', 'TO'), ('the', 'DT'), ('p

In [5]:
pos_df = pd.DataFrame(pos_tokenized_words, columns = ['word','pos tag'])
pos_df.head()

Unnamed: 0,word,pos tag
0,music,NN
1,theory,NN
2,is,VBZ
3,the,DT
4,study,NN


In [6]:
pos_abbreviation_dict = {
             'CC':'coordinating conjunction',
             'CD':'cardinal digit',
             'DT':'determiner',
             'EX':'existential there',
             'FW':'foreign word',
             'IN':'preposition/subordinating conjunction',
             'JJ':'This NLTK POS Tag is an adjective (large)',
             'JJR':'adjective, comparative (larger)',
             'JJS':'adjective, superlative (largest)',
             'LS':'list market',
             'MD':'modal (could, will)',
             'NN':'noun, singular (cat, tree)',
             'NNS':'noun plural (desks)',
             'NNP':'proper noun, singular (sarah)',
             'NNPS':'proper noun, plural (indians or americans)',
             'PDT':'predeterminer (all, both, half)',
             'POS':'possessive ending (parent‘s)',
             'PRP':'personal pronoun (hers, herself, him, himself)',
             'PRP$':'possessive pronoun (her, his, mine, my, our)',
             'RB':'adverb (occasionally, swiftly)',
             'RBR':'adverb, comparative (greater)',
             'RBS':'adverb, superlative (biggest)',
             'RP':'particle (about)',
             'TO':'infinite marker (to)',
             'UH':'interjection (goodbye)',
             'VB':'verb (ask)',
             'VBG':'verb gerund (judging)',
             'VBD':'verb past tense (pleaded)',
             'VBN':'verb past participle (reunified)',
             'VBP':'verb, present tense not 3rd person singular (wrap)',
             'VBZ':'verb, present tense with 3rd person singular (bases)',
             'WDT':'wh-determiner (that, what)',
             'WP':'wh- pronoun (who)',
             'WRB':'wh- adverb (how)'
            }

pos_df['pos tag meaning'] = pos_df['pos tag'].map(pos_abbreviation_dict)
pos_df

Unnamed: 0,word,pos tag,pos tag meaning
0,music,NN,"noun, singular (cat, tree)"
1,theory,NN,"noun, singular (cat, tree)"
2,is,VBZ,"verb, present tense with 3rd person singular (..."
3,the,DT,determiner
4,study,NN,"noun, singular (cat, tree)"
5,of,IN,preposition/subordinating conjunction
6,the,DT,determiner
7,practices,NNS,noun plural (desks)
8,and,CC,coordinating conjunction
9,possibilities,NNS,noun plural (desks)
