## Investigative POS Tagging (nltk vs spaCy)

### Downloads

In [None]:
pip install -U spacy

In [None]:
pip install plantuml

### Imports

In [3]:
# Spacy Imports
import spacy 
from spacy.lang.en import English

# TextBlob Imports 
from textblob import TextBlob

# nltk Imports 
import nltk
from nltk.corpus import stopwords 

#plantUML Imports 
import plantuml
from plantuml import PlantUML

# Other Imports 
import string
from os.path import abspath

### nltk downloads

In [None]:
# Fetch the 'punkt' resource through the NLTK downloader.
# NOTE(review): presumably the data the tokenize calls further down rely on - confirm.
nltk.download('punkt')

In [None]:
# Fetch the 'averaged_perceptron_tagger' resource through the NLTK downloader.
# NOTE(review): presumably the model behind nltk.pos_tag used further down - confirm.
nltk.download('averaged_perceptron_tagger')

### Text POS Tags 
Investigating the POS Tagging Attribute

In [20]:
# Probe sentence: a single numbered requirement used to inspect the tagger.
sample_requirement = " 2.	LR_Bus establishes connection between Left_Side_FGS and Right_Side_FGS."

# `text` ends up holding the token list (not the raw string) and `l1` the
# (token, POS tag) pairs; `l1` is displayed as the cell result.
text = nltk.word_tokenize(sample_requirement)
l1 = nltk.pos_tag(text)
l1

[('2', 'CD'),
 ('.', '.'),
 ('LR_Bus', 'NNP'),
 ('establishes', 'VBZ'),
 ('connection', 'NN'),
 ('between', 'IN'),
 ('Left_Side_FGS', 'NNP'),
 ('and', 'CC'),
 ('Right_Side_FGS', 'NNP'),
 ('.', '.')]

In [21]:
# Noun Extraction (only extracting Proper Nouns)
#
# Each element of `l1` is a (token, tag) pair; keep the tokens tagged 'NNP'.
# The pairs are unpacked inside a comprehension so that no loop variable
# (previously one named `tuple`, shadowing the builtin) leaks into the
# notebook namespace.
nltk_nouns = [token for token, tag in l1 if tag == 'NNP']

nltk_nouns

['LR_Bus', 'Left_Side_FGS', 'Right_Side_FGS']

### Creating text file to store info to be run in PlantUML

In [23]:
# specification text
# The nine numbered requirements are kept as separate fragments for
# readability and joined with no separator, so `text` is one single string.
spec_fragments = [
    " 1.	The FGS_System consists of four components: the Left_Side_FGS, the Right_Side_FGS, an LR_Bus, and an RL_Bus.",
    " 2.	LR_Bus establishes connection between Left_Side_FGS and Right_Side_FGS.",
    " 3.	RL_Bus establishes connection between Left_Side_FGS and Right_Side_FGS.",
    " 4.	The Left_Side_FGS accepts as input a boolean value of Left_Transfer_Switch and Left_Primary_Side.",
    " 5.	The Right_Side_FGS accepts as input a boolean value of Right_Transfer_Switch and Right_Primary_Side.",
    " 6.	The Left_Side_FGS takes input from a synchronous clock CLK1.",
    " 7.	The LR_Bus takes input from CLK2.",
    " 8.	The Right_Side_FGS takes input from a synchronous clock CLK3.",
    " 9.	The RL_Bus takes input from CLK4.",
]
text = "".join(spec_fragments)


In [24]:
 # sent_tokenize is brought into scope here; the file-writing cell further
 # down calls it as well, so this cell has to run before that one.
 from nltk.tokenize import sent_tokenize, word_tokenize
 # Preview how the specification splits into sentences. As the output shows,
 # every "N." label comes out as a sentence of its own.
 sent_tokenize(text)

[' 1.',
 'The FGS_System consists of four components: the Left_Side_FGS, the Right_Side_FGS, an LR_Bus, and an RL_Bus.',
 '2.',
 'LR_Bus establishes connection between Left_Side_FGS and Right_Side_FGS.',
 '3.',
 'RL_Bus establishes connection between Left_Side_FGS and Right_Side_FGS.',
 '4.',
 'The Left_Side_FGS accepts as input a boolean value of Left_Transfer_Switch and Left_Primary_Side.',
 '5.',
 'The Right_Side_FGS accepts as input a boolean value of Right_Transfer_Switch and Right_Primary_Side.',
 '6.',
 'The Left_Side_FGS takes input from a synchronous clock CLK1.',
 '7.',
 'The LR_Bus takes input from CLK2.',
 '8.',
 'The Right_Side_FGS takes input from a synchronous clock CLK3.',
 '9.',
 'The RL_Bus takes input from CLK4.']

In [25]:
# Build the PlantUML description of the system and store it in
# model_specs.txt (the file is created if it doesn't exist and overwritten
# otherwise).
#
# For every sentence of `text`, the first proper noun (tag 'NNP') is used as
# the source component and each remaining proper noun as a target:
#   * sentence contains the token "connection" -> "[source]-[target] :C<n>"
#   * sentence contains the token "input"      -> "[source]-target"
# Sentences with fewer than two proper nouns produce no line.

# Split the specification into sentences.
doc = sent_tokenize(text)

connection_num = 1
# One handle opened in 'w' mode truncates the file and receives every line,
# so the file never has to be reopened in append mode and the `with` block
# closes it exactly once.
with open('model_specs.txt', 'w') as f:
    f.write("skinparam nodesep 150\n")

    # Iterate sentence-by-sentence.
    for sent in doc:
        # Tokenize the sentence and POS-tag its tokens.
        token_sentence = nltk.word_tokenize(sent)
        pos_tag_token = nltk.pos_tag(token_sentence)

        # Noun extraction (proper nouns only).
        nltk_nouns = [token for token, tag in pos_tag_token if tag == 'NNP']
        if len(nltk_nouns) < 2:
            # No source/target pair can be formed from this sentence.
            continue

        # The keyword checks depend only on the sentence, so evaluate them
        # once instead of once per target.
        is_connection = "connection" in token_sentence
        is_input = "input" in token_sentence
        source = nltk_nouns[0]

        for target in nltk_nouns[1:]:
            if is_connection:
                f.write(f"[{source}]-[{target}] :C{connection_num}\n")
                connection_num += 1
            if is_input:
                f.write(f"[{source}]-{target}\n")
                # NOTE(review): the counter also advances for these unlabelled
                # input edges, exactly as before; confirm that a later
                # connection label is meant to skip these numbers.
                connection_num += 1

### Creating System Diagram using plantUML

In [26]:
# Send the generated specification to the PlantUML web service.
# The endpoint and the input path are named up front; the client is built
# with the same (empty) auth and HTTP option dictionaries as before.
PLANTUML_IMG_URL = 'http://www.plantuml.com/plantuml/img/'
spec_path = abspath('model_specs.txt')

server = PlantUML(
    url=PLANTUML_IMG_URL,
    basic_auth={},
    form_auth={},
    http_opts={},
    request_opts={},
)

# The call's return value is the cell output (True in the recorded run).
server.processes_file(spec_path)


True